diff --git a/src/arch/amdgpu/vega/SConscript b/src/arch/amdgpu/vega/SConscript index 019ef279b3..b7a28a8d6c 100644 --- a/src/arch/amdgpu/vega/SConscript +++ b/src/arch/amdgpu/vega/SConscript @@ -49,13 +49,32 @@ Source('tlb_coalescer.cc') DebugFlag('GPUPTWalker', 'Debug flag for GPU page table walker') if env['CONF']['TARGET_GPU_ISA'] == 'vega': - Source('decoder.cc') + Source('gpu_decoder.cc') Source('insts/gpu_static_inst.cc') Source('insts/instructions.cc') Source('insts/op_encodings.cc') - Source('isa.cc') - Source('registers.cc') + Source('gpu_isa.cc') + Source('gpu_registers.cc') + Source('insts/sop2.cc') + Source('insts/sopk.cc') + Source('insts/sop1.cc') + Source('insts/sopc.cc') + Source('insts/sopp.cc') + Source('insts/smem.cc') + Source('insts/vop2.cc') + Source('insts/vop1.cc') + Source('insts/vopc.cc') + Source('insts/vinterp.cc') + Source('insts/vop3.cc') + Source('insts/vop3_cmp.cc') + Source('insts/ds.cc') + Source('insts/mubuf.cc') + Source('insts/mtbuf.cc') + Source('insts/mimg.cc') + Source('insts/exp.cc') + Source('insts/flat.cc') Source('insts/vop3p.cc') + Source('insts/vop3p_mai.cc') DebugFlag('VEGA', 'Debug flag for VEGA GPU ISA') diff --git a/src/arch/amdgpu/vega/decoder.cc b/src/arch/amdgpu/vega/gpu_decoder.cc similarity index 99% rename from src/arch/amdgpu/vega/decoder.cc rename to src/arch/amdgpu/vega/gpu_decoder.cc index 5e2402a4af..940840719b 100644 --- a/src/arch/amdgpu/vega/decoder.cc +++ b/src/arch/amdgpu/vega/gpu_decoder.cc @@ -29,9 +29,10 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include "arch/amdgpu/vega/gpu_decoder.hh" + #include -#include "arch/amdgpu/vega/gpu_decoder.hh" #include "arch/amdgpu/vega/insts/gpu_static_inst.hh" #include "arch/amdgpu/vega/insts/instructions.hh" #include "arch/amdgpu/vega/insts/vop3p.hh" diff --git a/src/arch/amdgpu/vega/isa.cc b/src/arch/amdgpu/vega/gpu_isa.cc similarity index 100% rename from src/arch/amdgpu/vega/isa.cc rename to src/arch/amdgpu/vega/gpu_isa.cc diff --git a/src/arch/amdgpu/vega/registers.cc b/src/arch/amdgpu/vega/gpu_registers.cc similarity index 100% rename from src/arch/amdgpu/vega/registers.cc rename to src/arch/amdgpu/vega/gpu_registers.cc diff --git a/src/arch/amdgpu/vega/insts/ds.cc b/src/arch/amdgpu/vega/insts/ds.cc new file mode 100644 index 0000000000..17acdaa287 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/ds.cc @@ -0,0 +1,4657 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/amdgpu/vega/insts/instructions.hh"
+
+namespace gem5
+{
+
+namespace VegaISA
+{
+    // --- Inst_DS__DS_ADD_U32 class methods ---
+
+    Inst_DS__DS_ADD_U32::Inst_DS__DS_ADD_U32(InFmt_DS *iFmt)
+        : Inst_DS(iFmt, "ds_add_u32")
+    {
+        setFlag(MemoryRef);
+        setFlag(GroupSegment);
+        setFlag(AtomicAdd);
+        setFlag(AtomicNoReturn);
+    } // Inst_DS__DS_ADD_U32
+
+    Inst_DS__DS_ADD_U32::~Inst_DS__DS_ADD_U32()
+    {
+    } // ~Inst_DS__DS_ADD_U32
+
+    // --- description from .arch file ---
+    // 32b:
+    // MEM[ADDR] += DATA;
+    void
+    Inst_DS__DS_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decLGKMInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(
+            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
+        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
+
+        addr.read();
+        data.read();
+
+        calcAddr(gpuDynInst, addr);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
+                    = data[lane];
+            }
+        }
+
+        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
+    } // execute
+
+    void
+    Inst_DS__DS_ADD_U32::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        Addr offset0 = instData.OFFSET0;
+        Addr offset1 = instData.OFFSET1;
+        Addr offset = (offset1 << 8) | offset0;
+
+        initAtomicAccess<VecElemU32>(gpuDynInst, offset);
+    } // initiateAcc
+
+    void
+    Inst_DS__DS_ADD_U32::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_DS__DS_SUB_U32 class methods ---
+
+    Inst_DS__DS_SUB_U32::Inst_DS__DS_SUB_U32(InFmt_DS *iFmt)
+        : Inst_DS(iFmt, "ds_sub_u32")
+    {
+    } // Inst_DS__DS_SUB_U32
+
+    Inst_DS__DS_SUB_U32::~Inst_DS__DS_SUB_U32()
+    {
+    } // ~Inst_DS__DS_SUB_U32
+
+    // --- description from .arch file ---
+    // 32b:
+    // tmp = MEM[ADDR];
+    // MEM[ADDR] -= DATA;
+    // RETURN_DATA = tmp.
+    void
+    Inst_DS__DS_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+    // --- Inst_DS__DS_RSUB_U32 class methods ---
+
+    Inst_DS__DS_RSUB_U32::Inst_DS__DS_RSUB_U32(InFmt_DS *iFmt)
+        : Inst_DS(iFmt, "ds_rsub_u32")
+    {
+    } // Inst_DS__DS_RSUB_U32
+
+    Inst_DS__DS_RSUB_U32::~Inst_DS__DS_RSUB_U32()
+    {
+    } // ~Inst_DS__DS_RSUB_U32
+
+    // --- description from .arch file ---
+    // 32b:
+    // tmp = MEM[ADDR];
+    // MEM[ADDR] = DATA - MEM[ADDR];
+    // RETURN_DATA = tmp.
+    // Subtraction with reversed operands.
+ void + Inst_DS__DS_RSUB_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_INC_U32 class methods --- + + Inst_DS__DS_INC_U32::Inst_DS__DS_INC_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_inc_u32") + { + } // Inst_DS__DS_INC_U32 + + Inst_DS__DS_INC_U32::~Inst_DS__DS_INC_U32() + { + } // ~Inst_DS__DS_INC_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); + // RETURN_DATA = tmp. + void + Inst_DS__DS_INC_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_DEC_U32 class methods --- + + Inst_DS__DS_DEC_U32::Inst_DS__DS_DEC_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_dec_u32") + { + } // Inst_DS__DS_DEC_U32 + + Inst_DS__DS_DEC_U32::~Inst_DS__DS_DEC_U32() + { + } // ~Inst_DS__DS_DEC_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 + // (unsigned compare); RETURN_DATA = tmp. + void + Inst_DS__DS_DEC_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_I32 class methods --- + + Inst_DS__DS_MIN_I32::Inst_DS__DS_MIN_I32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_i32") + { + } // Inst_DS__DS_MIN_I32 + + Inst_DS__DS_MIN_I32::~Inst_DS__DS_MIN_I32() + { + } // ~Inst_DS__DS_MIN_I32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); + // RETURN_DATA = tmp. + void + Inst_DS__DS_MIN_I32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_I32 class methods --- + + Inst_DS__DS_MAX_I32::Inst_DS__DS_MAX_I32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_i32") + { + } // Inst_DS__DS_MAX_I32 + + Inst_DS__DS_MAX_I32::~Inst_DS__DS_MAX_I32() + { + } // ~Inst_DS__DS_MAX_I32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); + // RETURN_DATA = tmp. + void + Inst_DS__DS_MAX_I32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_U32 class methods --- + + Inst_DS__DS_MIN_U32::Inst_DS__DS_MIN_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_u32") + { + } // Inst_DS__DS_MIN_U32 + + Inst_DS__DS_MIN_U32::~Inst_DS__DS_MIN_U32() + { + } // ~Inst_DS__DS_MIN_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); + // RETURN_DATA = tmp. + void + Inst_DS__DS_MIN_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_U32 class methods --- + + Inst_DS__DS_MAX_U32::Inst_DS__DS_MAX_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_u32") + { + } // Inst_DS__DS_MAX_U32 + + Inst_DS__DS_MAX_U32::~Inst_DS__DS_MAX_U32() + { + } // ~Inst_DS__DS_MAX_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); + // RETURN_DATA = tmp. 
+ void + Inst_DS__DS_MAX_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_AND_B32 class methods --- + + Inst_DS__DS_AND_B32::Inst_DS__DS_AND_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_and_b32") + { + } // Inst_DS__DS_AND_B32 + + Inst_DS__DS_AND_B32::~Inst_DS__DS_AND_B32() + { + } // ~Inst_DS__DS_AND_B32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] &= DATA; + // RETURN_DATA = tmp. + void + Inst_DS__DS_AND_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_OR_B32 class methods --- + + Inst_DS__DS_OR_B32::Inst_DS__DS_OR_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_or_b32") + { + setFlag(MemoryRef); + setFlag(GroupSegment); + setFlag(AtomicOr); + setFlag(AtomicNoReturn); + } // Inst_DS__DS_OR_B32 + + Inst_DS__DS_OR_B32::~Inst_DS__DS_OR_B32() + { + } // ~Inst_DS__DS_OR_B32 + + // --- description from .arch file --- + // 32b: + // MEM[ADDR] |= DATA; + void + Inst_DS__DS_OR_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU32 data(gpuDynInst, extData.DATA0); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->a_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_OR_B32::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initAtomicAccess(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_OR_B32::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + + // --- Inst_DS__DS_XOR_B32 class methods --- + + Inst_DS__DS_XOR_B32::Inst_DS__DS_XOR_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_xor_b32") + { + } // Inst_DS__DS_XOR_B32 + + Inst_DS__DS_XOR_B32::~Inst_DS__DS_XOR_B32() + { + } // ~Inst_DS__DS_XOR_B32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] ^= DATA; + // RETURN_DATA = tmp. + void + Inst_DS__DS_XOR_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MSKOR_B32 class methods --- + + Inst_DS__DS_MSKOR_B32::Inst_DS__DS_MSKOR_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_mskor_b32") + { + } // Inst_DS__DS_MSKOR_B32 + + Inst_DS__DS_MSKOR_B32::~Inst_DS__DS_MSKOR_B32() + { + } // ~Inst_DS__DS_MSKOR_B32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2; + // RETURN_DATA = tmp. + // Masked dword OR, D0 contains the mask and D1 contains the new value. 
+ void + Inst_DS__DS_MSKOR_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRITE_B32 class methods --- + + Inst_DS__DS_WRITE_B32::Inst_DS__DS_WRITE_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write_b32") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE_B32 + + Inst_DS__DS_WRITE_B32::~Inst_DS__DS_WRITE_B32() + { + } // ~Inst_DS__DS_WRITE_B32 + + // --- description from .arch file --- + // 32b: + // MEM[ADDR] = DATA. + // Write dword. + void + Inst_DS__DS_WRITE_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU32 data(gpuDynInst, extData.DATA0); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_WRITE_B32::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemWrite(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_WRITE_B32::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_DS__DS_WRITE2_B32 class methods --- + + Inst_DS__DS_WRITE2_B32::Inst_DS__DS_WRITE2_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write2_b32") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE2_B32 + + Inst_DS__DS_WRITE2_B32::~Inst_DS__DS_WRITE2_B32() + { + } // ~Inst_DS__DS_WRITE2_B32 + + // --- description from .arch file --- + // 32b: + // MEM[ADDR_BASE + OFFSET0 * 4] = DATA; + // MEM[ADDR_BASE + OFFSET1 * 4] = DATA2. + // Write 2 dwords. 
+    void
+    Inst_DS__DS_WRITE2_B32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decLGKMInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(
+            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
+        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
+        ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);
+
+        addr.read();
+        data0.read();
+        data1.read();
+
+        calcAddr(gpuDynInst, addr);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
+                    = data0[lane];
+                (reinterpret_cast<VecElemU32*>(
+                    gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
+            }
+        }
+
+        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
+    } // execute
+
+    void
+    Inst_DS__DS_WRITE2_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        Addr offset0 = instData.OFFSET0 * 4;
+        Addr offset1 = instData.OFFSET1 * 4;
+
+        initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1);
+    }
+
+    void
+    Inst_DS__DS_WRITE2_B32::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    }
+    // --- Inst_DS__DS_WRITE2ST64_B32 class methods ---
+
+    Inst_DS__DS_WRITE2ST64_B32::Inst_DS__DS_WRITE2ST64_B32(InFmt_DS *iFmt)
+        : Inst_DS(iFmt, "ds_write2st64_b32")
+    {
+        setFlag(MemoryRef);
+        setFlag(Store);
+    } // Inst_DS__DS_WRITE2ST64_B32
+
+    Inst_DS__DS_WRITE2ST64_B32::~Inst_DS__DS_WRITE2ST64_B32()
+    {
+    } // ~Inst_DS__DS_WRITE2ST64_B32
+
+    // --- description from .arch file ---
+    // 32b:
+    // MEM[ADDR_BASE + OFFSET0 * 4 * 64] = DATA;
+    // MEM[ADDR_BASE + OFFSET1 * 4 * 64] = DATA2;
+    // Write 2 dwords.
+    void
+    Inst_DS__DS_WRITE2ST64_B32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decLGKMInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(
+            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
+        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
+        ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);
+
+        addr.read();
+        data0.read();
+        data1.read();
+
+        calcAddr(gpuDynInst, addr);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
+                    = data0[lane];
+                (reinterpret_cast<VecElemU32*>(
+                    gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
+            }
+        }
+
+        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
+    } // execute
+
+    void
+    Inst_DS__DS_WRITE2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        Addr offset0 = instData.OFFSET0 * 4 * 64;
+        Addr offset1 = instData.OFFSET1 * 4 * 64;
+
+        initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1);
+    }
+
+    void
+    Inst_DS__DS_WRITE2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    }
+    // --- Inst_DS__DS_CMPST_B32 class methods ---
+
+    Inst_DS__DS_CMPST_B32::Inst_DS__DS_CMPST_B32(InFmt_DS *iFmt)
+        : Inst_DS(iFmt, "ds_cmpst_b32")
+    {
+    } // Inst_DS__DS_CMPST_B32
+
+    Inst_DS__DS_CMPST_B32::~Inst_DS__DS_CMPST_B32()
+    {
+    } // ~Inst_DS__DS_CMPST_B32
+
+    // --- description from .arch file ---
+    // 32b:
+    // tmp = MEM[ADDR];
+    // src = DATA2;
+    // cmp = DATA;
+    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
+    // RETURN_DATA[0] = tmp.
+    // Compare and store.
+ // Caution, the order of src and cmp are the *opposite* of the + // --- BUFFER_ATOMIC_CMPSWAP opcode. + void + Inst_DS__DS_CMPST_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_CMPST_F32 class methods --- + + Inst_DS__DS_CMPST_F32::Inst_DS__DS_CMPST_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_cmpst_f32") + { + setFlag(F32); + } // Inst_DS__DS_CMPST_F32 + + Inst_DS__DS_CMPST_F32::~Inst_DS__DS_CMPST_F32() + { + } // ~Inst_DS__DS_CMPST_F32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // src = DATA2; + // cmp = DATA; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0] = tmp. + // Floating point compare and store that handles NaN/INF/denormal values. + // Caution, the order of src and cmp are the *opposite* of the + // --- BUFFER_ATOMIC_FCMPSWAP opcode. + void + Inst_DS__DS_CMPST_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_F32 class methods --- + + Inst_DS__DS_MIN_F32::Inst_DS__DS_MIN_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_f32") + { + setFlag(F32); + } // Inst_DS__DS_MIN_F32 + + Inst_DS__DS_MIN_F32::~Inst_DS__DS_MIN_F32() + { + } // ~Inst_DS__DS_MIN_F32 + + // --- description from .arch file --- + // 32b. + // tmp = MEM[ADDR]; + // src = DATA; + // cmp = DATA2; + // MEM[ADDR] = (cmp < tmp) ? src : tmp. + // Floating point minimum that handles NaN/INF/denormal values. + // Note that this opcode is slightly more general-purpose than + // --- BUFFER_ATOMIC_FMIN. + void + Inst_DS__DS_MIN_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_F32 class methods --- + + Inst_DS__DS_MAX_F32::Inst_DS__DS_MAX_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_f32") + { + setFlag(F32); + } // Inst_DS__DS_MAX_F32 + + Inst_DS__DS_MAX_F32::~Inst_DS__DS_MAX_F32() + { + } // ~Inst_DS__DS_MAX_F32 + + // --- description from .arch file --- + // 32b. + // tmp = MEM[ADDR]; + // src = DATA; + // cmp = DATA2; + // MEM[ADDR] = (tmp > cmp) ? src : tmp. + // Floating point maximum that handles NaN/INF/denormal values. + // Note that this opcode is slightly more general-purpose than + // --- BUFFER_ATOMIC_FMAX. + void + Inst_DS__DS_MAX_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_NOP class methods --- + + Inst_DS__DS_NOP::Inst_DS__DS_NOP(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_nop") + { + setFlag(Nop); + } // Inst_DS__DS_NOP + + Inst_DS__DS_NOP::~Inst_DS__DS_NOP() + { + } // ~Inst_DS__DS_NOP + + // --- description from .arch file --- + // Do nothing. + void + Inst_DS__DS_NOP::execute(GPUDynInstPtr gpuDynInst) + { + gpuDynInst->wavefront()->decLGKMInstsIssued(); + } // execute + // --- Inst_DS__DS_ADD_F32 class methods --- + + Inst_DS__DS_ADD_F32::Inst_DS__DS_ADD_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_add_f32") + { + setFlag(F32); + setFlag(MemoryRef); + setFlag(GroupSegment); + setFlag(AtomicAdd); + setFlag(AtomicNoReturn); + } // Inst_DS__DS_ADD_F32 + + Inst_DS__DS_ADD_F32::~Inst_DS__DS_ADD_F32() + { + } // ~Inst_DS__DS_ADD_F32 + + // --- description from .arch file --- + // 32b: + // MEM[ADDR] += DATA; + // Floating point add that handles NaN/INF/denormal values. 
+    void
+    Inst_DS__DS_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decLGKMInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(
+            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
+        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandF32 data(gpuDynInst, extData.DATA0);
+
+        addr.read();
+        data.read();
+
+        calcAddr(gpuDynInst, addr);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemF32*>(gpuDynInst->a_data))[lane]
+                    = data[lane];
+            }
+        }
+
+        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
+    } // execute
+
+    void
+    Inst_DS__DS_ADD_F32::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        Addr offset0 = instData.OFFSET0;
+        Addr offset1 = instData.OFFSET1;
+        Addr offset = (offset1 << 8) | offset0;
+
+        initAtomicAccess<VecElemF32>(gpuDynInst, offset);
+    } // initiateAcc
+
+    void
+    Inst_DS__DS_ADD_F32::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_DS__DS_WRITE_B8 class methods ---
+
+    Inst_DS__DS_WRITE_B8::Inst_DS__DS_WRITE_B8(InFmt_DS *iFmt)
+        : Inst_DS(iFmt, "ds_write_b8")
+    {
+        setFlag(MemoryRef);
+        setFlag(Store);
+    } // Inst_DS__DS_WRITE_B8
+
+    Inst_DS__DS_WRITE_B8::~Inst_DS__DS_WRITE_B8()
+    {
+    } // ~Inst_DS__DS_WRITE_B8
+
+    // --- description from .arch file ---
+    // MEM[ADDR] = DATA[7:0].
+    // Byte write.
+    void
+    Inst_DS__DS_WRITE_B8::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decLGKMInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(
+            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
+        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU8 data(gpuDynInst, extData.DATA0);
+
+        addr.read();
+        data.read();
+
+        calcAddr(gpuDynInst, addr);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
+                    = data[lane];
+            }
+        }
+
+        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
+    } // execute
+
+    void
+    Inst_DS__DS_WRITE_B8::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        Addr offset0 = instData.OFFSET0;
+        Addr offset1 = instData.OFFSET1;
+        Addr offset = (offset1 << 8) | offset0;
+
+        initMemWrite<VecElemU8>(gpuDynInst, offset);
+    } // initiateAcc
+
+    void
+    Inst_DS__DS_WRITE_B8::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_DS__DS_WRITE_B8_D16_HI class methods ---
+
+    Inst_DS__DS_WRITE_B8_D16_HI::Inst_DS__DS_WRITE_B8_D16_HI(InFmt_DS *iFmt)
+        : Inst_DS(iFmt, "ds_write_b8_d16_hi")
+    {
+        setFlag(MemoryRef);
+        setFlag(Store);
+    } // Inst_DS__DS_WRITE_B8_D16_HI
+
+    Inst_DS__DS_WRITE_B8_D16_HI::~Inst_DS__DS_WRITE_B8_D16_HI()
+    {
+    } // ~Inst_DS__DS_WRITE_B8_D16_HI
+
+    // --- description from .arch file ---
+    // MEM[ADDR] = DATA[23:16].
+    // Byte write into high word.
+ void + Inst_DS__DS_WRITE_B8_D16_HI::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU8 data(gpuDynInst, extData.DATA0); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = bits(data[lane], 23, 16); + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_WRITE_B8_D16_HI::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemWrite(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_WRITE_B8_D16_HI::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_DS__DS_WRITE_B16 class methods --- + + Inst_DS__DS_WRITE_B16::Inst_DS__DS_WRITE_B16(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write_b16") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE_B16 + + Inst_DS__DS_WRITE_B16::~Inst_DS__DS_WRITE_B16() + { + } // ~Inst_DS__DS_WRITE_B16 + + // --- description from .arch file --- + // MEM[ADDR] = DATA[15:0] + // Short write. + void + Inst_DS__DS_WRITE_B16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU16 data(gpuDynInst, extData.DATA0); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_WRITE_B16::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemWrite(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_WRITE_B16::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_DS__DS_ADD_RTN_U32 class methods --- + + Inst_DS__DS_ADD_RTN_U32::Inst_DS__DS_ADD_RTN_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_add_rtn_u32") + { + } // Inst_DS__DS_ADD_RTN_U32 + + Inst_DS__DS_ADD_RTN_U32::~Inst_DS__DS_ADD_RTN_U32() + { + } // ~Inst_DS__DS_ADD_RTN_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] += DATA; + // RETURN_DATA = tmp. 
+ void + Inst_DS__DS_ADD_RTN_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_SUB_RTN_U32 class methods --- + + Inst_DS__DS_SUB_RTN_U32::Inst_DS__DS_SUB_RTN_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_sub_rtn_u32") + { + } // Inst_DS__DS_SUB_RTN_U32 + + Inst_DS__DS_SUB_RTN_U32::~Inst_DS__DS_SUB_RTN_U32() + { + } // ~Inst_DS__DS_SUB_RTN_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= DATA; + // RETURN_DATA = tmp. + void + Inst_DS__DS_SUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_RSUB_RTN_U32 class methods --- + + Inst_DS__DS_RSUB_RTN_U32::Inst_DS__DS_RSUB_RTN_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_rsub_rtn_u32") + { + } // Inst_DS__DS_RSUB_RTN_U32 + + Inst_DS__DS_RSUB_RTN_U32::~Inst_DS__DS_RSUB_RTN_U32() + { + } // ~Inst_DS__DS_RSUB_RTN_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = DATA - MEM[ADDR]; + // RETURN_DATA = tmp. + // Subtraction with reversed operands. + void + Inst_DS__DS_RSUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_INC_RTN_U32 class methods --- + + Inst_DS__DS_INC_RTN_U32::Inst_DS__DS_INC_RTN_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_inc_rtn_u32") + { + } // Inst_DS__DS_INC_RTN_U32 + + Inst_DS__DS_INC_RTN_U32::~Inst_DS__DS_INC_RTN_U32() + { + } // ~Inst_DS__DS_INC_RTN_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); + // RETURN_DATA = tmp. + void + Inst_DS__DS_INC_RTN_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_DEC_RTN_U32 class methods --- + + Inst_DS__DS_DEC_RTN_U32::Inst_DS__DS_DEC_RTN_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_dec_rtn_u32") + { + } // Inst_DS__DS_DEC_RTN_U32 + + Inst_DS__DS_DEC_RTN_U32::~Inst_DS__DS_DEC_RTN_U32() + { + } // ~Inst_DS__DS_DEC_RTN_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 + // (unsigned compare); RETURN_DATA = tmp. + void + Inst_DS__DS_DEC_RTN_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_RTN_I32 class methods --- + + Inst_DS__DS_MIN_RTN_I32::Inst_DS__DS_MIN_RTN_I32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_rtn_i32") + { + } // Inst_DS__DS_MIN_RTN_I32 + + Inst_DS__DS_MIN_RTN_I32::~Inst_DS__DS_MIN_RTN_I32() + { + } // ~Inst_DS__DS_MIN_RTN_I32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); + // RETURN_DATA = tmp. + void + Inst_DS__DS_MIN_RTN_I32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_RTN_I32 class methods --- + + Inst_DS__DS_MAX_RTN_I32::Inst_DS__DS_MAX_RTN_I32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_rtn_i32") + { + } // Inst_DS__DS_MAX_RTN_I32 + + Inst_DS__DS_MAX_RTN_I32::~Inst_DS__DS_MAX_RTN_I32() + { + } // ~Inst_DS__DS_MAX_RTN_I32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); + // RETURN_DATA = tmp. 
+ void + Inst_DS__DS_MAX_RTN_I32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_RTN_U32 class methods --- + + Inst_DS__DS_MIN_RTN_U32::Inst_DS__DS_MIN_RTN_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_rtn_u32") + { + } // Inst_DS__DS_MIN_RTN_U32 + + Inst_DS__DS_MIN_RTN_U32::~Inst_DS__DS_MIN_RTN_U32() + { + } // ~Inst_DS__DS_MIN_RTN_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); + // RETURN_DATA = tmp. + void + Inst_DS__DS_MIN_RTN_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_RTN_U32 class methods --- + + Inst_DS__DS_MAX_RTN_U32::Inst_DS__DS_MAX_RTN_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_rtn_u32") + { + } // Inst_DS__DS_MAX_RTN_U32 + + Inst_DS__DS_MAX_RTN_U32::~Inst_DS__DS_MAX_RTN_U32() + { + } // ~Inst_DS__DS_MAX_RTN_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); + // RETURN_DATA = tmp. + void + Inst_DS__DS_MAX_RTN_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_AND_RTN_B32 class methods --- + + Inst_DS__DS_AND_RTN_B32::Inst_DS__DS_AND_RTN_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_and_rtn_b32") + { + } // Inst_DS__DS_AND_RTN_B32 + + Inst_DS__DS_AND_RTN_B32::~Inst_DS__DS_AND_RTN_B32() + { + } // ~Inst_DS__DS_AND_RTN_B32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] &= DATA; + // RETURN_DATA = tmp. + void + Inst_DS__DS_AND_RTN_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_OR_RTN_B32 class methods --- + + Inst_DS__DS_OR_RTN_B32::Inst_DS__DS_OR_RTN_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_or_rtn_b32") + { + } // Inst_DS__DS_OR_RTN_B32 + + Inst_DS__DS_OR_RTN_B32::~Inst_DS__DS_OR_RTN_B32() + { + } // ~Inst_DS__DS_OR_RTN_B32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] |= DATA; + // RETURN_DATA = tmp. + void + Inst_DS__DS_OR_RTN_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_XOR_RTN_B32 class methods --- + + Inst_DS__DS_XOR_RTN_B32::Inst_DS__DS_XOR_RTN_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_xor_rtn_b32") + { + } // Inst_DS__DS_XOR_RTN_B32 + + Inst_DS__DS_XOR_RTN_B32::~Inst_DS__DS_XOR_RTN_B32() + { + } // ~Inst_DS__DS_XOR_RTN_B32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] ^= DATA; + // RETURN_DATA = tmp. + void + Inst_DS__DS_XOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MSKOR_RTN_B32 class methods --- + + Inst_DS__DS_MSKOR_RTN_B32::Inst_DS__DS_MSKOR_RTN_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_mskor_rtn_b32") + { + } // Inst_DS__DS_MSKOR_RTN_B32 + + Inst_DS__DS_MSKOR_RTN_B32::~Inst_DS__DS_MSKOR_RTN_B32() + { + } // ~Inst_DS__DS_MSKOR_RTN_B32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2; + // RETURN_DATA = tmp. + // Masked dword OR, D0 contains the mask and D1 contains the new value. 
+ void + Inst_DS__DS_MSKOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRXCHG_RTN_B32 class methods --- + + Inst_DS__DS_WRXCHG_RTN_B32::Inst_DS__DS_WRXCHG_RTN_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_wrxchg_rtn_b32") + { + } // Inst_DS__DS_WRXCHG_RTN_B32 + + Inst_DS__DS_WRXCHG_RTN_B32::~Inst_DS__DS_WRXCHG_RTN_B32() + { + } // ~Inst_DS__DS_WRXCHG_RTN_B32 + + // --- description from .arch file --- + // tmp = MEM[ADDR]; + // MEM[ADDR] = DATA; + // RETURN_DATA = tmp. + // Write-exchange operation. + void + Inst_DS__DS_WRXCHG_RTN_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRXCHG2_RTN_B32 class methods --- + + Inst_DS__DS_WRXCHG2_RTN_B32::Inst_DS__DS_WRXCHG2_RTN_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_wrxchg2_rtn_b32") + { + } // Inst_DS__DS_WRXCHG2_RTN_B32 + + Inst_DS__DS_WRXCHG2_RTN_B32::~Inst_DS__DS_WRXCHG2_RTN_B32() + { + } // ~Inst_DS__DS_WRXCHG2_RTN_B32 + + // --- description from .arch file --- + // Write-exchange 2 separate dwords. + void + Inst_DS__DS_WRXCHG2_RTN_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRXCHG2ST64_RTN_B32 class methods --- + + Inst_DS__DS_WRXCHG2ST64_RTN_B32::Inst_DS__DS_WRXCHG2ST64_RTN_B32( + InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b32") + { + } // Inst_DS__DS_WRXCHG2ST64_RTN_B32 + + Inst_DS__DS_WRXCHG2ST64_RTN_B32::~Inst_DS__DS_WRXCHG2ST64_RTN_B32() + { + } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B32 + + // --- description from .arch file --- + // Write-exchange 2 separate dwords with a stride of 64 dwords. + void + Inst_DS__DS_WRXCHG2ST64_RTN_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_CMPST_RTN_B32 class methods --- + + Inst_DS__DS_CMPST_RTN_B32::Inst_DS__DS_CMPST_RTN_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_cmpst_rtn_b32") + { + } // Inst_DS__DS_CMPST_RTN_B32 + + Inst_DS__DS_CMPST_RTN_B32::~Inst_DS__DS_CMPST_RTN_B32() + { + } // ~Inst_DS__DS_CMPST_RTN_B32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // src = DATA2; + // cmp = DATA; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0] = tmp. + // Compare and store. + // Caution, the order of src and cmp are the *opposite* of the + // --- BUFFER_ATOMIC_CMPSWAP opcode. + void + Inst_DS__DS_CMPST_RTN_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_CMPST_RTN_F32 class methods --- + + Inst_DS__DS_CMPST_RTN_F32::Inst_DS__DS_CMPST_RTN_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_cmpst_rtn_f32") + { + setFlag(F32); + } // Inst_DS__DS_CMPST_RTN_F32 + + Inst_DS__DS_CMPST_RTN_F32::~Inst_DS__DS_CMPST_RTN_F32() + { + } // ~Inst_DS__DS_CMPST_RTN_F32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // src = DATA2; + // cmp = DATA; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0] = tmp. + // Floating point compare and store that handles NaN/INF/denormal values. + // Caution, the order of src and cmp are the *opposite* of the + // --- BUFFER_ATOMIC_FCMPSWAP opcode. 
+ void + Inst_DS__DS_CMPST_RTN_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_RTN_F32 class methods --- + + Inst_DS__DS_MIN_RTN_F32::Inst_DS__DS_MIN_RTN_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_rtn_f32") + { + setFlag(F32); + } // Inst_DS__DS_MIN_RTN_F32 + + Inst_DS__DS_MIN_RTN_F32::~Inst_DS__DS_MIN_RTN_F32() + { + } // ~Inst_DS__DS_MIN_RTN_F32 + + // --- description from .arch file --- + // 32b. + // tmp = MEM[ADDR]; + // src = DATA; + // cmp = DATA2; + // MEM[ADDR] = (cmp < tmp) ? src : tmp. + // Floating point minimum that handles NaN/INF/denormal values. + // Note that this opcode is slightly more general-purpose than + // --- BUFFER_ATOMIC_FMIN. + void + Inst_DS__DS_MIN_RTN_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_RTN_F32 class methods --- + + Inst_DS__DS_MAX_RTN_F32::Inst_DS__DS_MAX_RTN_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_rtn_f32") + { + setFlag(F32); + } // Inst_DS__DS_MAX_RTN_F32 + + Inst_DS__DS_MAX_RTN_F32::~Inst_DS__DS_MAX_RTN_F32() + { + } // ~Inst_DS__DS_MAX_RTN_F32 + + // --- description from .arch file --- + // 32b. + // tmp = MEM[ADDR]; + // src = DATA; + // cmp = DATA2; + // MEM[ADDR] = (tmp > cmp) ? src : tmp. + // Floating point maximum that handles NaN/INF/denormal values. + // Note that this opcode is slightly more general-purpose than + // --- BUFFER_ATOMIC_FMAX. + void + Inst_DS__DS_MAX_RTN_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRAP_RTN_B32 class methods --- + + Inst_DS__DS_WRAP_RTN_B32::Inst_DS__DS_WRAP_RTN_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_wrap_rtn_b32") + { + } // Inst_DS__DS_WRAP_RTN_B32 + + Inst_DS__DS_WRAP_RTN_B32::~Inst_DS__DS_WRAP_RTN_B32() + { + } // ~Inst_DS__DS_WRAP_RTN_B32 + + // --- description from .arch file --- + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp >= DATA) ? tmp - DATA : tmp + DATA2; + // RETURN_DATA = tmp. + void + Inst_DS__DS_WRAP_RTN_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_ADD_RTN_F32 class methods --- + + Inst_DS__DS_ADD_RTN_F32::Inst_DS__DS_ADD_RTN_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_add_rtn_f32") + { + setFlag(F32); + } // Inst_DS__DS_ADD_RTN_F32 + + Inst_DS__DS_ADD_RTN_F32::~Inst_DS__DS_ADD_RTN_F32() + { + } // ~Inst_DS__DS_ADD_RTN_F32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] += DATA; + // RETURN_DATA = tmp. + // Floating point add that handles NaN/INF/denormal values. + void + Inst_DS__DS_ADD_RTN_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_READ_B32 class methods --- + + Inst_DS__DS_READ_B32::Inst_DS__DS_READ_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read_b32") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ_B32 + + Inst_DS__DS_READ_B32::~Inst_DS__DS_READ_B32() + { + } // ~Inst_DS__DS_READ_B32 + + // --- description from .arch file --- + // RETURN_DATA = MEM[ADDR]. + // Dword read. 
+    void
+    Inst_DS__DS_READ_B32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decLGKMInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(
+            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
+        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
+
+        addr.read();
+
+        calcAddr(gpuDynInst, addr);
+
+        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
+    } // execute
+
+    void
+    Inst_DS__DS_READ_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        Addr offset0 = instData.OFFSET0;
+        Addr offset1 = instData.OFFSET1;
+        Addr offset = (offset1 << 8) | offset0;
+
+        initMemRead<VecElemU32>(gpuDynInst, offset);
+    } // initiateAcc
+
+    void
+    Inst_DS__DS_READ_B32::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+        VecOperandU32 vdst(gpuDynInst, extData.VDST);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                vdst[lane] = (reinterpret_cast<VecElemU32*>(
+                    gpuDynInst->d_data))[lane];
+            }
+        }
+
+        vdst.write();
+    } // completeAcc
+    // --- Inst_DS__DS_READ2_B32 class methods ---
+
+    Inst_DS__DS_READ2_B32::Inst_DS__DS_READ2_B32(InFmt_DS *iFmt)
+        : Inst_DS(iFmt, "ds_read2_b32")
+    {
+        setFlag(MemoryRef);
+        setFlag(Load);
+    } // Inst_DS__DS_READ2_B32
+
+    Inst_DS__DS_READ2_B32::~Inst_DS__DS_READ2_B32()
+    {
+    } // ~Inst_DS__DS_READ2_B32
+
+    // --- description from .arch file ---
+    // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4];
+    // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4].
+    // Read 2 dwords.
+    void
+    Inst_DS__DS_READ2_B32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decLGKMInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(
+            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
+        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
+
+        addr.read();
+
+        calcAddr(gpuDynInst, addr);
+
+        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
+    } // execute
+
+    void
+    Inst_DS__DS_READ2_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        Addr offset0 = instData.OFFSET0 * 4;
+        Addr offset1 = instData.OFFSET1 * 4;
+
+        initDualMemRead<VecElemU32>(gpuDynInst, offset0, offset1);
+    } // initiateAcc
+
+    void
+    Inst_DS__DS_READ2_B32::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+        VecOperandU32 vdst0(gpuDynInst, extData.VDST);
+        VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                vdst0[lane] = (reinterpret_cast<VecElemU32*>(
+                    gpuDynInst->d_data))[lane * 2];
+                vdst1[lane] = (reinterpret_cast<VecElemU32*>(
+                    gpuDynInst->d_data))[lane * 2 + 1];
+            }
+        }
+
+        vdst0.write();
+        vdst1.write();
+    } // completeAcc
+    // --- Inst_DS__DS_READ2ST64_B32 class methods ---
+
+    Inst_DS__DS_READ2ST64_B32::Inst_DS__DS_READ2ST64_B32(InFmt_DS *iFmt)
+        : Inst_DS(iFmt, "ds_read2st64_b32")
+    {
+        setFlag(MemoryRef);
+        setFlag(Load);
+    } // Inst_DS__DS_READ2ST64_B32
+
+    Inst_DS__DS_READ2ST64_B32::~Inst_DS__DS_READ2ST64_B32()
+    {
+    } // ~Inst_DS__DS_READ2ST64_B32
+
+    // --- description from .arch file ---
+    // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4 * 64];
+    // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4 * 64].
+    // Read 2 dwords.
+ void + Inst_DS__DS_READ2ST64_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_READ2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = (instData.OFFSET0 * 4 * 64); + Addr offset1 = (instData.OFFSET1 * 4 * 64); + + initDualMemRead(gpuDynInst, offset0, offset1); + } + + void + Inst_DS__DS_READ2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst0(gpuDynInst, extData.VDST); + VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 2]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 2 + 1]; + } + } + + vdst0.write(); + vdst1.write(); + } + // --- Inst_DS__DS_READ_I8 class methods --- + + Inst_DS__DS_READ_I8::Inst_DS__DS_READ_I8(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read_i8") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ_I8 + + Inst_DS__DS_READ_I8::~Inst_DS__DS_READ_I8() + { + } // ~Inst_DS__DS_READ_I8 + + // --- description from .arch file --- + // RETURN_DATA = signext(MEM[ADDR][7:0]). + // Signed byte read. + void + Inst_DS__DS_READ_I8::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_READ_I8::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemRead(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_READ_I8::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (VecElemU32)sext<8>((reinterpret_cast( + gpuDynInst->d_data))[lane]); + } + } + + vdst.write(); + } // completeAcc + // --- Inst_DS__DS_READ_U8 class methods --- + + Inst_DS__DS_READ_U8::Inst_DS__DS_READ_U8(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read_u8") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ_U8 + + Inst_DS__DS_READ_U8::~Inst_DS__DS_READ_U8() + { + } // ~Inst_DS__DS_READ_U8 + + // --- description from .arch file --- + // RETURN_DATA = {24'h0,MEM[ADDR][7:0]}. + // Unsigned byte read. 
+ void + Inst_DS__DS_READ_U8::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_READ_U8::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemRead(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_READ_U8::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (VecElemU32)(reinterpret_cast( + gpuDynInst->d_data))[lane]; + } + } + + vdst.write(); + } // completeAcc + // --- Inst_DS__DS_READ_I16 class methods --- + + Inst_DS__DS_READ_I16::Inst_DS__DS_READ_I16(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read_i16") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ_I16 + + Inst_DS__DS_READ_I16::~Inst_DS__DS_READ_I16() + { + } // ~Inst_DS__DS_READ_I16 + + // --- description from .arch file --- + // RETURN_DATA = signext(MEM[ADDR][15:0]). + // Signed short read. + void + Inst_DS__DS_READ_I16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_READ_U16 class methods --- + + Inst_DS__DS_READ_U16::Inst_DS__DS_READ_U16(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read_u16") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ_U16 + + Inst_DS__DS_READ_U16::~Inst_DS__DS_READ_U16() + { + } // ~Inst_DS__DS_READ_U16 + + // --- description from .arch file --- + // RETURN_DATA = {16'h0,MEM[ADDR][15:0]}. + // Unsigned short read. 
+ void + Inst_DS__DS_READ_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + void + Inst_DS__DS_READ_U16::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemRead(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_READ_U16::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (VecElemU32)(reinterpret_cast( + gpuDynInst->d_data))[lane]; + } + } + + vdst.write(); + } // completeAcc + // --- Inst_DS__DS_SWIZZLE_B32 class methods --- + + Inst_DS__DS_SWIZZLE_B32::Inst_DS__DS_SWIZZLE_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_swizzle_b32") + { + /** + * While this operation doesn't actually use DS storage we classify + * it as a load here because it does a writeback to a VGPR, which + * fits in better with the LDS pipeline logic. + */ + setFlag(Load); + } // Inst_DS__DS_SWIZZLE_B32 + + Inst_DS__DS_SWIZZLE_B32::~Inst_DS__DS_SWIZZLE_B32() + { + } // ~Inst_DS__DS_SWIZZLE_B32 + + // --- description from .arch file --- + // RETURN_DATA = swizzle(vgpr_data, offset1:offset0). + // Dword swizzle, no data is written to LDS memory; See ds_opcodes.docx for + // --- details. + void + Inst_DS__DS_SWIZZLE_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + wf->decLGKMInstsIssued(); + + if (gpuDynInst->exec_mask.none()) { + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit() + ->cyclesToTicks(Cycles(24))); + + ConstVecOperandU32 data(gpuDynInst, extData.DATA0); + VecOperandU32 vdst(gpuDynInst, extData.VDST); + /** + * The "DS pattern" is comprised of both offset fields. That is, the + * swizzle pattern between lanes. Bit 15 of the DS pattern dictates + * which swizzle mode to use. There are two different swizzle + * patterns: 1) QDMode and 2) Bit-masks mode. If bit 15 is set use + * QDMode else use Bit-masks mode. The remaining bits dictate how to + * swizzle the lanes. + * + * QDMode: Chunks the lanes into 4s and swizzles among them. + * Bits 7:6 dictate where lane 3 (of the current chunk) + * gets its date, 5:4 lane 2, etc. + * + * Bit-mask: This mode breaks bits 14:0 into 3 equal-sized chunks. + * 14:10 is the xor_mask, 9:5 is the or_mask, and 4:0 + * is the and_mask. Each lane is swizzled by performing + * the appropriate operation using these masks. + */ + VecElemU16 ds_pattern = ((instData.OFFSET1 << 8) | instData.OFFSET0); + + data.read(); + + if (bits(ds_pattern, 15)) { + // QDMode + for (int lane = 0; lane < NumVecElemPerVecReg; lane += 4) { + /** + * This operation allows data sharing between groups + * of four consecutive threads. Note the increment by + * 4 in the for loop. 
+ */ + if (gpuDynInst->exec_mask[lane]) { + int index0 = lane + bits(ds_pattern, 1, 0); + panic_if(index0 >= NumVecElemPerVecReg, "%s: index0 (%d) " + "is out of bounds.\n", gpuDynInst->disassemble(), + index0); + vdst[lane] + = gpuDynInst->exec_mask[index0] ? data[index0]: 0; + } + if (gpuDynInst->exec_mask[lane + 1]) { + int index1 = lane + bits(ds_pattern, 3, 2); + panic_if(index1 >= NumVecElemPerVecReg, "%s: index1 (%d) " + "is out of bounds.\n", gpuDynInst->disassemble(), + index1); + vdst[lane + 1] + = gpuDynInst->exec_mask[index1] ? data[index1]: 0; + } + if (gpuDynInst->exec_mask[lane + 2]) { + int index2 = lane + bits(ds_pattern, 5, 4); + panic_if(index2 >= NumVecElemPerVecReg, "%s: index2 (%d) " + "is out of bounds.\n", gpuDynInst->disassemble(), + index2); + vdst[lane + 2] + = gpuDynInst->exec_mask[index2] ? data[index2]: 0; + } + if (gpuDynInst->exec_mask[lane + 3]) { + int index3 = lane + bits(ds_pattern, 7, 6); + panic_if(index3 >= NumVecElemPerVecReg, "%s: index3 (%d) " + "is out of bounds.\n", gpuDynInst->disassemble(), + index3); + vdst[lane + 3] + = gpuDynInst->exec_mask[index3] ? data[index3]: 0; + } + } + } else { + // Bit Mode + int and_mask = bits(ds_pattern, 4, 0); + int or_mask = bits(ds_pattern, 9, 5); + int xor_mask = bits(ds_pattern, 14, 10); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + int index = (((lane & and_mask) | or_mask) ^ xor_mask); + // Adjust for the next 32 lanes. + if (lane > 31) { + index += 32; + } + panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is " + "out of bounds.\n", gpuDynInst->disassemble(), + index); + vdst[lane] + = gpuDynInst->exec_mask[index] ? data[index] : 0; + } + } + } + + vdst.write(); + + /** + * This is needed because we treat this instruction as a load + * but it's not an actual memory request. + * Without this, the destination register never gets marked as + * free, leading to a possible deadlock + */ + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); + /** + * Similarly, this counter could build up over time, even across + * multiple wavefronts, and cause a deadlock. + */ + wf->rdLmReqsInPipe--; + } // execute + // --- Inst_DS__DS_PERMUTE_B32 class methods --- + + Inst_DS__DS_PERMUTE_B32::Inst_DS__DS_PERMUTE_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_permute_b32") + { + setFlag(MemoryRef); + /** + * While this operation doesn't actually use DS storage we classify + * it as a load here because it does a writeback to a VGPR, which + * fits in better with the LDS pipeline logic. + */ + setFlag(Load); + } // Inst_DS__DS_PERMUTE_B32 + + Inst_DS__DS_PERMUTE_B32::~Inst_DS__DS_PERMUTE_B32() + { + } // ~Inst_DS__DS_PERMUTE_B32 + + // --- description from .arch file --- + // Forward permute. + void + Inst_DS__DS_PERMUTE_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + wf->decLGKMInstsIssued(); + + if (gpuDynInst->exec_mask.none()) { + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit() + ->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU32 data(gpuDynInst, extData.DATA0); + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + addr.read(); + data.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + /** + * One of the offset fields can be used for the index. 
+ * It is assumed OFFSET0 would be used, as OFFSET1 is + * typically only used for DS ops that operate on two + * disparate pieces of data. + */ + assert(!instData.OFFSET1); + /** + * The address provided is a byte address, but VGPRs are + * 4 bytes, so we must divide by 4 to get the actual VGPR + * index. Additionally, the index is calculated modulo the + * WF size, 64 in this case, so we simply extract bits 7-2. + */ + int index = bits(addr[lane] + instData.OFFSET0, 7, 2); + panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out " + "of bounds.\n", gpuDynInst->disassemble(), index); + /** + * If the shuffled index corresponds to a lane that is + * inactive then this instruction writes a 0 to the active + * lane in VDST. + */ + if (wf->execMask(index)) { + vdst[index] = data[lane]; + } else { + vdst[index] = 0; + } + } + } + + vdst.write(); + + /** + * This is needed because we treat this instruction as a load + * but it's not an actual memory request. + * Without this, the destination register never gets marked as + * free, leading to a possible deadlock + */ + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); + /** + * Similarly, this counter could build up over time, even across + * multiple wavefronts, and cause a deadlock. + */ + wf->rdLmReqsInPipe--; + } // execute + // --- Inst_DS__DS_BPERMUTE_B32 class methods --- + + Inst_DS__DS_BPERMUTE_B32::Inst_DS__DS_BPERMUTE_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_bpermute_b32") + { + setFlag(MemoryRef); + /** + * While this operation doesn't actually use DS storage we classify + * it as a load here because it does a writeback to a VGPR, which + * fits in better with the LDS pipeline logic. + */ + setFlag(Load); + } // Inst_DS__DS_BPERMUTE_B32 + + Inst_DS__DS_BPERMUTE_B32::~Inst_DS__DS_BPERMUTE_B32() + { + } // ~Inst_DS__DS_BPERMUTE_B32 + + // --- description from .arch file --- + // Backward permute. + void + Inst_DS__DS_BPERMUTE_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + wf->decLGKMInstsIssued(); + + if (gpuDynInst->exec_mask.none()) { + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit() + ->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU32 data(gpuDynInst, extData.DATA0); + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + addr.read(); + data.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + /** + * One of the offset fields can be used for the index. + * It is assumed OFFSET0 would be used, as OFFSET1 is + * typically only used for DS ops that operate on two + * disparate pieces of data. + */ + assert(!instData.OFFSET1); + /** + * The address provided is a byte address, but VGPRs are + * 4 bytes, so we must divide by 4 to get the actual VGPR + * index. Additionally, the index is calculated modulo the + * WF size, 64 in this case, so we simply extract bits 7-2. + */ + int index = bits(addr[lane] + instData.OFFSET0, 7, 2); + panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out " + "of bounds.\n", gpuDynInst->disassemble(), index); + /** + * If the shuffled index corresponds to a lane that is + * inactive then this instruction writes a 0 to the active + * lane in VDST. 
+ */ + if (wf->execMask(index)) { + vdst[lane] = data[index]; + } else { + vdst[lane] = 0; + } + } + } + + vdst.write(); + + /** + * This is needed because we treat this instruction as a load + * but it's not an actual memory request. + * Without this, the destination register never gets marked as + * free, leading to a possible deadlock + */ + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); + /** + * Similarly, this counter could build up over time, even across + * multiple wavefronts, and cause a deadlock. + */ + wf->rdLmReqsInPipe--; + } // execute + + // --- Inst_DS__DS_ADD_U64 class methods --- + + Inst_DS__DS_ADD_U64::Inst_DS__DS_ADD_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_add_u64") + { + setFlag(MemoryRef); + setFlag(GroupSegment); + setFlag(AtomicAdd); + setFlag(AtomicNoReturn); + } // Inst_DS__DS_ADD_U64 + + Inst_DS__DS_ADD_U64::~Inst_DS__DS_ADD_U64() + { + } // ~Inst_DS__DS_ADD_U64 + + // --- description from .arch file --- + // 64b: + // MEM[ADDR] += DATA[0:1]; + void + Inst_DS__DS_ADD_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU64 data(gpuDynInst, extData.DATA0); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->a_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_ADD_U64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initAtomicAccess(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_ADD_U64::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_DS__DS_SUB_U64 class methods --- + + Inst_DS__DS_SUB_U64::Inst_DS__DS_SUB_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_sub_u64") + { + } // Inst_DS__DS_SUB_U64 + + Inst_DS__DS_SUB_U64::~Inst_DS__DS_SUB_U64() + { + } // ~Inst_DS__DS_SUB_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_SUB_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_RSUB_U64 class methods --- + + Inst_DS__DS_RSUB_U64::Inst_DS__DS_RSUB_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_rsub_u64") + { + } // Inst_DS__DS_RSUB_U64 + + Inst_DS__DS_RSUB_U64::~Inst_DS__DS_RSUB_U64() + { + } // ~Inst_DS__DS_RSUB_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = DATA - MEM[ADDR]; + // RETURN_DATA = tmp. + // Subtraction with reversed operands. 
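The initiateAcc() above rebuilds the DS immediate from the two 8-bit fields of the instruction word: OFFSET1 supplies the high byte and OFFSET0 the low byte, giving a single 16-bit offset that is applied on top of the per-lane address computed by calcAddr(). A minimal standalone sketch of that packing, with made-up field values:

#include <cstdint>
#include <cstdio>

int main()
{
    // Hypothetical immediate fields as they would appear in InFmt_DS.
    uint8_t offset0 = 0x34;   // low byte
    uint8_t offset1 = 0x12;   // high byte

    // Same packing as the atomic-add initiateAcc above: the two bytes
    // form one 16-bit offset added to each lane's LDS address.
    uint64_t offset = (static_cast<uint64_t>(offset1) << 8) | offset0;

    std::printf("combined DS offset = 0x%04lx\n",
                static_cast<unsigned long>(offset));
    return 0;
}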
+ void + Inst_DS__DS_RSUB_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_INC_U64 class methods --- + + Inst_DS__DS_INC_U64::Inst_DS__DS_INC_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_inc_u64") + { + } // Inst_DS__DS_INC_U64 + + Inst_DS__DS_INC_U64::~Inst_DS__DS_INC_U64() + { + } // ~Inst_DS__DS_INC_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_INC_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_DEC_U64 class methods --- + + Inst_DS__DS_DEC_U64::Inst_DS__DS_DEC_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_dec_u64") + { + } // Inst_DS__DS_DEC_U64 + + Inst_DS__DS_DEC_U64::~Inst_DS__DS_DEC_U64() + { + } // ~Inst_DS__DS_DEC_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 + // (unsigned compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_DEC_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_I64 class methods --- + + Inst_DS__DS_MIN_I64::Inst_DS__DS_MIN_I64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_i64") + { + } // Inst_DS__DS_MIN_I64 + + Inst_DS__DS_MIN_I64::~Inst_DS__DS_MIN_I64() + { + } // ~Inst_DS__DS_MIN_I64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_MIN_I64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_I64 class methods --- + + Inst_DS__DS_MAX_I64::Inst_DS__DS_MAX_I64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_i64") + { + } // Inst_DS__DS_MAX_I64 + + Inst_DS__DS_MAX_I64::~Inst_DS__DS_MAX_I64() + { + } // ~Inst_DS__DS_MAX_I64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_MAX_I64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_U64 class methods --- + + Inst_DS__DS_MIN_U64::Inst_DS__DS_MIN_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_u64") + { + } // Inst_DS__DS_MIN_U64 + + Inst_DS__DS_MIN_U64::~Inst_DS__DS_MIN_U64() + { + } // ~Inst_DS__DS_MIN_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_MIN_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_U64 class methods --- + + Inst_DS__DS_MAX_U64::Inst_DS__DS_MAX_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_u64") + { + } // Inst_DS__DS_MAX_U64 + + Inst_DS__DS_MAX_U64::~Inst_DS__DS_MAX_U64() + { + } // ~Inst_DS__DS_MAX_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); + // RETURN_DATA[0:1] = tmp. 
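The .arch pseudocode quoted for ds_inc_u64 and ds_dec_u64 wraps rather than saturates: the increment resets to zero once the stored value reaches DATA, and the decrement reloads DATA when the stored value is zero or has overshot it. A small host-side transcription of that pseudocode (not gem5 code; the opcodes above are still panicUnimplemented stubs):

#include <cstdint>
#include <cassert>

// MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1; returns tmp.
uint64_t dsIncU64(uint64_t &mem, uint64_t data)
{
    uint64_t tmp = mem;
    mem = (tmp >= data) ? 0 : tmp + 1;
    return tmp;
}

// MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1; returns tmp.
uint64_t dsDecU64(uint64_t &mem, uint64_t data)
{
    uint64_t tmp = mem;
    mem = (tmp == 0 || tmp > data) ? data : tmp - 1;
    return tmp;
}

int main()
{
    uint64_t cell = 3;
    dsIncU64(cell, 4);      // 3 -> 4
    dsIncU64(cell, 4);      // 4 >= 4, wraps to 0
    assert(cell == 0);
    dsDecU64(cell, 4);      // 0 reloads DATA
    assert(cell == 4);
    return 0;
}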
+ void + Inst_DS__DS_MAX_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_AND_B64 class methods --- + + Inst_DS__DS_AND_B64::Inst_DS__DS_AND_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_and_b64") + { + } // Inst_DS__DS_AND_B64 + + Inst_DS__DS_AND_B64::~Inst_DS__DS_AND_B64() + { + } // ~Inst_DS__DS_AND_B64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] &= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_AND_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_OR_B64 class methods --- + + Inst_DS__DS_OR_B64::Inst_DS__DS_OR_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_or_b64") + { + } // Inst_DS__DS_OR_B64 + + Inst_DS__DS_OR_B64::~Inst_DS__DS_OR_B64() + { + } // ~Inst_DS__DS_OR_B64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] |= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_OR_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_XOR_B64 class methods --- + + Inst_DS__DS_XOR_B64::Inst_DS__DS_XOR_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_xor_b64") + { + } // Inst_DS__DS_XOR_B64 + + Inst_DS__DS_XOR_B64::~Inst_DS__DS_XOR_B64() + { + } // ~Inst_DS__DS_XOR_B64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] ^= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_XOR_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MSKOR_B64 class methods --- + + Inst_DS__DS_MSKOR_B64::Inst_DS__DS_MSKOR_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_mskor_b64") + { + } // Inst_DS__DS_MSKOR_B64 + + Inst_DS__DS_MSKOR_B64::~Inst_DS__DS_MSKOR_B64() + { + } // ~Inst_DS__DS_MSKOR_B64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2; + // RETURN_DATA = tmp. + // Masked dword OR, D0 contains the mask and D1 contains the new value. + void + Inst_DS__DS_MSKOR_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRITE_B64 class methods --- + + Inst_DS__DS_WRITE_B64::Inst_DS__DS_WRITE_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write_b64") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE_B64 + + Inst_DS__DS_WRITE_B64::~Inst_DS__DS_WRITE_B64() + { + } // ~Inst_DS__DS_WRITE_B64 + + // --- description from .arch file --- + // 64b: + // MEM[ADDR] = DATA. + // Write qword. 
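ds_mskor_b64, described above, is the one read-modify-write in this group that consumes two source operands: DATA selects which bits may change and DATA2 supplies their new values. A worked bitwise example of the documented update rule (host-side only; the opcode above is unimplemented):

#include <cstdint>
#include <cassert>

// MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2; returns the old value.
uint64_t dsMskorB64(uint64_t &mem, uint64_t mask, uint64_t value)
{
    uint64_t tmp = mem;
    mem = (tmp & ~mask) | value;
    return tmp;
}

int main()
{
    uint64_t cell = 0xffff0000ffff0000ull;
    // Replace only the low 16 bits with 0x1234.
    uint64_t old = dsMskorB64(cell, 0xffffull, 0x1234ull);
    assert(old == 0xffff0000ffff0000ull);
    assert(cell == 0xffff0000ffff1234ull);
    return 0;
}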
+ void + Inst_DS__DS_WRITE_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU64 data(gpuDynInst, extData.DATA0); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_WRITE_B64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemWrite(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_WRITE_B64::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_DS__DS_WRITE2_B64 class methods --- + + Inst_DS__DS_WRITE2_B64::Inst_DS__DS_WRITE2_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write2_b64") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE2_B64 + + Inst_DS__DS_WRITE2_B64::~Inst_DS__DS_WRITE2_B64() + { + } // ~Inst_DS__DS_WRITE2_B64 + + // --- description from .arch file --- + // 64b: + // MEM[ADDR_BASE + OFFSET0 * 8] = DATA; + // MEM[ADDR_BASE + OFFSET1 * 8] = DATA2. + // Write 2 qwords. + void + Inst_DS__DS_WRITE2_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU64 data0(gpuDynInst, extData.DATA0); + ConstVecOperandU64 data1(gpuDynInst, extData.DATA1); + + addr.read(); + data0.read(); + data1.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast( + gpuDynInst->d_data))[lane * 2] = data0[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 2 + 1] = data1[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_WRITE2_B64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0 * 8; + Addr offset1 = instData.OFFSET1 * 8; + + initDualMemWrite(gpuDynInst, offset0, offset1); + } + + void + Inst_DS__DS_WRITE2_B64::completeAcc(GPUDynInstPtr gpuDynInst) + { + } + // --- Inst_DS__DS_WRITE2ST64_B64 class methods --- + + Inst_DS__DS_WRITE2ST64_B64::Inst_DS__DS_WRITE2ST64_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write2st64_b64") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE2ST64_B64 + + Inst_DS__DS_WRITE2ST64_B64::~Inst_DS__DS_WRITE2ST64_B64() + { + } // ~Inst_DS__DS_WRITE2ST64_B64 + + // --- description from .arch file --- + // 64b: + // MEM[ADDR_BASE + OFFSET0 * 8 * 64] = DATA; + // MEM[ADDR_BASE + OFFSET1 * 8 * 64] = DATA2; + // Write 2 qwords. 
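For the write2 forms, each lane stages two qwords back to back in d_data, and the OFFSET0/OFFSET1 immediates are element indices rather than byte offsets: initiateAcc() scales them by the 8-byte element size, and the st64 variant whose execute() follows additionally multiplies by a 64-element stride. A short sketch of that offset arithmetic with illustrative values:

#include <cstdint>
#include <cstdio>

int main()
{
    const uint64_t elemSize = 8;     // qword elements for the *_b64 forms
    uint8_t offset0 = 2, offset1 = 5;

    // ds_write2_b64: byte offsets are the immediates scaled by element size.
    uint64_t off0 = offset0 * elemSize;          // 16
    uint64_t off1 = offset1 * elemSize;          // 40

    // ds_write2st64_b64: the stride between elements is 64 qwords.
    uint64_t off0st64 = offset0 * elemSize * 64; // 1024
    uint64_t off1st64 = offset1 * elemSize * 64; // 2560

    std::printf("%lu %lu %lu %lu\n",
                (unsigned long)off0, (unsigned long)off1,
                (unsigned long)off0st64, (unsigned long)off1st64);
    return 0;
}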
+ void + Inst_DS__DS_WRITE2ST64_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU64 data0(gpuDynInst, extData.DATA0); + ConstVecOperandU64 data1(gpuDynInst, extData.DATA1); + + addr.read(); + data0.read(); + data1.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast( + gpuDynInst->d_data))[lane * 2] = data0[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 2 + 1] = data1[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_WRITE2ST64_B64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0 * 8 * 64; + Addr offset1 = instData.OFFSET1 * 8 * 64; + + initDualMemWrite(gpuDynInst, offset0, offset1); + } + + void + Inst_DS__DS_WRITE2ST64_B64::completeAcc(GPUDynInstPtr gpuDynInst) + { + } + // --- Inst_DS__DS_CMPST_B64 class methods --- + + Inst_DS__DS_CMPST_B64::Inst_DS__DS_CMPST_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_cmpst_b64") + { + } // Inst_DS__DS_CMPST_B64 + + Inst_DS__DS_CMPST_B64::~Inst_DS__DS_CMPST_B64() + { + } // ~Inst_DS__DS_CMPST_B64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // src = DATA2; + // cmp = DATA; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0] = tmp. + // Compare and store. + // Caution, the order of src and cmp are the *opposite* of the + // --- BUFFER_ATOMIC_CMPSWAP_X2 opcode. + void + Inst_DS__DS_CMPST_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_CMPST_F64 class methods --- + + Inst_DS__DS_CMPST_F64::Inst_DS__DS_CMPST_F64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_cmpst_f64") + { + setFlag(F64); + } // Inst_DS__DS_CMPST_F64 + + Inst_DS__DS_CMPST_F64::~Inst_DS__DS_CMPST_F64() + { + } // ~Inst_DS__DS_CMPST_F64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // src = DATA2; + // cmp = DATA; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0] = tmp. + // Floating point compare and store that handles NaN/INF/denormal values. + // Caution, the order of src and cmp are the *opposite* of the + // --- BUFFER_ATOMIC_FCMPSWAP_X2 opcode. + void + Inst_DS__DS_CMPST_F64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_F64 class methods --- + + Inst_DS__DS_MIN_F64::Inst_DS__DS_MIN_F64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_f64") + { + setFlag(F64); + } // Inst_DS__DS_MIN_F64 + + Inst_DS__DS_MIN_F64::~Inst_DS__DS_MIN_F64() + { + } // ~Inst_DS__DS_MIN_F64 + + // --- description from .arch file --- + // 64b. + // tmp = MEM[ADDR]; + // src = DATA; + // cmp = DATA2; + // MEM[ADDR] = (cmp < tmp) ? src : tmp. + // Floating point minimum that handles NaN/INF/denormal values. + // Note that this opcode is slightly more general-purpose than + // --- BUFFER_ATOMIC_FMIN_X2. 
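Both compare-and-store descriptions repeat the same caution: DATA carries the compare value and DATA2 the value to store, the opposite of the buffer-atomic cmpswap operand order. A host-side sketch of the documented semantics with the operand roles spelled out (these opcodes remain unimplemented in the patch):

#include <cstdint>
#include <cassert>

// tmp = MEM[ADDR]; MEM[ADDR] = (tmp == cmp) ? src : tmp; return tmp.
// For ds_cmpst, cmp comes from DATA and src from DATA2.
uint64_t dsCmpstB64(uint64_t &mem, uint64_t cmp /* DATA */,
                    uint64_t src /* DATA2 */)
{
    uint64_t tmp = mem;
    mem = (tmp == cmp) ? src : tmp;
    return tmp;
}

int main()
{
    uint64_t cell = 7;
    assert(dsCmpstB64(cell, 7, 42) == 7 && cell == 42);   // match: stored
    assert(dsCmpstB64(cell, 7, 99) == 42 && cell == 42);  // no match
    return 0;
}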
+ void + Inst_DS__DS_MIN_F64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_F64 class methods --- + + Inst_DS__DS_MAX_F64::Inst_DS__DS_MAX_F64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_f64") + { + setFlag(F64); + } // Inst_DS__DS_MAX_F64 + + Inst_DS__DS_MAX_F64::~Inst_DS__DS_MAX_F64() + { + } // ~Inst_DS__DS_MAX_F64 + + // --- description from .arch file --- + // 64b. + // tmp = MEM[ADDR]; + // src = DATA; + // cmp = DATA2; + // MEM[ADDR] = (tmp > cmp) ? src : tmp. + // Floating point maximum that handles NaN/INF/denormal values. + // Note that this opcode is slightly more general-purpose than + // --- BUFFER_ATOMIC_FMAX_X2. + void + Inst_DS__DS_MAX_F64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_ADD_RTN_U64 class methods --- + + Inst_DS__DS_ADD_RTN_U64::Inst_DS__DS_ADD_RTN_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_add_rtn_u64") + { + } // Inst_DS__DS_ADD_RTN_U64 + + Inst_DS__DS_ADD_RTN_U64::~Inst_DS__DS_ADD_RTN_U64() + { + } // ~Inst_DS__DS_ADD_RTN_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] += DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_ADD_RTN_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_SUB_RTN_U64 class methods --- + + Inst_DS__DS_SUB_RTN_U64::Inst_DS__DS_SUB_RTN_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_sub_rtn_u64") + { + } // Inst_DS__DS_SUB_RTN_U64 + + Inst_DS__DS_SUB_RTN_U64::~Inst_DS__DS_SUB_RTN_U64() + { + } // ~Inst_DS__DS_SUB_RTN_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_SUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_RSUB_RTN_U64 class methods --- + + Inst_DS__DS_RSUB_RTN_U64::Inst_DS__DS_RSUB_RTN_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_rsub_rtn_u64") + { + } // Inst_DS__DS_RSUB_RTN_U64 + + Inst_DS__DS_RSUB_RTN_U64::~Inst_DS__DS_RSUB_RTN_U64() + { + } // ~Inst_DS__DS_RSUB_RTN_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = DATA - MEM[ADDR]; + // RETURN_DATA = tmp. + // Subtraction with reversed operands. + void + Inst_DS__DS_RSUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_INC_RTN_U64 class methods --- + + Inst_DS__DS_INC_RTN_U64::Inst_DS__DS_INC_RTN_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_inc_rtn_u64") + { + } // Inst_DS__DS_INC_RTN_U64 + + Inst_DS__DS_INC_RTN_U64::~Inst_DS__DS_INC_RTN_U64() + { + } // ~Inst_DS__DS_INC_RTN_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_INC_RTN_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_DEC_RTN_U64 class methods --- + + Inst_DS__DS_DEC_RTN_U64::Inst_DS__DS_DEC_RTN_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_dec_rtn_u64") + { + } // Inst_DS__DS_DEC_RTN_U64 + + Inst_DS__DS_DEC_RTN_U64::~Inst_DS__DS_DEC_RTN_U64() + { + } // ~Inst_DS__DS_DEC_RTN_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 + // (unsigned compare); + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_DS__DS_DEC_RTN_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_RTN_I64 class methods --- + + Inst_DS__DS_MIN_RTN_I64::Inst_DS__DS_MIN_RTN_I64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_rtn_i64") + { + } // Inst_DS__DS_MIN_RTN_I64 + + Inst_DS__DS_MIN_RTN_I64::~Inst_DS__DS_MIN_RTN_I64() + { + } // ~Inst_DS__DS_MIN_RTN_I64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_MIN_RTN_I64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_RTN_I64 class methods --- + + Inst_DS__DS_MAX_RTN_I64::Inst_DS__DS_MAX_RTN_I64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_rtn_i64") + { + } // Inst_DS__DS_MAX_RTN_I64 + + Inst_DS__DS_MAX_RTN_I64::~Inst_DS__DS_MAX_RTN_I64() + { + } // ~Inst_DS__DS_MAX_RTN_I64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_MAX_RTN_I64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_RTN_U64 class methods --- + + Inst_DS__DS_MIN_RTN_U64::Inst_DS__DS_MIN_RTN_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_rtn_u64") + { + } // Inst_DS__DS_MIN_RTN_U64 + + Inst_DS__DS_MIN_RTN_U64::~Inst_DS__DS_MIN_RTN_U64() + { + } // ~Inst_DS__DS_MIN_RTN_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_MIN_RTN_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_RTN_U64 class methods --- + + Inst_DS__DS_MAX_RTN_U64::Inst_DS__DS_MAX_RTN_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_rtn_u64") + { + } // Inst_DS__DS_MAX_RTN_U64 + + Inst_DS__DS_MAX_RTN_U64::~Inst_DS__DS_MAX_RTN_U64() + { + } // ~Inst_DS__DS_MAX_RTN_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_MAX_RTN_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_AND_RTN_B64 class methods --- + + Inst_DS__DS_AND_RTN_B64::Inst_DS__DS_AND_RTN_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_and_rtn_b64") + { + } // Inst_DS__DS_AND_RTN_B64 + + Inst_DS__DS_AND_RTN_B64::~Inst_DS__DS_AND_RTN_B64() + { + } // ~Inst_DS__DS_AND_RTN_B64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] &= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_AND_RTN_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_OR_RTN_B64 class methods --- + + Inst_DS__DS_OR_RTN_B64::Inst_DS__DS_OR_RTN_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_or_rtn_b64") + { + } // Inst_DS__DS_OR_RTN_B64 + + Inst_DS__DS_OR_RTN_B64::~Inst_DS__DS_OR_RTN_B64() + { + } // ~Inst_DS__DS_OR_RTN_B64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] |= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_DS__DS_OR_RTN_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_XOR_RTN_B64 class methods --- + + Inst_DS__DS_XOR_RTN_B64::Inst_DS__DS_XOR_RTN_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_xor_rtn_b64") + { + } // Inst_DS__DS_XOR_RTN_B64 + + Inst_DS__DS_XOR_RTN_B64::~Inst_DS__DS_XOR_RTN_B64() + { + } // ~Inst_DS__DS_XOR_RTN_B64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] ^= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_XOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MSKOR_RTN_B64 class methods --- + + Inst_DS__DS_MSKOR_RTN_B64::Inst_DS__DS_MSKOR_RTN_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_mskor_rtn_b64") + { + } // Inst_DS__DS_MSKOR_RTN_B64 + + Inst_DS__DS_MSKOR_RTN_B64::~Inst_DS__DS_MSKOR_RTN_B64() + { + } // ~Inst_DS__DS_MSKOR_RTN_B64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2; + // RETURN_DATA = tmp. + // Masked dword OR, D0 contains the mask and D1 contains the new value. + void + Inst_DS__DS_MSKOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRXCHG_RTN_B64 class methods --- + + Inst_DS__DS_WRXCHG_RTN_B64::Inst_DS__DS_WRXCHG_RTN_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_wrxchg_rtn_b64") + { + } // Inst_DS__DS_WRXCHG_RTN_B64 + + Inst_DS__DS_WRXCHG_RTN_B64::~Inst_DS__DS_WRXCHG_RTN_B64() + { + } // ~Inst_DS__DS_WRXCHG_RTN_B64 + + // --- description from .arch file --- + // tmp = MEM[ADDR]; + // MEM[ADDR] = DATA; + // RETURN_DATA = tmp. + // Write-exchange operation. + void + Inst_DS__DS_WRXCHG_RTN_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRXCHG2_RTN_B64 class methods --- + + Inst_DS__DS_WRXCHG2_RTN_B64::Inst_DS__DS_WRXCHG2_RTN_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_wrxchg2_rtn_b64") + { + } // Inst_DS__DS_WRXCHG2_RTN_B64 + + Inst_DS__DS_WRXCHG2_RTN_B64::~Inst_DS__DS_WRXCHG2_RTN_B64() + { + } // ~Inst_DS__DS_WRXCHG2_RTN_B64 + + // --- description from .arch file --- + // Write-exchange 2 separate qwords. + void + Inst_DS__DS_WRXCHG2_RTN_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRXCHG2ST64_RTN_B64 class methods --- + + Inst_DS__DS_WRXCHG2ST64_RTN_B64::Inst_DS__DS_WRXCHG2ST64_RTN_B64( + InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b64") + { + } // Inst_DS__DS_WRXCHG2ST64_RTN_B64 + + Inst_DS__DS_WRXCHG2ST64_RTN_B64::~Inst_DS__DS_WRXCHG2ST64_RTN_B64() + { + } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B64 + + // --- description from .arch file --- + // Write-exchange 2 qwords with a stride of 64 qwords. + void + Inst_DS__DS_WRXCHG2ST64_RTN_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_CMPST_RTN_B64 class methods --- + + Inst_DS__DS_CMPST_RTN_B64::Inst_DS__DS_CMPST_RTN_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_cmpst_rtn_b64") + { + } // Inst_DS__DS_CMPST_RTN_B64 + + Inst_DS__DS_CMPST_RTN_B64::~Inst_DS__DS_CMPST_RTN_B64() + { + } // ~Inst_DS__DS_CMPST_RTN_B64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // src = DATA2; + // cmp = DATA; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0] = tmp. + // Compare and store. + // Caution, the order of src and cmp are the *opposite* of the + // --- BUFFER_ATOMIC_CMPSWAP_X2 opcode. 
+ void + Inst_DS__DS_CMPST_RTN_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_CMPST_RTN_F64 class methods --- + + Inst_DS__DS_CMPST_RTN_F64::Inst_DS__DS_CMPST_RTN_F64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_cmpst_rtn_f64") + { + setFlag(F64); + } // Inst_DS__DS_CMPST_RTN_F64 + + Inst_DS__DS_CMPST_RTN_F64::~Inst_DS__DS_CMPST_RTN_F64() + { + } // ~Inst_DS__DS_CMPST_RTN_F64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // src = DATA2; + // cmp = DATA; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0] = tmp. + // Floating point compare and store that handles NaN/INF/denormal values. + // Caution, the order of src and cmp are the *opposite* of the + // --- BUFFER_ATOMIC_FCMPSWAP_X2 opcode. + void + Inst_DS__DS_CMPST_RTN_F64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_RTN_F64 class methods --- + + Inst_DS__DS_MIN_RTN_F64::Inst_DS__DS_MIN_RTN_F64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_rtn_f64") + { + setFlag(F64); + } // Inst_DS__DS_MIN_RTN_F64 + + Inst_DS__DS_MIN_RTN_F64::~Inst_DS__DS_MIN_RTN_F64() + { + } // ~Inst_DS__DS_MIN_RTN_F64 + + // --- description from .arch file --- + // 64b. + // tmp = MEM[ADDR]; + // src = DATA; + // cmp = DATA2; + // MEM[ADDR] = (cmp < tmp) ? src : tmp. + // Floating point minimum that handles NaN/INF/denormal values. + // Note that this opcode is slightly more general-purpose than + // --- BUFFER_ATOMIC_FMIN_X2. + void + Inst_DS__DS_MIN_RTN_F64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_RTN_F64 class methods --- + + Inst_DS__DS_MAX_RTN_F64::Inst_DS__DS_MAX_RTN_F64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_rtn_f64") + { + setFlag(F64); + } // Inst_DS__DS_MAX_RTN_F64 + + Inst_DS__DS_MAX_RTN_F64::~Inst_DS__DS_MAX_RTN_F64() + { + } // ~Inst_DS__DS_MAX_RTN_F64 + + // --- description from .arch file --- + // 64b. + // tmp = MEM[ADDR]; + // src = DATA; + // cmp = DATA2; + // MEM[ADDR] = (tmp > cmp) ? src : tmp. + // Floating point maximum that handles NaN/INF/denormal values. + // Note that this opcode is slightly more general-purpose than + // --- BUFFER_ATOMIC_FMAX_X2. + void + Inst_DS__DS_MAX_RTN_F64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_READ_B64 class methods --- + + Inst_DS__DS_READ_B64::Inst_DS__DS_READ_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read_b64") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ_B64 + + Inst_DS__DS_READ_B64::~Inst_DS__DS_READ_B64() + { + } // ~Inst_DS__DS_READ_B64 + + // --- description from .arch file --- + // RETURN_DATA = MEM[ADDR]. + // Read 1 qword. 
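The implemented reads that follow split their work across the three hooks used throughout this file: execute() gathers the per-lane addresses and issues the request to the local memory pipeline, initiateAcc() applies the immediate offset and performs the LDS access into d_data, and completeAcc() copies the staged values into the destination VGPRs. A deliberately simplified, non-gem5 model of that flow for a single-qword read; every name and type here is a schematic stand-in:

#include <cstdint>
#include <cassert>
#include <vector>

// Schematic stand-in for the state a DS load carries between the hooks.
struct FakeLoad
{
    std::vector<uint32_t> addr;    // per-lane byte address (from the ADDR VGPR)
    std::vector<uint64_t> d_data;  // staging buffer filled by the LDS access
    std::vector<uint64_t> vdst;    // destination registers, written at the end
};

// Roughly the initiateAcc() step: apply the offset and read the LDS model.
void initiatePhase(FakeLoad &ld, const std::vector<uint8_t> &lds,
                   uint16_t offset)
{
    for (size_t lane = 0; lane < ld.addr.size(); ++lane) {
        uint64_t v = 0;
        for (int b = 0; b < 8; ++b)
            v |= uint64_t(lds[ld.addr[lane] + offset + b]) << (8 * b);
        ld.d_data[lane] = v;
    }
}

// Roughly the completeAcc() step: copy staged qwords to the destinations.
void completePhase(FakeLoad &ld)
{
    for (size_t lane = 0; lane < ld.addr.size(); ++lane)
        ld.vdst[lane] = ld.d_data[lane];
}

int main()
{
    std::vector<uint8_t> lds(64, 0);
    lds[8] = 0xaa;                   // one byte of the qword at address 8
    FakeLoad ld{{0}, {0}, {0}};      // a single lane, base address 0
    initiatePhase(ld, lds, 8);       // immediate offset 8
    completePhase(ld);
    assert(ld.vdst[0] == 0xaa);
    return 0;
}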
+ void + Inst_DS__DS_READ_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_READ_B64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemRead(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_READ_B64::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU64 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane]; + } + } + + vdst.write(); + } // completeAcc + // --- Inst_DS__DS_READ2_B64 class methods --- + + Inst_DS__DS_READ2_B64::Inst_DS__DS_READ2_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read2_b64") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ2_B64 + + Inst_DS__DS_READ2_B64::~Inst_DS__DS_READ2_B64() + { + } // ~Inst_DS__DS_READ2_B64 + + // --- description from .arch file --- + // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8]; + // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8]. + // Read 2 qwords. + void + Inst_DS__DS_READ2_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_READ2_B64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0 * 8; + Addr offset1 = instData.OFFSET1 * 8; + + initDualMemRead(gpuDynInst, offset0, offset1); + } // initiateAcc + + void + Inst_DS__DS_READ2_B64::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU64 vdst0(gpuDynInst, extData.VDST); + VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 2]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 2 + 1]; + } + } + + vdst0.write(); + vdst1.write(); + } // completeAcc + // --- Inst_DS__DS_READ2ST64_B64 class methods --- + + Inst_DS__DS_READ2ST64_B64::Inst_DS__DS_READ2ST64_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read2st64_b64") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ2ST64_B64 + + Inst_DS__DS_READ2ST64_B64::~Inst_DS__DS_READ2ST64_B64() + { + } // ~Inst_DS__DS_READ2ST64_B64 + + // --- description from .arch file --- + // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8 * 64]; + // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8 * 64]. + // Read 2 qwords. 
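In the read2 variants, the two qwords returned per lane sit adjacent in d_data, and the second destination operand is built at VDST + 2 because each 64-bit element spans two consecutive 32-bit VGPRs. A small sketch of that unpacking, with plain arrays standing in for the operand wrappers:

#include <cstdint>
#include <cassert>

constexpr int kLanes = 64;   // wavefront size assumed by this ISA model

void unpackRead2(const uint64_t (&d_data)[kLanes * 2],
                 uint64_t (&vdst0)[kLanes], uint64_t (&vdst1)[kLanes])
{
    for (int lane = 0; lane < kLanes; ++lane) {
        vdst0[lane] = d_data[lane * 2];      // first qword -> VDST
        vdst1[lane] = d_data[lane * 2 + 1];  // second qword -> VDST + 2
    }
}

int main()
{
    uint64_t d_data[kLanes * 2] = {};
    d_data[0] = 0x11; d_data[1] = 0x22;      // lane 0's two results
    uint64_t vdst0[kLanes] = {}, vdst1[kLanes] = {};
    unpackRead2(d_data, vdst0, vdst1);
    assert(vdst0[0] == 0x11 && vdst1[0] == 0x22);
    return 0;
}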
+ void + Inst_DS__DS_READ2ST64_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_READ2ST64_B64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = (instData.OFFSET0 * 8 * 64); + Addr offset1 = (instData.OFFSET1 * 8 * 64); + + initDualMemRead(gpuDynInst, offset0, offset1); + } + + void + Inst_DS__DS_READ2ST64_B64::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU64 vdst0(gpuDynInst, extData.VDST); + VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 2]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 2 + 1]; + } + } + + vdst0.write(); + vdst1.write(); + } + // --- Inst_DS__DS_CONDXCHG32_RTN_B64 class methods --- + + Inst_DS__DS_CONDXCHG32_RTN_B64::Inst_DS__DS_CONDXCHG32_RTN_B64( + InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_condxchg32_rtn_b64") + { + } // Inst_DS__DS_CONDXCHG32_RTN_B64 + + Inst_DS__DS_CONDXCHG32_RTN_B64::~Inst_DS__DS_CONDXCHG32_RTN_B64() + { + } // ~Inst_DS__DS_CONDXCHG32_RTN_B64 + + // --- description from .arch file --- + // Conditional write exchange. + void + Inst_DS__DS_CONDXCHG32_RTN_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_ADD_SRC2_U32 class methods --- + + Inst_DS__DS_ADD_SRC2_U32::Inst_DS__DS_ADD_SRC2_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_add_src2_u32") + { + } // Inst_DS__DS_ADD_SRC2_U32 + + Inst_DS__DS_ADD_SRC2_U32::~Inst_DS__DS_ADD_SRC2_U32() + { + } // ~Inst_DS__DS_ADD_SRC2_U32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[A] + MEM[B]. + void + Inst_DS__DS_ADD_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_SUB_SRC2_U32 class methods --- + + Inst_DS__DS_SUB_SRC2_U32::Inst_DS__DS_SUB_SRC2_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_sub_src2_u32") + { + } // Inst_DS__DS_SUB_SRC2_U32 + + Inst_DS__DS_SUB_SRC2_U32::~Inst_DS__DS_SUB_SRC2_U32() + { + } // ~Inst_DS__DS_SUB_SRC2_U32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[A] - MEM[B]. + void + Inst_DS__DS_SUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_RSUB_SRC2_U32 class methods --- + + Inst_DS__DS_RSUB_SRC2_U32::Inst_DS__DS_RSUB_SRC2_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_rsub_src2_u32") + { + } // Inst_DS__DS_RSUB_SRC2_U32 + + Inst_DS__DS_RSUB_SRC2_U32::~Inst_DS__DS_RSUB_SRC2_U32() + { + } // ~Inst_DS__DS_RSUB_SRC2_U32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[B] - MEM[A]. 
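The *_src2_* opcodes in this stretch all share the second-address formula quoted from the .arch file: B is A plus four times either a slice of A itself (when offset1[7] is set) or the concatenation {offset1[6], offset1[6:0], offset0}. The Verilog-style concatenation reads more easily as shifts and masks; the helper below is one plausible interpretation of that pseudocode and is not part of the patch, which leaves these opcodes unimplemented:

#include <cstdint>
#include <cassert>

// One reading of: B = A + 4 * (offset1[7] ? {A[31], A[31:17]}
//                                         : {offset1[6], offset1[6:0], offset0})
uint32_t src2AddrB(uint32_t a, uint8_t offset0, uint8_t offset1)
{
    uint32_t sel;
    if (offset1 & 0x80) {
        // {A[31], A[31:17]}: 16 bits taken from the base address itself.
        sel = ((a >> 31) << 15) | ((a >> 17) & 0x7fff);
    } else {
        // {offset1[6], offset1[6:0], offset0}: a 16-bit immediate.
        sel = (((offset1 >> 6) & 0x1) << 15)
            | ((offset1 & 0x7f) << 8)
            | offset0;
    }
    return a + 4 * sel;
}

int main()
{
    // Immediate form: offset1 = 0x01, offset0 = 0x10 gives sel = 0x0110.
    assert(src2AddrB(0x1000, 0x10, 0x01) == 0x1000 + 4 * 0x0110);
    return 0;
}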
+ void + Inst_DS__DS_RSUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_INC_SRC2_U32 class methods --- + + Inst_DS__DS_INC_SRC2_U32::Inst_DS__DS_INC_SRC2_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_inc_src2_u32") + { + } // Inst_DS__DS_INC_SRC2_U32 + + Inst_DS__DS_INC_SRC2_U32::~Inst_DS__DS_INC_SRC2_U32() + { + } // ~Inst_DS__DS_INC_SRC2_U32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1). + void + Inst_DS__DS_INC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_DEC_SRC2_U32 class methods --- + + Inst_DS__DS_DEC_SRC2_U32::Inst_DS__DS_DEC_SRC2_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_dec_src2_u32") + { + } // Inst_DS__DS_DEC_SRC2_U32 + + Inst_DS__DS_DEC_SRC2_U32::~Inst_DS__DS_DEC_SRC2_U32() + { + } // ~Inst_DS__DS_DEC_SRC2_U32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1). + // Uint decrement. + void + Inst_DS__DS_DEC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_SRC2_I32 class methods --- + + Inst_DS__DS_MIN_SRC2_I32::Inst_DS__DS_MIN_SRC2_I32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_src2_i32") + { + } // Inst_DS__DS_MIN_SRC2_I32 + + Inst_DS__DS_MIN_SRC2_I32::~Inst_DS__DS_MIN_SRC2_I32() + { + } // ~Inst_DS__DS_MIN_SRC2_I32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = min(MEM[A], MEM[B]). + void + Inst_DS__DS_MIN_SRC2_I32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_SRC2_I32 class methods --- + + Inst_DS__DS_MAX_SRC2_I32::Inst_DS__DS_MAX_SRC2_I32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_src2_i32") + { + } // Inst_DS__DS_MAX_SRC2_I32 + + Inst_DS__DS_MAX_SRC2_I32::~Inst_DS__DS_MAX_SRC2_I32() + { + } // ~Inst_DS__DS_MAX_SRC2_I32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = max(MEM[A], MEM[B]). + void + Inst_DS__DS_MAX_SRC2_I32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_SRC2_U32 class methods --- + + Inst_DS__DS_MIN_SRC2_U32::Inst_DS__DS_MIN_SRC2_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_src2_u32") + { + } // Inst_DS__DS_MIN_SRC2_U32 + + Inst_DS__DS_MIN_SRC2_U32::~Inst_DS__DS_MIN_SRC2_U32() + { + } // ~Inst_DS__DS_MIN_SRC2_U32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = min(MEM[A], MEM[B]). 
+ void + Inst_DS__DS_MIN_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_SRC2_U32 class methods --- + + Inst_DS__DS_MAX_SRC2_U32::Inst_DS__DS_MAX_SRC2_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_src2_u32") + { + } // Inst_DS__DS_MAX_SRC2_U32 + + Inst_DS__DS_MAX_SRC2_U32::~Inst_DS__DS_MAX_SRC2_U32() + { + } // ~Inst_DS__DS_MAX_SRC2_U32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = max(MEM[A], MEM[B]). + void + Inst_DS__DS_MAX_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_AND_SRC2_B32 class methods --- + + Inst_DS__DS_AND_SRC2_B32::Inst_DS__DS_AND_SRC2_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_and_src2_b32") + { + } // Inst_DS__DS_AND_SRC2_B32 + + Inst_DS__DS_AND_SRC2_B32::~Inst_DS__DS_AND_SRC2_B32() + { + } // ~Inst_DS__DS_AND_SRC2_B32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[A] & MEM[B]. + void + Inst_DS__DS_AND_SRC2_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_OR_SRC2_B32 class methods --- + + Inst_DS__DS_OR_SRC2_B32::Inst_DS__DS_OR_SRC2_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_or_src2_b32") + { + } // Inst_DS__DS_OR_SRC2_B32 + + Inst_DS__DS_OR_SRC2_B32::~Inst_DS__DS_OR_SRC2_B32() + { + } // ~Inst_DS__DS_OR_SRC2_B32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[A] | MEM[B]. + void + Inst_DS__DS_OR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_XOR_SRC2_B32 class methods --- + + Inst_DS__DS_XOR_SRC2_B32::Inst_DS__DS_XOR_SRC2_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_xor_src2_b32") + { + } // Inst_DS__DS_XOR_SRC2_B32 + + Inst_DS__DS_XOR_SRC2_B32::~Inst_DS__DS_XOR_SRC2_B32() + { + } // ~Inst_DS__DS_XOR_SRC2_B32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[A] ^ MEM[B]. + void + Inst_DS__DS_XOR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRITE_SRC2_B32 class methods --- + + Inst_DS__DS_WRITE_SRC2_B32::Inst_DS__DS_WRITE_SRC2_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write_src2_b32") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE_SRC2_B32 + + Inst_DS__DS_WRITE_SRC2_B32::~Inst_DS__DS_WRITE_SRC2_B32() + { + } // ~Inst_DS__DS_WRITE_SRC2_B32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[B]. + // Write dword. 
+ void + Inst_DS__DS_WRITE_SRC2_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_SRC2_F32 class methods --- + + Inst_DS__DS_MIN_SRC2_F32::Inst_DS__DS_MIN_SRC2_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_src2_f32") + { + setFlag(F32); + } // Inst_DS__DS_MIN_SRC2_F32 + + Inst_DS__DS_MIN_SRC2_F32::~Inst_DS__DS_MIN_SRC2_F32() + { + } // ~Inst_DS__DS_MIN_SRC2_F32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A]. + // Float, handles NaN/INF/denorm. + void + Inst_DS__DS_MIN_SRC2_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_SRC2_F32 class methods --- + + Inst_DS__DS_MAX_SRC2_F32::Inst_DS__DS_MAX_SRC2_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_src2_f32") + { + setFlag(F32); + } // Inst_DS__DS_MAX_SRC2_F32 + + Inst_DS__DS_MAX_SRC2_F32::~Inst_DS__DS_MAX_SRC2_F32() + { + } // ~Inst_DS__DS_MAX_SRC2_F32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A]. + // Float, handles NaN/INF/denorm. + void + Inst_DS__DS_MAX_SRC2_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_ADD_SRC2_F32 class methods --- + + Inst_DS__DS_ADD_SRC2_F32::Inst_DS__DS_ADD_SRC2_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_add_src2_f32") + { + setFlag(F32); + } // Inst_DS__DS_ADD_SRC2_F32 + + Inst_DS__DS_ADD_SRC2_F32::~Inst_DS__DS_ADD_SRC2_F32() + { + } // ~Inst_DS__DS_ADD_SRC2_F32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[B] + MEM[A]. + // Float, handles NaN/INF/denorm. + void + Inst_DS__DS_ADD_SRC2_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_GWS_SEMA_RELEASE_ALL class methods --- + + Inst_DS__DS_GWS_SEMA_RELEASE_ALL::Inst_DS__DS_GWS_SEMA_RELEASE_ALL( + InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_gws_sema_release_all") + { + } // Inst_DS__DS_GWS_SEMA_RELEASE_ALL + + Inst_DS__DS_GWS_SEMA_RELEASE_ALL::~Inst_DS__DS_GWS_SEMA_RELEASE_ALL() + { + } // ~Inst_DS__DS_GWS_SEMA_RELEASE_ALL + + // --- description from .arch file --- + // GDS Only: The GWS resource (rid) indicated will process this opcode by + // updating the counter and labeling the specified resource as a semaphore. + // //Determine the GWS resource to work on + // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0]; + // //Incr the state counter of the resource + // state.counter[rid] = state.wave_in_queue; + // state.type = SEMAPHORE; + // return rd_done; //release calling wave + // This action will release ALL queued waves; it Will have no effect if no + // --- waves are present. + void + Inst_DS__DS_GWS_SEMA_RELEASE_ALL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_GWS_INIT class methods --- + + Inst_DS__DS_GWS_INIT::Inst_DS__DS_GWS_INIT(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_gws_init") + { + } // Inst_DS__DS_GWS_INIT + + Inst_DS__DS_GWS_INIT::~Inst_DS__DS_GWS_INIT() + { + } // ~Inst_DS__DS_GWS_INIT + + // --- description from .arch file --- + // GDS Only: Initialize a barrier or semaphore resource. 
+ // //Determine the GWS resource to work on + // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0]; + // //Get the value to use in init + // index = find_first_valid(vector mask) + // value = DATA[thread: index] + // //Set the state of the resource + // state.counter[rid] = lsb(value); //limit #waves + // state.flag[rid] = 0; + // return rd_done; //release calling wave + void + Inst_DS__DS_GWS_INIT::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_GWS_SEMA_V class methods --- + + Inst_DS__DS_GWS_SEMA_V::Inst_DS__DS_GWS_SEMA_V(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_gws_sema_v") + { + } // Inst_DS__DS_GWS_SEMA_V + + Inst_DS__DS_GWS_SEMA_V::~Inst_DS__DS_GWS_SEMA_V() + { + } // ~Inst_DS__DS_GWS_SEMA_V + + // --- description from .arch file --- + // GDS Only: The GWS resource indicated will process this opcode by + // updating the counter and labeling the resource as a semaphore. + // //Determine the GWS resource to work on + // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0]; + // //Incr the state counter of the resource + // state.counter[rid]++; + // state.type = SEMAPHORE; + // return rd_done; //release calling wave + // This action will release one waved if any are queued in this resource. + void + Inst_DS__DS_GWS_SEMA_V::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_GWS_SEMA_BR class methods --- + + Inst_DS__DS_GWS_SEMA_BR::Inst_DS__DS_GWS_SEMA_BR(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_gws_sema_br") + { + } // Inst_DS__DS_GWS_SEMA_BR + + Inst_DS__DS_GWS_SEMA_BR::~Inst_DS__DS_GWS_SEMA_BR() + { + } // ~Inst_DS__DS_GWS_SEMA_BR + + // --- description from .arch file --- + // GDS Only: The GWS resource indicated will process this opcode by + // updating the counter by the bulk release delivered count and labeling + // the resource as a semaphore. + // //Determine the GWS resource to work on + // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0]; + // index = find first valid (vector mask) + // count = DATA[thread: index]; + // //Add count to the resource state counter + // state.counter[rid] += count; + // state.type = SEMAPHORE; + // return rd_done; //release calling wave + // This action will release count number of waves, immediately if queued, + // or as they arrive from the noted resource. + void + Inst_DS__DS_GWS_SEMA_BR::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_GWS_SEMA_P class methods --- + + Inst_DS__DS_GWS_SEMA_P::Inst_DS__DS_GWS_SEMA_P(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_gws_sema_p") + { + } // Inst_DS__DS_GWS_SEMA_P + + Inst_DS__DS_GWS_SEMA_P::~Inst_DS__DS_GWS_SEMA_P() + { + } // ~Inst_DS__DS_GWS_SEMA_P + + // --- description from .arch file --- + // GDS Only: The GWS resource indicated will process this opcode by + // queueing it until counter enables a release and then decrementing the + // counter of the resource as a semaphore. 
+ // //Determine the GWS resource to work on + // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0]; + // state.type = SEMAPHORE; + // ENQUEUE until(state[rid].counter > 0) + // state[rid].counter--; + // return rd_done + void + Inst_DS__DS_GWS_SEMA_P::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_GWS_BARRIER class methods --- + + Inst_DS__DS_GWS_BARRIER::Inst_DS__DS_GWS_BARRIER(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_gws_barrier") + { + } // Inst_DS__DS_GWS_BARRIER + + Inst_DS__DS_GWS_BARRIER::~Inst_DS__DS_GWS_BARRIER() + { + } // ~Inst_DS__DS_GWS_BARRIER + + // --- description from .arch file --- + // GDS Only: The GWS resource indicated will process this opcode by + // queueing it until barrier is satisfied. The number of waves needed is + // passed in as DATA of first valid thread. + // //Determine the GWS resource to work on + // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + OFFSET0[5:0]; + // index = find first valid (vector mask); + // value = DATA[thread: index]; + // // Input Decision Machine + // state.type[rid] = BARRIER; + // if (state[rid].counter <= 0) { + // thread[rid].flag = state[rid].flag; + // ENQUEUE; + // state[rid].flag = !state.flag; + // state[rid].counter = value; + // return rd_done; + // } else { + // state[rid].counter--; + // thread.flag = state[rid].flag; + // ENQUEUE; + // } + // Since the waves deliver the count for the next barrier, this function + // can have a different size barrier for each occurrence. + // // Release Machine + // if (state.type == BARRIER) { + // if (state.flag != thread.flag) { + // return rd_done; + // } + // } + void + Inst_DS__DS_GWS_BARRIER::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_CONSUME class methods --- + + Inst_DS__DS_CONSUME::Inst_DS__DS_CONSUME(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_consume") + { + } // Inst_DS__DS_CONSUME + + Inst_DS__DS_CONSUME::~Inst_DS__DS_CONSUME() + { + } // ~Inst_DS__DS_CONSUME + + // --- description from .arch file --- + // LDS & GDS. Subtract (count_bits(exec_mask)) from the value stored in DS + // memory at (M0.base + instr_offset). Return the pre-operation value to + // VGPRs. + void + Inst_DS__DS_CONSUME::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_APPEND class methods --- + + Inst_DS__DS_APPEND::Inst_DS__DS_APPEND(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_append") + { + } // Inst_DS__DS_APPEND + + Inst_DS__DS_APPEND::~Inst_DS__DS_APPEND() + { + } // ~Inst_DS__DS_APPEND + + // --- description from .arch file --- + // LDS & GDS. Add (count_bits(exec_mask)) to the value stored in DS memory + // at (M0.base + instr_offset). Return the pre-operation value to VGPRs. + void + Inst_DS__DS_APPEND::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_ORDERED_COUNT class methods --- + + Inst_DS__DS_ORDERED_COUNT::Inst_DS__DS_ORDERED_COUNT(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_ordered_count") + { + } // Inst_DS__DS_ORDERED_COUNT + + Inst_DS__DS_ORDERED_COUNT::~Inst_DS__DS_ORDERED_COUNT() + { + } // ~Inst_DS__DS_ORDERED_COUNT + + // --- description from .arch file --- + // GDS-only. Add (count_bits(exec_mask)) to one of 4 dedicated + // ordered-count counters (aka 'packers'). Additional bits of instr.offset + // field are overloaded to hold packer-id, 'last'. 
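ds_consume and ds_append, described above, operate on the execution mask itself: they subtract or add the number of active lanes to the counter held in DS memory at M0.base plus the instruction offset and hand the pre-operation value back to the VGPRs. A host-side sketch of the append case, with popcount standing in for count_bits(exec_mask):

#include <bitset>
#include <cstdint>
#include <cassert>

// MEM[M0.base + offset] += popcount(exec_mask); return the old value.
uint32_t dsAppend(uint32_t &counter, const std::bitset<64> &execMask)
{
    uint32_t tmp = counter;
    counter += static_cast<uint32_t>(execMask.count());
    return tmp;
}

int main()
{
    uint32_t counter = 100;
    std::bitset<64> execMask;
    execMask.set(0); execMask.set(5); execMask.set(63);  // 3 active lanes
    uint32_t base = dsAppend(counter, execMask);
    assert(base == 100 && counter == 103);
    return 0;
}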
+ void + Inst_DS__DS_ORDERED_COUNT::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_ADD_SRC2_U64 class methods --- + + Inst_DS__DS_ADD_SRC2_U64::Inst_DS__DS_ADD_SRC2_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_add_src2_u64") + { + } // Inst_DS__DS_ADD_SRC2_U64 + + Inst_DS__DS_ADD_SRC2_U64::~Inst_DS__DS_ADD_SRC2_U64() + { + } // ~Inst_DS__DS_ADD_SRC2_U64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[A] + MEM[B]. + void + Inst_DS__DS_ADD_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_SUB_SRC2_U64 class methods --- + + Inst_DS__DS_SUB_SRC2_U64::Inst_DS__DS_SUB_SRC2_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_sub_src2_u64") + { + } // Inst_DS__DS_SUB_SRC2_U64 + + Inst_DS__DS_SUB_SRC2_U64::~Inst_DS__DS_SUB_SRC2_U64() + { + } // ~Inst_DS__DS_SUB_SRC2_U64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[A] - MEM[B]. + void + Inst_DS__DS_SUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_RSUB_SRC2_U64 class methods --- + + Inst_DS__DS_RSUB_SRC2_U64::Inst_DS__DS_RSUB_SRC2_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_rsub_src2_u64") + { + } // Inst_DS__DS_RSUB_SRC2_U64 + + Inst_DS__DS_RSUB_SRC2_U64::~Inst_DS__DS_RSUB_SRC2_U64() + { + } // ~Inst_DS__DS_RSUB_SRC2_U64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[B] - MEM[A]. + void + Inst_DS__DS_RSUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_INC_SRC2_U64 class methods --- + + Inst_DS__DS_INC_SRC2_U64::Inst_DS__DS_INC_SRC2_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_inc_src2_u64") + { + } // Inst_DS__DS_INC_SRC2_U64 + + Inst_DS__DS_INC_SRC2_U64::~Inst_DS__DS_INC_SRC2_U64() + { + } // ~Inst_DS__DS_INC_SRC2_U64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1). + void + Inst_DS__DS_INC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_DEC_SRC2_U64 class methods --- + + Inst_DS__DS_DEC_SRC2_U64::Inst_DS__DS_DEC_SRC2_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_dec_src2_u64") + { + } // Inst_DS__DS_DEC_SRC2_U64 + + Inst_DS__DS_DEC_SRC2_U64::~Inst_DS__DS_DEC_SRC2_U64() + { + } // ~Inst_DS__DS_DEC_SRC2_U64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1). + // Uint decrement. 
+ void + Inst_DS__DS_DEC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_SRC2_I64 class methods --- + + Inst_DS__DS_MIN_SRC2_I64::Inst_DS__DS_MIN_SRC2_I64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_src2_i64") + { + } // Inst_DS__DS_MIN_SRC2_I64 + + Inst_DS__DS_MIN_SRC2_I64::~Inst_DS__DS_MIN_SRC2_I64() + { + } // ~Inst_DS__DS_MIN_SRC2_I64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = min(MEM[A], MEM[B]). + void + Inst_DS__DS_MIN_SRC2_I64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_SRC2_I64 class methods --- + + Inst_DS__DS_MAX_SRC2_I64::Inst_DS__DS_MAX_SRC2_I64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_src2_i64") + { + } // Inst_DS__DS_MAX_SRC2_I64 + + Inst_DS__DS_MAX_SRC2_I64::~Inst_DS__DS_MAX_SRC2_I64() + { + } // ~Inst_DS__DS_MAX_SRC2_I64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = max(MEM[A], MEM[B]). + void + Inst_DS__DS_MAX_SRC2_I64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_SRC2_U64 class methods --- + + Inst_DS__DS_MIN_SRC2_U64::Inst_DS__DS_MIN_SRC2_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_src2_u64") + { + } // Inst_DS__DS_MIN_SRC2_U64 + + Inst_DS__DS_MIN_SRC2_U64::~Inst_DS__DS_MIN_SRC2_U64() + { + } // ~Inst_DS__DS_MIN_SRC2_U64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = min(MEM[A], MEM[B]). + void + Inst_DS__DS_MIN_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_SRC2_U64 class methods --- + + Inst_DS__DS_MAX_SRC2_U64::Inst_DS__DS_MAX_SRC2_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_src2_u64") + { + } // Inst_DS__DS_MAX_SRC2_U64 + + Inst_DS__DS_MAX_SRC2_U64::~Inst_DS__DS_MAX_SRC2_U64() + { + } // ~Inst_DS__DS_MAX_SRC2_U64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = max(MEM[A], MEM[B]). + void + Inst_DS__DS_MAX_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_AND_SRC2_B64 class methods --- + + Inst_DS__DS_AND_SRC2_B64::Inst_DS__DS_AND_SRC2_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_and_src2_b64") + { + } // Inst_DS__DS_AND_SRC2_B64 + + Inst_DS__DS_AND_SRC2_B64::~Inst_DS__DS_AND_SRC2_B64() + { + } // ~Inst_DS__DS_AND_SRC2_B64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[A] & MEM[B]. + void + Inst_DS__DS_AND_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_OR_SRC2_B64 class methods --- + + Inst_DS__DS_OR_SRC2_B64::Inst_DS__DS_OR_SRC2_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_or_src2_b64") + { + } // Inst_DS__DS_OR_SRC2_B64 + + Inst_DS__DS_OR_SRC2_B64::~Inst_DS__DS_OR_SRC2_B64() + { + } // ~Inst_DS__DS_OR_SRC2_B64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? 
{A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[A] | MEM[B]. + void + Inst_DS__DS_OR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_XOR_SRC2_B64 class methods --- + + Inst_DS__DS_XOR_SRC2_B64::Inst_DS__DS_XOR_SRC2_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_xor_src2_b64") + { + } // Inst_DS__DS_XOR_SRC2_B64 + + Inst_DS__DS_XOR_SRC2_B64::~Inst_DS__DS_XOR_SRC2_B64() + { + } // ~Inst_DS__DS_XOR_SRC2_B64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[A] ^ MEM[B]. + void + Inst_DS__DS_XOR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRITE_SRC2_B64 class methods --- + + Inst_DS__DS_WRITE_SRC2_B64::Inst_DS__DS_WRITE_SRC2_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write_src2_b64") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE_SRC2_B64 + + Inst_DS__DS_WRITE_SRC2_B64::~Inst_DS__DS_WRITE_SRC2_B64() + { + } // ~Inst_DS__DS_WRITE_SRC2_B64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[B]. + // Write qword. + void + Inst_DS__DS_WRITE_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_SRC2_F64 class methods --- + + Inst_DS__DS_MIN_SRC2_F64::Inst_DS__DS_MIN_SRC2_F64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_src2_f64") + { + setFlag(F64); + } // Inst_DS__DS_MIN_SRC2_F64 + + Inst_DS__DS_MIN_SRC2_F64::~Inst_DS__DS_MIN_SRC2_F64() + { + } // ~Inst_DS__DS_MIN_SRC2_F64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A]. + // Float, handles NaN/INF/denorm. + void + Inst_DS__DS_MIN_SRC2_F64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_SRC2_F64 class methods --- + + Inst_DS__DS_MAX_SRC2_F64::Inst_DS__DS_MAX_SRC2_F64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_src2_f64") + { + setFlag(F64); + } // Inst_DS__DS_MAX_SRC2_F64 + + Inst_DS__DS_MAX_SRC2_F64::~Inst_DS__DS_MAX_SRC2_F64() + { + } // ~Inst_DS__DS_MAX_SRC2_F64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A]. + // Float, handles NaN/INF/denorm. + void + Inst_DS__DS_MAX_SRC2_F64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRITE_B96 class methods --- + + Inst_DS__DS_WRITE_B96::Inst_DS__DS_WRITE_B96(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write_b96") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE_B96 + + Inst_DS__DS_WRITE_B96::~Inst_DS__DS_WRITE_B96() + { + } // ~Inst_DS__DS_WRITE_B96 + + // --- description from .arch file --- + // {MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[95:0]. + // Tri-dword write. 
+ void + Inst_DS__DS_WRITE_B96::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU32 data0(gpuDynInst, extData.DATA0); + ConstVecOperandU32 data1(gpuDynInst, extData.DATA0 + 1); + ConstVecOperandU32 data2(gpuDynInst, extData.DATA0 + 2); + + addr.read(); + data0.read(); + data1.read(); + data2.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4] = data0[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 1] = data1[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 2] = data2[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_WRITE_B96::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemWrite<3>(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_WRITE_B96::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_DS__DS_WRITE_B128 class methods --- + + Inst_DS__DS_WRITE_B128::Inst_DS__DS_WRITE_B128(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write_b128") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE_B128 + + Inst_DS__DS_WRITE_B128::~Inst_DS__DS_WRITE_B128() + { + } // ~Inst_DS__DS_WRITE_B128 + + // --- description from .arch file --- + // {MEM[ADDR + 12], MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[127:0]. + // Qword write. 
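Note how ds_write_b96 above stages its payload: three consecutive VGPRs (DATA0 through DATA0 + 2) are copied into d_data at a stride of four dwords per active lane, with the fourth slot left untouched, and initiateAcc then issues a 3-dword write via initMemWrite<3>. ds_write_b128, which follows, fills that fourth slot as well. A minimal standalone model of the staging step, using a hypothetical buffer and wavefront size rather than the simulator's types:

    #include <array>
    #include <cstdint>

    constexpr int kLanes = 64;                     // wavefront size assumed here
    std::array<uint32_t, kLanes * 4> d_data{};     // four dword slots per lane

    void stageWriteB96(int lane, uint32_t d0, uint32_t d1, uint32_t d2)
    {
        d_data[lane * 4 + 0] = d0;
        d_data[lane * 4 + 1] = d1;
        d_data[lane * 4 + 2] = d2;                 // slot 3 is unused for b96
    }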
+ void + Inst_DS__DS_WRITE_B128::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU32 data0(gpuDynInst, extData.DATA0); + ConstVecOperandU32 data1(gpuDynInst, extData.DATA0 + 1); + ConstVecOperandU32 data2(gpuDynInst, extData.DATA0 + 2); + ConstVecOperandU32 data3(gpuDynInst, extData.DATA0 + 3); + + addr.read(); + data0.read(); + data1.read(); + data2.read(); + data3.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4] = data0[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 1] = data1[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 2] = data2[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 3] = data3[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_WRITE_B128::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemWrite<4>(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_WRITE_B128::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_DS__DS_READ_B96 class methods --- + + Inst_DS__DS_READ_B96::Inst_DS__DS_READ_B96(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read_b96") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ_B96 + + Inst_DS__DS_READ_B96::~Inst_DS__DS_READ_B96() + { + } // ~Inst_DS__DS_READ_B96 + + // --- description from .arch file --- + // Tri-dword read. 
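Both 96- and 128-bit DS writes above (and the DS reads that follow) build the byte offset the same way in initiateAcc: OFFSET1 supplies the high byte and OFFSET0 the low byte of an unsigned 16-bit displacement. A one-line worked example:

    #include <cstdint>

    // OFFSET0 and OFFSET1 are the two 8-bit immediate fields of the DS encoding.
    uint64_t dsOffset(uint8_t offset0, uint8_t offset1)
    {
        return (uint64_t(offset1) << 8) | offset0;     // 0..65535 bytes
    }
    // e.g. offset1 = 0x01, offset0 = 0x10 gives 0x110, i.e. 272 bytes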
+ void + Inst_DS__DS_READ_B96::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_READ_B96::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemRead<3>(gpuDynInst, offset); + } + + void + Inst_DS__DS_READ_B96::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst0(gpuDynInst, extData.VDST); + VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); + VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 1]; + vdst2[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 2]; + } + } + + vdst0.write(); + vdst1.write(); + vdst2.write(); + } + // --- Inst_DS__DS_READ_B128 class methods --- + + Inst_DS__DS_READ_B128::Inst_DS__DS_READ_B128(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read_b128") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ_B128 + + Inst_DS__DS_READ_B128::~Inst_DS__DS_READ_B128() + { + } // ~Inst_DS__DS_READ_B128 + + // --- description from .arch file --- + // Qword read. + void + Inst_DS__DS_READ_B128::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_READ_B128::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemRead<4>(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_READ_B128::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst0(gpuDynInst, extData.VDST); + VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); + VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2); + VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 1]; + vdst2[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 2]; + vdst3[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 3]; + } + } + + vdst0.write(); + vdst1.write(); + vdst2.write(); + vdst3.write(); + } // completeAcc +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/exp.cc b/src/arch/amdgpu/vega/insts/exp.cc new file mode 100644 index 0000000000..31b6ded10f --- /dev/null +++ b/src/arch/amdgpu/vega/insts/exp.cc @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_EXP__EXP class methods --- + + Inst_EXP__EXP::Inst_EXP__EXP(InFmt_EXP *iFmt) + : Inst_EXP(iFmt, "exp") + { + } // Inst_EXP__EXP + + Inst_EXP__EXP::~Inst_EXP__EXP() + { + } // ~Inst_EXP__EXP + + // --- description from .arch file --- + // Export through SX. + void + Inst_EXP__EXP::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/flat.cc b/src/arch/amdgpu/vega/insts/flat.cc new file mode 100644 index 0000000000..7f79025b3f --- /dev/null +++ b/src/arch/amdgpu/vega/insts/flat.cc @@ -0,0 +1,2138 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_FLAT__FLAT_LOAD_UBYTE class methods --- + + Inst_FLAT__FLAT_LOAD_UBYTE::Inst_FLAT__FLAT_LOAD_UBYTE(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_load_ubyte") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_FLAT__FLAT_LOAD_UBYTE + + Inst_FLAT__FLAT_LOAD_UBYTE::~Inst_FLAT__FLAT_LOAD_UBYTE() + { + } // ~Inst_FLAT__FLAT_LOAD_UBYTE + + // --- description from .arch file --- + // Untyped buffer load unsigned byte (zero extend to VGPR destination). + void + Inst_FLAT__FLAT_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (VecElemU32)((reinterpret_cast( + gpuDynInst->d_data))[lane]); + } + } + vdst.write(); + } // execute + // --- Inst_FLAT__FLAT_LOAD_SBYTE class methods --- + + Inst_FLAT__FLAT_LOAD_SBYTE::Inst_FLAT__FLAT_LOAD_SBYTE(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_load_sbyte") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_FLAT__FLAT_LOAD_SBYTE + + Inst_FLAT__FLAT_LOAD_SBYTE::~Inst_FLAT__FLAT_LOAD_SBYTE() + { + } // ~Inst_FLAT__FLAT_LOAD_SBYTE + + // --- description from .arch file --- + // Untyped buffer load signed byte (sign extend to VGPR destination). + void + Inst_FLAT__FLAT_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_FLAT__FLAT_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_FLAT__FLAT_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_FLAT__FLAT_LOAD_USHORT class methods --- + + Inst_FLAT__FLAT_LOAD_USHORT::Inst_FLAT__FLAT_LOAD_USHORT(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_load_ushort") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_FLAT__FLAT_LOAD_USHORT + + Inst_FLAT__FLAT_LOAD_USHORT::~Inst_FLAT__FLAT_LOAD_USHORT() + { + } // ~Inst_FLAT__FLAT_LOAD_USHORT + + // --- description from .arch file --- + // Untyped buffer load unsigned short (zero extend to VGPR destination). 
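In the completeAcc above, flat_load_ubyte zero-extends each returned byte into its 32-bit VGPR destination simply by converting the unsigned byte value; flat_load_sbyte, which would need a sign extension instead, remains panicUnimplemented() in this patch. The two conversions, shown on plain integer types rather than the simulator's operand classes:

    #include <cstdint>

    uint32_t zextByte(uint8_t b) { return uint32_t(b); }                    // flat_load_ubyte
    uint32_t sextByte(uint8_t b) { return uint32_t(int32_t(int8_t(b))); }   // what sbyte would need

    // zextByte(0xff) == 0x000000ff, while sextByte(0xff) == 0xffffffff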
+ void + Inst_FLAT__FLAT_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (VecElemU32)((reinterpret_cast( + gpuDynInst->d_data))[lane]); + } + } + vdst.write(); + } // execute + + // --- Inst_FLAT__FLAT_LOAD_SSHORT class methods --- + + Inst_FLAT__FLAT_LOAD_SSHORT::Inst_FLAT__FLAT_LOAD_SSHORT(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_load_sshort") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_FLAT__FLAT_LOAD_SSHORT + + Inst_FLAT__FLAT_LOAD_SSHORT::~Inst_FLAT__FLAT_LOAD_SSHORT() + { + } // ~Inst_FLAT__FLAT_LOAD_SSHORT + + // --- description from .arch file --- + // Untyped buffer load signed short (sign extend to VGPR destination). + void + Inst_FLAT__FLAT_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_FLAT__FLAT_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_FLAT__FLAT_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_FLAT__FLAT_LOAD_DWORD class methods --- + + Inst_FLAT__FLAT_LOAD_DWORD::Inst_FLAT__FLAT_LOAD_DWORD(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_load_dword") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_FLAT__FLAT_LOAD_DWORD + + Inst_FLAT__FLAT_LOAD_DWORD::~Inst_FLAT__FLAT_LOAD_DWORD() + { + } // ~Inst_FLAT__FLAT_LOAD_DWORD + + // --- description from .arch file --- + // Untyped buffer load dword. 
+ void + Inst_FLAT__FLAT_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane]; + } + } + vdst.write(); + } // completeAcc + // --- Inst_FLAT__FLAT_LOAD_DWORDX2 class methods --- + + Inst_FLAT__FLAT_LOAD_DWORDX2::Inst_FLAT__FLAT_LOAD_DWORDX2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_load_dwordx2") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_FLAT__FLAT_LOAD_DWORDX2 + + Inst_FLAT__FLAT_LOAD_DWORDX2::~Inst_FLAT__FLAT_LOAD_DWORDX2() + { + } // ~Inst_FLAT__FLAT_LOAD_DWORDX2 + + // --- description from .arch file --- + // Untyped buffer load 2 dwords. + void + Inst_FLAT__FLAT_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU64 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane]; + } + } + vdst.write(); + } // completeAcc + // --- Inst_FLAT__FLAT_LOAD_DWORDX3 class methods --- + + Inst_FLAT__FLAT_LOAD_DWORDX3::Inst_FLAT__FLAT_LOAD_DWORDX3( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_load_dwordx3") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_FLAT__FLAT_LOAD_DWORDX3 + + Inst_FLAT__FLAT_LOAD_DWORDX3::~Inst_FLAT__FLAT_LOAD_DWORDX3() + { + } // ~Inst_FLAT__FLAT_LOAD_DWORDX3 + + // --- description from .arch file --- + // Untyped buffer load 3 dwords. 
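flat_load_dword and flat_load_dwordx2 above both unpack d_data with one element per lane, the former as 32-bit elements and the latter reinterpreted as 64-bit elements written to a VecOperandU64; the x3 and x4 variants that follow switch back to 32-bit elements with an explicit per-lane stride of 3 or 4. A minimal model of the per-lane indexing, assuming a raw byte buffer in place of gem5's d_data:

    #include <cstdint>
    #include <cstring>

    uint32_t laneDword(const uint8_t *buf, int lane)
    {
        uint32_t v;
        std::memcpy(&v, buf + lane * sizeof(v), sizeof(v));
        return v;
    }

    uint64_t laneDwordX2(const uint8_t *buf, int lane)
    {
        uint64_t v;
        std::memcpy(&v, buf + lane * sizeof(v), sizeof(v));
        return v;
    }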
+ void + Inst_FLAT__FLAT_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<3>(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst0(gpuDynInst, extData.VDST); + VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); + VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 3]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 3 + 1]; + vdst2[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 3 + 2]; + } + } + + vdst0.write(); + vdst1.write(); + vdst2.write(); + } // completeAcc + // --- Inst_FLAT__FLAT_LOAD_DWORDX4 class methods --- + + Inst_FLAT__FLAT_LOAD_DWORDX4::Inst_FLAT__FLAT_LOAD_DWORDX4( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_load_dwordx4") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_FLAT__FLAT_LOAD_DWORDX4 + + Inst_FLAT__FLAT_LOAD_DWORDX4::~Inst_FLAT__FLAT_LOAD_DWORDX4() + { + } // ~Inst_FLAT__FLAT_LOAD_DWORDX4 + + // --- description from .arch file --- + // Untyped buffer load 4 dwords. + void + Inst_FLAT__FLAT_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<4>(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst0(gpuDynInst, extData.VDST); + VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); + VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2); + VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 1]; + vdst2[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 2]; + vdst3[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 3]; + } + } + + vdst0.write(); + vdst1.write(); + vdst2.write(); + vdst3.write(); + } // completeAcc + // --- Inst_FLAT__FLAT_STORE_BYTE class methods --- + + Inst_FLAT__FLAT_STORE_BYTE::Inst_FLAT__FLAT_STORE_BYTE(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_store_byte") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_FLAT__FLAT_STORE_BYTE + + Inst_FLAT__FLAT_STORE_BYTE::~Inst_FLAT__FLAT_STORE_BYTE() + { + } // ~Inst_FLAT__FLAT_STORE_BYTE + + 
// --- description from .arch file --- + // Untyped buffer store byte. + void + Inst_FLAT__FLAT_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU8 data(gpuDynInst, extData.DATA); + + data.read(); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = data[lane]; + } + } + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_FLAT__FLAT_STORE_SHORT class methods --- + + Inst_FLAT__FLAT_STORE_SHORT::Inst_FLAT__FLAT_STORE_SHORT(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_store_short") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_FLAT__FLAT_STORE_SHORT + + Inst_FLAT__FLAT_STORE_SHORT::~Inst_FLAT__FLAT_STORE_SHORT() + { + } // ~Inst_FLAT__FLAT_STORE_SHORT + + // --- description from .arch file --- + // Untyped buffer store short. + void + Inst_FLAT__FLAT_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU16 data(gpuDynInst, extData.DATA); + + data.read(); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = data[lane]; + } + } + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_FLAT__FLAT_STORE_SHORT_D16_HI class methods --- + + Inst_FLAT__FLAT_STORE_SHORT_D16_HI:: + Inst_FLAT__FLAT_STORE_SHORT_D16_HI(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_store_short_d16_hi") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_FLAT__FLAT_STORE_SHORT_D16_HI + + Inst_FLAT__FLAT_STORE_SHORT_D16_HI::~Inst_FLAT__FLAT_STORE_SHORT_D16_HI() + { + } // ~Inst_FLAT__FLAT_STORE_SHORT_D16_HI + + // --- description from .arch file --- + // Untyped buffer store short. 
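Every implemented FLAT load and store in this file opens with the same early-out: if the exec mask has no active lanes the instruction never reaches the memory pipeline, so the issue counters are unwound (VMEM, LGKM because flat addressing also counts against the LGKM counter, and EXP for the store variants only). A tiny standalone model of that bookkeeping, with a hypothetical counter struct standing in for gem5's Wavefront:

    struct IssueCounters { int vmem = 0; int lgkm = 0; int exp = 0; };

    // Mirrors the exec_mask.none() path of the FLAT execute() methods above.
    void unwindIssue(IssueCounters &wf, bool isFlat, bool isStore)
    {
        --wf.vmem;                   // decVMemInstsIssued()
        if (isFlat) { --wf.lgkm; }   // decLGKMInstsIssued()
        if (isStore) { --wf.exp; }   // decExpInstsIssued()
    }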
+ void + Inst_FLAT__FLAT_STORE_SHORT_D16_HI::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 data(gpuDynInst, extData.DATA); + + data.read(); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = (data[lane] >> 16); + } + } + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_STORE_SHORT_D16_HI::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_STORE_SHORT_D16_HI::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_FLAT__FLAT_STORE_DWORD class methods --- + + Inst_FLAT__FLAT_STORE_DWORD::Inst_FLAT__FLAT_STORE_DWORD(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_store_dword") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_FLAT__FLAT_STORE_DWORD + + Inst_FLAT__FLAT_STORE_DWORD::~Inst_FLAT__FLAT_STORE_DWORD() + { + } // ~Inst_FLAT__FLAT_STORE_DWORD + + // --- description from .arch file --- + // Untyped buffer store dword. + void + Inst_FLAT__FLAT_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 data(gpuDynInst, extData.DATA); + + data.read(); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = data[lane]; + } + } + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_FLAT__FLAT_STORE_DWORDX2 class methods --- + + Inst_FLAT__FLAT_STORE_DWORDX2::Inst_FLAT__FLAT_STORE_DWORDX2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_store_dwordx2") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_FLAT__FLAT_STORE_DWORDX2 + + Inst_FLAT__FLAT_STORE_DWORDX2::~Inst_FLAT__FLAT_STORE_DWORDX2() + { + } // ~Inst_FLAT__FLAT_STORE_DWORDX2 + + // --- description from .arch file --- + // Untyped buffer store 2 dwords. 
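flat_store_short_d16_hi above reads a full 32-bit source VGPR but stores only its upper half: each active lane shifts the value right by 16 before it is staged as a 16-bit element, so the low half never leaves the register. The per-lane payload is simply:

    #include <cstdint>

    // Value actually written to memory by flat_store_short_d16_hi.
    uint16_t d16HiPayload(uint32_t vgpr)
    {
        return uint16_t(vgpr >> 16);
    }
    // e.g. vgpr = 0xBEEF1234 stores 0xBEEF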
+ void + Inst_FLAT__FLAT_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU64 data(gpuDynInst, extData.DATA); + + data.read(); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = data[lane]; + } + } + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_FLAT__FLAT_STORE_DWORDX3 class methods --- + + Inst_FLAT__FLAT_STORE_DWORDX3::Inst_FLAT__FLAT_STORE_DWORDX3( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_store_dwordx3") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_FLAT__FLAT_STORE_DWORDX3 + + Inst_FLAT__FLAT_STORE_DWORDX3::~Inst_FLAT__FLAT_STORE_DWORDX3() + { + } // ~Inst_FLAT__FLAT_STORE_DWORDX3 + + // --- description from .arch file --- + // Untyped buffer store 3 dwords. + void + Inst_FLAT__FLAT_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 data0(gpuDynInst, extData.DATA); + ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1); + ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2); + + data0.read(); + data1.read(); + data2.read(); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast( + gpuDynInst->d_data))[lane * 3] = data0[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 3 + 1] = data1[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 3 + 2] = data2[lane]; + } + } + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite<3>(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_FLAT__FLAT_STORE_DWORDX4 class methods --- + + Inst_FLAT__FLAT_STORE_DWORDX4::Inst_FLAT__FLAT_STORE_DWORDX4( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_store_dwordx4") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_FLAT__FLAT_STORE_DWORDX4 + + Inst_FLAT__FLAT_STORE_DWORDX4::~Inst_FLAT__FLAT_STORE_DWORDX4() + { + } // ~Inst_FLAT__FLAT_STORE_DWORDX4 + + // --- description from .arch file --- + // Untyped buffer store 4 dwords. 
+ void + Inst_FLAT__FLAT_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 data0(gpuDynInst, extData.DATA); + ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1); + ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2); + ConstVecOperandU32 data3(gpuDynInst, extData.DATA + 3); + + data0.read(); + data1.read(); + data2.read(); + data3.read(); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4] = data0[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 1] = data1[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 2] = data2[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 3] = data3[lane]; + } + } + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite<4>(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_SWAP class methods --- + + Inst_FLAT__FLAT_ATOMIC_SWAP::Inst_FLAT__FLAT_ATOMIC_SWAP(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_swap") + { + setFlag(AtomicExch); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_SWAP + + Inst_FLAT__FLAT_ATOMIC_SWAP::~Inst_FLAT__FLAT_ATOMIC_SWAP() + { + } // ~Inst_FLAT__FLAT_ATOMIC_SWAP + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = DATA; + // RETURN_DATA = tmp. + void + Inst_FLAT__FLAT_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SWAP::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SWAP::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + + // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP class methods --- + + Inst_FLAT__FLAT_ATOMIC_CMPSWAP + ::Inst_FLAT__FLAT_ATOMIC_CMPSWAP(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_cmpswap") + { + setFlag(AtomicCAS); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP + + Inst_FLAT__FLAT_ATOMIC_CMPSWAP::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP() + { + } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // src = DATA[0]; + // cmp = DATA[1]; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0] = tmp. 
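Every FLAT atomic in this file picks the returning or non-returning flavour in its constructor from the GLC bit of the encoding: with GLC set the pre-operation memory value is returned to the destination VGPRs (AtomicReturn), otherwise it is discarded (AtomicNoReturn). The pattern each constructor repeats, with only the operation flag changing per opcode, is:

    setFlag(AtomicCAS);            // AtomicExch, AtomicAdd, ... per opcode
    if (instData.GLC) {
        setFlag(AtomicReturn);     // old value comes back to VDST
    } else {
        setFlag(AtomicNoReturn);
    }
    setFlag(MemoryRef);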
+ void + Inst_FLAT__FLAT_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_ADD class methods --- + + Inst_FLAT__FLAT_ATOMIC_ADD::Inst_FLAT__FLAT_ATOMIC_ADD(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_add") + { + setFlag(AtomicAdd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_ADD + + Inst_FLAT__FLAT_ATOMIC_ADD::~Inst_FLAT__FLAT_ATOMIC_ADD() + { + } // ~Inst_FLAT__FLAT_ATOMIC_ADD + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] += DATA; + // RETURN_DATA = tmp. + void + Inst_FLAT__FLAT_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_ADD::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_ADD::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_SUB class methods --- + + Inst_FLAT__FLAT_ATOMIC_SUB::Inst_FLAT__FLAT_ATOMIC_SUB(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_sub") + { + setFlag(AtomicSub); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_SUB + + Inst_FLAT__FLAT_ATOMIC_SUB::~Inst_FLAT__FLAT_ATOMIC_SUB() + { + } // ~Inst_FLAT__FLAT_ATOMIC_SUB + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= DATA; + // RETURN_DATA = tmp. + void + Inst_FLAT__FLAT_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SUB::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SUB::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_SMIN class methods --- + + Inst_FLAT__FLAT_ATOMIC_SMIN::Inst_FLAT__FLAT_ATOMIC_SMIN(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_smin") + { + setFlag(AtomicMin); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_SMIN + + Inst_FLAT__FLAT_ATOMIC_SMIN::~Inst_FLAT__FLAT_ATOMIC_SMIN() + { + } // ~Inst_FLAT__FLAT_ATOMIC_SMIN + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); + // RETURN_DATA = tmp. 
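The actual read-modify-write for these atomics lives in the shared atomicExecute, initAtomicAccess, and atomicComplete helpers; the per-opcode behaviour is documented only by the .arch comments. For flat_atomic_cmpswap, the 32-bit pseudocode above corresponds to this scalar operation, shown illustratively on plain memory rather than the simulated address space:

    #include <cstdint>

    // src = DATA[0], cmp = DATA[1]; returns the pre-operation memory value.
    uint32_t cmpswap32(uint32_t &mem, uint32_t src, uint32_t cmp)
    {
        uint32_t tmp = mem;
        if (tmp == cmp) {
            mem = src;
        }
        return tmp;    // RETURN_DATA[0], written back only when GLC is set
    }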
+ void + Inst_FLAT__FLAT_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SMIN::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SMIN::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_UMIN class methods --- + + Inst_FLAT__FLAT_ATOMIC_UMIN::Inst_FLAT__FLAT_ATOMIC_UMIN(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_umin") + { + setFlag(AtomicMin); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_UMIN + + Inst_FLAT__FLAT_ATOMIC_UMIN::~Inst_FLAT__FLAT_ATOMIC_UMIN() + { + } // ~Inst_FLAT__FLAT_ATOMIC_UMIN + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); + // RETURN_DATA = tmp. + void + Inst_FLAT__FLAT_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_UMIN::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_UMIN::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_SMAX class methods --- + + Inst_FLAT__FLAT_ATOMIC_SMAX::Inst_FLAT__FLAT_ATOMIC_SMAX(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_smax") + { + setFlag(AtomicMax); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_SMAX + + Inst_FLAT__FLAT_ATOMIC_SMAX::~Inst_FLAT__FLAT_ATOMIC_SMAX() + { + } // ~Inst_FLAT__FLAT_ATOMIC_SMAX + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); + // RETURN_DATA = tmp. + void + Inst_FLAT__FLAT_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SMAX::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SMAX::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_UMAX class methods --- + + Inst_FLAT__FLAT_ATOMIC_UMAX::Inst_FLAT__FLAT_ATOMIC_UMAX(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_umax") + { + setFlag(AtomicMax); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_UMAX + + Inst_FLAT__FLAT_ATOMIC_UMAX::~Inst_FLAT__FLAT_ATOMIC_UMAX() + { + } // ~Inst_FLAT__FLAT_ATOMIC_UMAX + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); + // RETURN_DATA = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_UMAX::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_UMAX::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_AND class methods --- + + Inst_FLAT__FLAT_ATOMIC_AND::Inst_FLAT__FLAT_ATOMIC_AND(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_and") + { + setFlag(AtomicAnd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_AND + + Inst_FLAT__FLAT_ATOMIC_AND::~Inst_FLAT__FLAT_ATOMIC_AND() + { + } // ~Inst_FLAT__FLAT_ATOMIC_AND + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] &= DATA; + // RETURN_DATA = tmp. + void + Inst_FLAT__FLAT_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_AND::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_AND::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_OR class methods --- + + Inst_FLAT__FLAT_ATOMIC_OR::Inst_FLAT__FLAT_ATOMIC_OR(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_or") + { + setFlag(AtomicOr); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_OR + + Inst_FLAT__FLAT_ATOMIC_OR::~Inst_FLAT__FLAT_ATOMIC_OR() + { + } // ~Inst_FLAT__FLAT_ATOMIC_OR + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] |= DATA; + // RETURN_DATA = tmp. + void + Inst_FLAT__FLAT_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_OR::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_OR::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + + // --- Inst_FLAT__FLAT_ATOMIC_XOR class methods --- + + Inst_FLAT__FLAT_ATOMIC_XOR::Inst_FLAT__FLAT_ATOMIC_XOR(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_xor") + { + setFlag(AtomicXor); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_XOR + + Inst_FLAT__FLAT_ATOMIC_XOR::~Inst_FLAT__FLAT_ATOMIC_XOR() + { + } // ~Inst_FLAT__FLAT_ATOMIC_XOR + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] ^= DATA; + // RETURN_DATA = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_XOR::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_XOR::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_INC class methods --- + + Inst_FLAT__FLAT_ATOMIC_INC::Inst_FLAT__FLAT_ATOMIC_INC(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_inc") + { + setFlag(AtomicInc); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_INC + + Inst_FLAT__FLAT_ATOMIC_INC::~Inst_FLAT__FLAT_ATOMIC_INC() + { + } // ~Inst_FLAT__FLAT_ATOMIC_INC + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); + // RETURN_DATA = tmp. + void + Inst_FLAT__FLAT_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_INC::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_INC::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_DEC class methods --- + + Inst_FLAT__FLAT_ATOMIC_DEC::Inst_FLAT__FLAT_ATOMIC_DEC(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_dec") + { + setFlag(AtomicDec); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_DEC + + Inst_FLAT__FLAT_ATOMIC_DEC::~Inst_FLAT__FLAT_ATOMIC_DEC() + { + } // ~Inst_FLAT__FLAT_ATOMIC_DEC + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 + // (unsigned compare); RETURN_DATA = tmp. + void + Inst_FLAT__FLAT_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_DEC::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_DEC::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_SWAP_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_SWAP_X2::Inst_FLAT__FLAT_ATOMIC_SWAP_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_swap_x2") + { + setFlag(AtomicExch); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_SWAP_X2 + + Inst_FLAT__FLAT_ATOMIC_SWAP_X2::~Inst_FLAT__FLAT_ATOMIC_SWAP_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_SWAP_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = DATA[0:1]; + // RETURN_DATA[0:1] = tmp. 
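flat_atomic_inc and flat_atomic_dec above are the wrapping variants their .arch comments describe: inc rolls over to zero once the stored value reaches DATA, and dec reloads DATA when the value is zero or already above it, both using unsigned compares. In scalar form:

    #include <cstdint>

    uint32_t atomicIncWrap(uint32_t &mem, uint32_t data)
    {
        uint32_t tmp = mem;
        mem = (tmp >= data) ? 0u : tmp + 1u;
        return tmp;                          // RETURN_DATA
    }

    uint32_t atomicDecWrap(uint32_t &mem, uint32_t data)
    {
        uint32_t tmp = mem;
        mem = (tmp == 0u || tmp > data) ? data : tmp - 1u;
        return tmp;                          // RETURN_DATA
    }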
+ void + Inst_FLAT__FLAT_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SWAP_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SWAP_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_cmpswap_x2") + { + setFlag(AtomicCAS); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 + + Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // src = DATA[0:1]; + // cmp = DATA[2:3]; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0:1] = tmp. + void + Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_ADD_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_ADD_X2::Inst_FLAT__FLAT_ATOMIC_ADD_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_add_x2") + { + setFlag(AtomicAdd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_ADD_X2 + + Inst_FLAT__FLAT_ATOMIC_ADD_X2::~Inst_FLAT__FLAT_ATOMIC_ADD_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_ADD_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] += DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_FLAT__FLAT_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_ADD_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_ADD_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_SUB_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_SUB_X2::Inst_FLAT__FLAT_ATOMIC_SUB_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_sub_x2") + { + setFlag(AtomicSub); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_SUB_X2 + + Inst_FLAT__FLAT_ATOMIC_SUB_X2::~Inst_FLAT__FLAT_ATOMIC_SUB_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_SUB_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SUB_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SUB_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_SMIN_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_SMIN_X2::Inst_FLAT__FLAT_ATOMIC_SMIN_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_smin_x2") + { + setFlag(AtomicMin); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_SMIN_X2 + + Inst_FLAT__FLAT_ATOMIC_SMIN_X2::~Inst_FLAT__FLAT_ATOMIC_SMIN_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_SMIN_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_FLAT__FLAT_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SMIN_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SMIN_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_UMIN_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_UMIN_X2::Inst_FLAT__FLAT_ATOMIC_UMIN_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_umin_x2") + { + setFlag(AtomicMin); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_UMIN_X2 + + Inst_FLAT__FLAT_ATOMIC_UMIN_X2::~Inst_FLAT__FLAT_ATOMIC_UMIN_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_UMIN_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_FLAT__FLAT_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_UMIN_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_UMIN_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_SMAX_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_SMAX_X2::Inst_FLAT__FLAT_ATOMIC_SMAX_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_smax_x2") + { + setFlag(AtomicMax); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_SMAX_X2 + + Inst_FLAT__FLAT_ATOMIC_SMAX_X2::~Inst_FLAT__FLAT_ATOMIC_SMAX_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_SMAX_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SMAX_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SMAX_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_UMAX_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_UMAX_X2::Inst_FLAT__FLAT_ATOMIC_UMAX_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_umax_x2") + { + setFlag(AtomicMax); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_UMAX_X2 + + Inst_FLAT__FLAT_ATOMIC_UMAX_X2::~Inst_FLAT__FLAT_ATOMIC_UMAX_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_UMAX_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_FLAT__FLAT_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_UMAX_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_UMAX_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_AND_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_AND_X2::Inst_FLAT__FLAT_ATOMIC_AND_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_and_x2") + { + setFlag(AtomicAnd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_AND_X2 + + Inst_FLAT__FLAT_ATOMIC_AND_X2::~Inst_FLAT__FLAT_ATOMIC_AND_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_AND_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] &= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_FLAT__FLAT_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_AND_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_AND_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_OR_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_OR_X2::Inst_FLAT__FLAT_ATOMIC_OR_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_or_x2") + { + setFlag(AtomicOr); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_OR_X2 + + Inst_FLAT__FLAT_ATOMIC_OR_X2::~Inst_FLAT__FLAT_ATOMIC_OR_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_OR_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] |= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_OR_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_OR_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_XOR_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_XOR_X2::Inst_FLAT__FLAT_ATOMIC_XOR_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_xor_x2") + { + setFlag(AtomicXor); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_XOR_X2 + + Inst_FLAT__FLAT_ATOMIC_XOR_X2::~Inst_FLAT__FLAT_ATOMIC_XOR_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_XOR_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] ^= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_FLAT__FLAT_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_XOR_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_XOR_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_INC_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_INC_X2::Inst_FLAT__FLAT_ATOMIC_INC_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_inc_x2") + { + setFlag(AtomicInc); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_INC_X2 + + Inst_FLAT__FLAT_ATOMIC_INC_X2::~Inst_FLAT__FLAT_ATOMIC_INC_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_INC_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_FLAT__FLAT_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_INC_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_INC_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_DEC_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_DEC_X2::Inst_FLAT__FLAT_ATOMIC_DEC_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_dec_x2") + { + setFlag(AtomicDec); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_DEC_X2 + + Inst_FLAT__FLAT_ATOMIC_DEC_X2::~Inst_FLAT__FLAT_ATOMIC_DEC_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_DEC_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 + // (unsigned compare); + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_DEC_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_DEC_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_ADD_F32 class methods --- + + Inst_FLAT__FLAT_ATOMIC_ADD_F32::Inst_FLAT__FLAT_ATOMIC_ADD_F32( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_add_f32") + { + setFlag(AtomicAdd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_ADD_F32 + + Inst_FLAT__FLAT_ATOMIC_ADD_F32::~Inst_FLAT__FLAT_ATOMIC_ADD_F32() + { + } // ~Inst_FLAT__FLAT_ATOMIC_ADD_F32 + + void + Inst_FLAT__FLAT_ATOMIC_ADD_F32::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_ADD_F32::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_ADD_F32::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16 class methods --- + + Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16::Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_pk_add_f16") + { + setFlag(AtomicAdd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16 + + Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16::~Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16() + { + } // ~Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16 + + void + Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_ADD_F64 class methods --- + + Inst_FLAT__FLAT_ATOMIC_ADD_F64::Inst_FLAT__FLAT_ATOMIC_ADD_F64( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_add_f64") + { + setFlag(AtomicAdd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_ADD_F64 + + Inst_FLAT__FLAT_ATOMIC_ADD_F64::~Inst_FLAT__FLAT_ATOMIC_ADD_F64() + { + } // ~Inst_FLAT__FLAT_ATOMIC_ADD_F64 + + void + Inst_FLAT__FLAT_ATOMIC_ADD_F64::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_ADD_F64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_ADD_F64::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_MIN_F64 class methods --- + + Inst_FLAT__FLAT_ATOMIC_MIN_F64::Inst_FLAT__FLAT_ATOMIC_MIN_F64( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_min_f64") + { + setFlag(AtomicMin); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_MIN_F64 + + Inst_FLAT__FLAT_ATOMIC_MIN_F64::~Inst_FLAT__FLAT_ATOMIC_MIN_F64() + { + } // ~Inst_FLAT__FLAT_ATOMIC_MIN_F64 + + void + Inst_FLAT__FLAT_ATOMIC_MIN_F64::execute(GPUDynInstPtr gpuDynInst) + { + 
atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_MIN_F64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_MIN_F64::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_MAX_F64 class methods --- + + Inst_FLAT__FLAT_ATOMIC_MAX_F64::Inst_FLAT__FLAT_ATOMIC_MAX_F64( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_max_f64") + { + setFlag(AtomicMax); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_MAX_F64 + + Inst_FLAT__FLAT_ATOMIC_MAX_F64::~Inst_FLAT__FLAT_ATOMIC_MAX_F64() + { + } // ~Inst_FLAT__FLAT_ATOMIC_MAX_F64 + + void + Inst_FLAT__FLAT_ATOMIC_MAX_F64::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_MAX_F64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_MAX_F64::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/inst_util.hh b/src/arch/amdgpu/vega/insts/inst_util.hh index 7ec2e2ddd3..bc64ff88da 100644 --- a/src/arch/amdgpu/vega/insts/inst_util.hh +++ b/src/arch/amdgpu/vega/insts/inst_util.hh @@ -35,6 +35,7 @@ #include #include "arch/amdgpu/vega/gpu_registers.hh" +#include "arch/amdgpu/vega/insts/gpu_static_inst.hh" namespace gem5 { @@ -315,7 +316,8 @@ namespace VegaISA * 0x142: broadcast 15th thread of each row to next row * 0x143: broadcast thread 31 to rows 2 and 3 */ - int dppInstImpl(SqDPPVals dppCtrl, int currLane, int rowNum, + inline int + dppInstImpl(SqDPPVals dppCtrl, int currLane, int rowNum, int rowOffset, bool & outOfBounds) { // local variables diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc deleted file mode 100644 index 651b6dc9f9..0000000000 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ /dev/null @@ -1,46539 +0,0 @@ -/* - * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "arch/amdgpu/vega/insts/instructions.hh" - -#include - -#include "arch/amdgpu/vega/insts/inst_util.hh" -#include "debug/VEGA.hh" -#include "debug/GPUSync.hh" -#include "dev/amdgpu/hwreg_defines.hh" -#include "gpu-compute/shader.hh" - -namespace gem5 -{ - -namespace VegaISA -{ - // --- Inst_SOP2__S_ADD_U32 class methods --- - - Inst_SOP2__S_ADD_U32::Inst_SOP2__S_ADD_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_add_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_ADD_U32 - - Inst_SOP2__S_ADD_U32::~Inst_SOP2__S_ADD_U32() - { - } // ~Inst_SOP2__S_ADD_U32 - - // --- description from .arch file --- - // D.u = S0.u + S1.u; - // SCC = (S0.u + S1.u >= 0x800000000ULL ? 1 : 0) is an unsigned - // --- overflow/carry-out for S_ADDC_U32. - void - Inst_SOP2__S_ADD_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() + src1.rawData(); - scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData()) - >= 0x100000000ULL ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_SUB_U32 class methods --- - - Inst_SOP2__S_SUB_U32::Inst_SOP2__S_SUB_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_sub_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_SUB_U32 - - Inst_SOP2__S_SUB_U32::~Inst_SOP2__S_SUB_U32() - { - } // ~Inst_SOP2__S_SUB_U32 - - // --- description from .arch file --- - // D.u = S0.u - S1.u; - // SCC = (S1.u > S0.u ? 1 : 0) is an unsigned overflow or carry-out for - // --- S_SUBB_U32. - void - Inst_SOP2__S_SUB_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() - src1.rawData(); - scc = (src1.rawData() > src0.rawData()) ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_ADD_I32 class methods --- - - Inst_SOP2__S_ADD_I32::Inst_SOP2__S_ADD_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_add_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_ADD_I32 - - Inst_SOP2__S_ADD_I32::~Inst_SOP2__S_ADD_I32() - { - } // ~Inst_SOP2__S_ADD_I32 - - // --- description from .arch file --- - // D.i = S0.i + S1.i; - // SCC = (S0.u[31] == S1.u[31] && S0.u[31] != D.u[31]) is a signed - // overflow. - // This opcode is not suitable for use with S_ADDC_U32 for implementing - // 64-bit operations. 
- void - Inst_SOP2__S_ADD_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() + src1.rawData(); - scc = (bits(src0.rawData(), 31) == bits(src1.rawData(), 31) - && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) - ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_SUB_I32 class methods --- - - Inst_SOP2__S_SUB_I32::Inst_SOP2__S_SUB_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_sub_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_SUB_I32 - - Inst_SOP2__S_SUB_I32::~Inst_SOP2__S_SUB_I32() - { - } // ~Inst_SOP2__S_SUB_I32 - - // --- description from .arch file --- - // D.i = S0.i - S1.i; - // SCC = (S0.u[31] != S1.u[31] && S0.u[31] != D.u[31]) is a signed - // overflow. - // CAUTION: The condition code behaviour for this opcode is inconsistent - // with V_SUB_I32; see V_SUB_I32 for further details. - // This opcode is not suitable for use with S_SUBB_U32 for implementing - // 64-bit operations. - void - Inst_SOP2__S_SUB_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() - src1.rawData(); - scc = (bits(src0.rawData(), 31) != bits(src1.rawData(), 31) - && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_ADDC_U32 class methods --- - - Inst_SOP2__S_ADDC_U32::Inst_SOP2__S_ADDC_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_addc_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_ADDC_U32 - - Inst_SOP2__S_ADDC_U32::~Inst_SOP2__S_ADDC_U32() - { - } // ~Inst_SOP2__S_ADDC_U32 - - // --- description from .arch file --- - // D.u = S0.u + S1.u + SCC; - // SCC = (S0.u + S1.u + SCC >= 0x800000000ULL ? 1 : 0) is an unsigned - // overflow. - void - Inst_SOP2__S_ADDC_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - scc.read(); - - sdst = src0.rawData() + src1.rawData() + scc.rawData(); - scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData() - + (ScalarRegU64)scc.rawData()) >= 0x100000000ULL ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_SUBB_U32 class methods --- - - Inst_SOP2__S_SUBB_U32::Inst_SOP2__S_SUBB_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_subb_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_SUBB_U32 - - Inst_SOP2__S_SUBB_U32::~Inst_SOP2__S_SUBB_U32() - { - } // ~Inst_SOP2__S_SUBB_U32 - - // --- description from .arch file --- - // D.u = S0.u - S1.u - SCC; - // SCC = (S1.u + SCC > S0.u ? 1 : 0) is an unsigned overflow. 
- void - Inst_SOP2__S_SUBB_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - scc.read(); - - sdst = src0.rawData() - src1.rawData() - scc.rawData(); - scc = (src1.rawData() + scc.rawData()) > src0.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_MIN_I32 class methods --- - - Inst_SOP2__S_MIN_I32::Inst_SOP2__S_MIN_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_min_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_MIN_I32 - - Inst_SOP2__S_MIN_I32::~Inst_SOP2__S_MIN_I32() - { - } // ~Inst_SOP2__S_MIN_I32 - - // --- description from .arch file --- - // D.i = (S0.i < S1.i) ? S0.i : S1.i; - // SCC = 1 if S0 is chosen as the minimum value. - void - Inst_SOP2__S_MIN_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = std::min(src0.rawData(), src1.rawData()); - scc = (src0.rawData() < src1.rawData()) ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_MIN_U32 class methods --- - - Inst_SOP2__S_MIN_U32::Inst_SOP2__S_MIN_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_min_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_MIN_U32 - - Inst_SOP2__S_MIN_U32::~Inst_SOP2__S_MIN_U32() - { - } // ~Inst_SOP2__S_MIN_U32 - - // --- description from .arch file --- - // D.u = (S0.u < S1.u) ? S0.u : S1.u; - // SCC = 1 if S0 is chosen as the minimum value. - void - Inst_SOP2__S_MIN_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = std::min(src0.rawData(), src1.rawData()); - scc = (src0.rawData() < src1.rawData()) ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_MAX_I32 class methods --- - - Inst_SOP2__S_MAX_I32::Inst_SOP2__S_MAX_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_max_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_MAX_I32 - - Inst_SOP2__S_MAX_I32::~Inst_SOP2__S_MAX_I32() - { - } // ~Inst_SOP2__S_MAX_I32 - - // --- description from .arch file --- - // D.i = (S0.i > S1.i) ? S0.i : S1.i; - // SCC = 1 if S0 is chosen as the maximum value. - void - Inst_SOP2__S_MAX_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = std::max(src0.rawData(), src1.rawData()); - scc = (src0.rawData() > src1.rawData()) ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_MAX_U32 class methods --- - - Inst_SOP2__S_MAX_U32::Inst_SOP2__S_MAX_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_max_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_MAX_U32 - - Inst_SOP2__S_MAX_U32::~Inst_SOP2__S_MAX_U32() - { - } // ~Inst_SOP2__S_MAX_U32 - - // --- description from .arch file --- - // D.u = (S0.u > S1.u) ? S0.u : S1.u; - // SCC = 1 if S0 is chosen as the maximum value. 
- void - Inst_SOP2__S_MAX_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = std::max(src0.rawData(), src1.rawData()); - scc = (src0.rawData() > src1.rawData()) ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_CSELECT_B32 class methods --- - - Inst_SOP2__S_CSELECT_B32::Inst_SOP2__S_CSELECT_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_cselect_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_CSELECT_B32 - - Inst_SOP2__S_CSELECT_B32::~Inst_SOP2__S_CSELECT_B32() - { - } // ~Inst_SOP2__S_CSELECT_B32 - - // --- description from .arch file --- - // D.u = SCC ? S0.u : S1.u (conditional select). - void - Inst_SOP2__S_CSELECT_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - scc.read(); - - sdst = scc.rawData() ? src0.rawData() : src1.rawData(); - - sdst.write(); - } // execute - // --- Inst_SOP2__S_CSELECT_B64 class methods --- - - Inst_SOP2__S_CSELECT_B64::Inst_SOP2__S_CSELECT_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_cselect_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_CSELECT_B64 - - Inst_SOP2__S_CSELECT_B64::~Inst_SOP2__S_CSELECT_B64() - { - } // ~Inst_SOP2__S_CSELECT_B64 - - // --- description from .arch file --- - // D.u64 = SCC ? S0.u64 : S1.u64 (conditional select). - void - Inst_SOP2__S_CSELECT_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - scc.read(); - - sdst = scc.rawData() ? src0.rawData() : src1.rawData(); - - sdst.write(); - } // execute - // --- Inst_SOP2__S_AND_B32 class methods --- - - Inst_SOP2__S_AND_B32::Inst_SOP2__S_AND_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_and_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_AND_B32 - - Inst_SOP2__S_AND_B32::~Inst_SOP2__S_AND_B32() - { - } // ~Inst_SOP2__S_AND_B32 - - // --- description from .arch file --- - // D.u = S0.u & S1.u; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_AND_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() & src1.rawData(); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_AND_B64 class methods --- - - Inst_SOP2__S_AND_B64::Inst_SOP2__S_AND_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_and_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_AND_B64 - - Inst_SOP2__S_AND_B64::~Inst_SOP2__S_AND_B64() - { - } // ~Inst_SOP2__S_AND_B64 - - // --- description from .arch file --- - // D.u64 = S0.u64 & S1.u64; - // SCC = 1 if result is non-zero. 
- void - Inst_SOP2__S_AND_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() & src1.rawData(); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_OR_B32 class methods --- - - Inst_SOP2__S_OR_B32::Inst_SOP2__S_OR_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_or_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_OR_B32 - - Inst_SOP2__S_OR_B32::~Inst_SOP2__S_OR_B32() - { - } // ~Inst_SOP2__S_OR_B32 - - // --- description from .arch file --- - // D.u = S0.u | S1.u; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_OR_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() | src1.rawData(); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_OR_B64 class methods --- - - Inst_SOP2__S_OR_B64::Inst_SOP2__S_OR_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_or_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_OR_B64 - - Inst_SOP2__S_OR_B64::~Inst_SOP2__S_OR_B64() - { - } // ~Inst_SOP2__S_OR_B64 - - // --- description from .arch file --- - // D.u64 = S0.u64 | S1.u64; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_OR_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() | src1.rawData(); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_XOR_B32 class methods --- - - Inst_SOP2__S_XOR_B32::Inst_SOP2__S_XOR_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_xor_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_XOR_B32 - - Inst_SOP2__S_XOR_B32::~Inst_SOP2__S_XOR_B32() - { - } // ~Inst_SOP2__S_XOR_B32 - - // --- description from .arch file --- - // D.u = S0.u ^ S1.u; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_XOR_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() ^ src1.rawData(); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_XOR_B64 class methods --- - - Inst_SOP2__S_XOR_B64::Inst_SOP2__S_XOR_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_xor_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_XOR_B64 - - Inst_SOP2__S_XOR_B64::~Inst_SOP2__S_XOR_B64() - { - } // ~Inst_SOP2__S_XOR_B64 - - // --- description from .arch file --- - // D.u64 = S0.u64 ^ S1.u64; - // SCC = 1 if result is non-zero. 
- void - Inst_SOP2__S_XOR_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() ^ src1.rawData(); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_ANDN2_B32 class methods --- - - Inst_SOP2__S_ANDN2_B32::Inst_SOP2__S_ANDN2_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_andn2_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_ANDN2_B32 - - Inst_SOP2__S_ANDN2_B32::~Inst_SOP2__S_ANDN2_B32() - { - } // ~Inst_SOP2__S_ANDN2_B32 - - // --- description from .arch file --- - // D.u = S0.u & ~S1.u; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_ANDN2_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() &~ src1.rawData(); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_ANDN2_B64 class methods --- - - Inst_SOP2__S_ANDN2_B64::Inst_SOP2__S_ANDN2_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_andn2_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_ANDN2_B64 - - Inst_SOP2__S_ANDN2_B64::~Inst_SOP2__S_ANDN2_B64() - { - } // ~Inst_SOP2__S_ANDN2_B64 - - // --- description from .arch file --- - // D.u64 = S0.u64 & ~S1.u64; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_ANDN2_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() &~ src1.rawData(); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_ORN2_B32 class methods --- - - Inst_SOP2__S_ORN2_B32::Inst_SOP2__S_ORN2_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_orn2_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_ORN2_B32 - - Inst_SOP2__S_ORN2_B32::~Inst_SOP2__S_ORN2_B32() - { - } // ~Inst_SOP2__S_ORN2_B32 - - // --- description from .arch file --- - // D.u = S0.u | ~S1.u; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_ORN2_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() |~ src1.rawData(); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_ORN2_B64 class methods --- - - Inst_SOP2__S_ORN2_B64::Inst_SOP2__S_ORN2_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_orn2_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_ORN2_B64 - - Inst_SOP2__S_ORN2_B64::~Inst_SOP2__S_ORN2_B64() - { - } // ~Inst_SOP2__S_ORN2_B64 - - // --- description from .arch file --- - // D.u64 = S0.u64 | ~S1.u64; - // SCC = 1 if result is non-zero. 
- void - Inst_SOP2__S_ORN2_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() |~ src1.rawData(); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_NAND_B32 class methods --- - - Inst_SOP2__S_NAND_B32::Inst_SOP2__S_NAND_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_nand_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_NAND_B32 - - Inst_SOP2__S_NAND_B32::~Inst_SOP2__S_NAND_B32() - { - } // ~Inst_SOP2__S_NAND_B32 - - // --- description from .arch file --- - // D.u = ~(S0.u & S1.u); - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_NAND_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = ~(src0.rawData() & src1.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_NAND_B64 class methods --- - - Inst_SOP2__S_NAND_B64::Inst_SOP2__S_NAND_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_nand_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_NAND_B64 - - Inst_SOP2__S_NAND_B64::~Inst_SOP2__S_NAND_B64() - { - } // ~Inst_SOP2__S_NAND_B64 - - // --- description from .arch file --- - // D.u64 = ~(S0.u64 & S1.u64); - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_NAND_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = ~(src0.rawData() & src1.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_NOR_B32 class methods --- - - Inst_SOP2__S_NOR_B32::Inst_SOP2__S_NOR_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_nor_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_NOR_B32 - - Inst_SOP2__S_NOR_B32::~Inst_SOP2__S_NOR_B32() - { - } // ~Inst_SOP2__S_NOR_B32 - - // --- description from .arch file --- - // D.u = ~(S0.u | S1.u); - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_NOR_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = ~(src0.rawData() | src1.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_NOR_B64 class methods --- - - Inst_SOP2__S_NOR_B64::Inst_SOP2__S_NOR_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_nor_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_NOR_B64 - - Inst_SOP2__S_NOR_B64::~Inst_SOP2__S_NOR_B64() - { - } // ~Inst_SOP2__S_NOR_B64 - - // --- description from .arch file --- - // D.u64 = ~(S0.u64 | S1.u64); - // SCC = 1 if result is non-zero. 
- void - Inst_SOP2__S_NOR_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = ~(src0.rawData() | src1.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_XNOR_B32 class methods --- - - Inst_SOP2__S_XNOR_B32::Inst_SOP2__S_XNOR_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_xnor_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_XNOR_B32 - - Inst_SOP2__S_XNOR_B32::~Inst_SOP2__S_XNOR_B32() - { - } // ~Inst_SOP2__S_XNOR_B32 - - // --- description from .arch file --- - // D.u = ~(S0.u ^ S1.u); - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_XNOR_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = ~(src0.rawData() ^ src1.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_XNOR_B64 class methods --- - - Inst_SOP2__S_XNOR_B64::Inst_SOP2__S_XNOR_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_xnor_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_XNOR_B64 - - Inst_SOP2__S_XNOR_B64::~Inst_SOP2__S_XNOR_B64() - { - } // ~Inst_SOP2__S_XNOR_B64 - - // --- description from .arch file --- - // D.u64 = ~(S0.u64 ^ S1.u64); - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_XNOR_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = ~(src0.rawData() ^ src1.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_LSHL_B32 class methods --- - - Inst_SOP2__S_LSHL_B32::Inst_SOP2__S_LSHL_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_lshl_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_LSHL_B32 - - Inst_SOP2__S_LSHL_B32::~Inst_SOP2__S_LSHL_B32() - { - } // ~Inst_SOP2__S_LSHL_B32 - - // --- description from .arch file --- - // D.u = S0.u << S1.u[4:0]; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_LSHL_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = (src0.rawData() << bits(src1.rawData(), 4, 0)); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_LSHL_B64 class methods --- - - Inst_SOP2__S_LSHL_B64::Inst_SOP2__S_LSHL_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_lshl_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_LSHL_B64 - - Inst_SOP2__S_LSHL_B64::~Inst_SOP2__S_LSHL_B64() - { - } // ~Inst_SOP2__S_LSHL_B64 - - // --- description from .arch file --- - // D.u64 = S0.u64 << S1.u[5:0]; - // SCC = 1 if result is non-zero. 
- void - Inst_SOP2__S_LSHL_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = (src0.rawData() << bits(src1.rawData(), 5, 0)); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_LSHR_B32 class methods --- - - Inst_SOP2__S_LSHR_B32::Inst_SOP2__S_LSHR_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_lshr_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_LSHR_B32 - - Inst_SOP2__S_LSHR_B32::~Inst_SOP2__S_LSHR_B32() - { - } // ~Inst_SOP2__S_LSHR_B32 - - // --- description from .arch file --- - // D.u = S0.u >> S1.u[4:0]; - // SCC = 1 if result is non-zero. - // The vacated bits are set to zero. - void - Inst_SOP2__S_LSHR_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0)); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_LSHR_B64 class methods --- - - Inst_SOP2__S_LSHR_B64::Inst_SOP2__S_LSHR_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_lshr_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_LSHR_B64 - - Inst_SOP2__S_LSHR_B64::~Inst_SOP2__S_LSHR_B64() - { - } // ~Inst_SOP2__S_LSHR_B64 - - // --- description from .arch file --- - // D.u64 = S0.u64 >> S1.u[5:0]; - // SCC = 1 if result is non-zero. - // The vacated bits are set to zero. - void - Inst_SOP2__S_LSHR_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0)); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_ASHR_I32 class methods --- - - Inst_SOP2__S_ASHR_I32::Inst_SOP2__S_ASHR_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_ashr_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_ASHR_I32 - - Inst_SOP2__S_ASHR_I32::~Inst_SOP2__S_ASHR_I32() - { - } // ~Inst_SOP2__S_ASHR_I32 - - // --- description from .arch file --- - // D.i = signext(S0.i) >> S1.u[4:0]; - // SCC = 1 if result is non-zero. - // The vacated bits are set to the sign bit of the input value. - void - Inst_SOP2__S_ASHR_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0)); - scc = sdst.rawData() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_ASHR_I64 class methods --- - - Inst_SOP2__S_ASHR_I64::Inst_SOP2__S_ASHR_I64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_ashr_i64") - { - setFlag(ALU); - } // Inst_SOP2__S_ASHR_I64 - - Inst_SOP2__S_ASHR_I64::~Inst_SOP2__S_ASHR_I64() - { - } // ~Inst_SOP2__S_ASHR_I64 - - // --- description from .arch file --- - // D.i64 = signext(S0.i64) >> S1.u[5:0]; - // SCC = 1 if result is non-zero. - // The vacated bits are set to the sign bit of the input value. - void - Inst_SOP2__S_ASHR_I64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0)); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_BFM_B32 class methods --- - - Inst_SOP2__S_BFM_B32::Inst_SOP2__S_BFM_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_bfm_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_BFM_B32 - - Inst_SOP2__S_BFM_B32::~Inst_SOP2__S_BFM_B32() - { - } // ~Inst_SOP2__S_BFM_B32 - - // --- description from .arch file --- - // D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0] (bitfield mask). - void - Inst_SOP2__S_BFM_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - - src0.read(); - src1.read(); - - sdst = ((1 << bits(src0.rawData(), 4, 0)) - 1) - << bits(src1.rawData(), 4, 0); - - sdst.write(); - } // execute - // --- Inst_SOP2__S_BFM_B64 class methods --- - - Inst_SOP2__S_BFM_B64::Inst_SOP2__S_BFM_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_bfm_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_BFM_B64 - - Inst_SOP2__S_BFM_B64::~Inst_SOP2__S_BFM_B64() - { - } // ~Inst_SOP2__S_BFM_B64 - - // --- description from .arch file --- - // D.u64 = ((1ULL << S0.u[5:0]) - 1) << S1.u[5:0] (bitfield mask). - void - Inst_SOP2__S_BFM_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - - src0.read(); - src1.read(); - - sdst = ((1ULL << bits(src0.rawData(), 5, 0)) - 1) - << bits(src1.rawData(), 5, 0); - - sdst.write(); - } // execute - // --- Inst_SOP2__S_MUL_I32 class methods --- - - Inst_SOP2__S_MUL_I32::Inst_SOP2__S_MUL_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_mul_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_MUL_I32 - - Inst_SOP2__S_MUL_I32::~Inst_SOP2__S_MUL_I32() - { - } // ~Inst_SOP2__S_MUL_I32 - - // --- description from .arch file --- - // D.i = S0.i * S1.i. 
- void - Inst_SOP2__S_MUL_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src0.read(); - src1.read(); - - sdst = src0.rawData() * src1.rawData(); - - sdst.write(); - } // execute - // --- Inst_SOP2__S_BFE_U32 class methods --- - - Inst_SOP2__S_BFE_U32::Inst_SOP2__S_BFE_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_bfe_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_BFE_U32 - - Inst_SOP2__S_BFE_U32::~Inst_SOP2__S_BFE_U32() - { - } // ~Inst_SOP2__S_BFE_U32 - - // --- description from .arch file --- - // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is - // field width. - // D.u = (S0.u>>S1.u[4:0]) & ((1<> bits(src1.rawData(), 4, 0)) - & ((1 << bits(src1.rawData(), 22, 16)) - 1); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_BFE_I32 class methods --- - - Inst_SOP2__S_BFE_I32::Inst_SOP2__S_BFE_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_bfe_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_BFE_I32 - - Inst_SOP2__S_BFE_I32::~Inst_SOP2__S_BFE_I32() - { - } // ~Inst_SOP2__S_BFE_I32 - - // --- description from .arch file --- - // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is - // field width. - // D.i = (S0.i>>S1.u[4:0]) & ((1<> bits(src1.rawData(), 4, 0)) - & ((1 << bits(src1.rawData(), 22, 16)) - 1); - - // Above extracted a signed int of size src1[22:16] bits which needs - // to be signed-extended. Check if the MSB of our src1[22:16]-bit - // integer is 1, and sign extend it is. - // - // Note: The description in the Vega ISA manual does not mention to - // sign-extend the result. An update description can be found in the - // more recent RDNA3 manual here: - // https://developer.amd.com/wp-content/resources/ - // RDNA3_Shader_ISA_December2022.pdf - if (sdst.rawData() >> (bits(src1.rawData(), 22, 16) - 1)) { - sdst = sdst.rawData() - | (0xffffffff << bits(src1.rawData(), 22, 16)); - } - - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_BFE_U64 class methods --- - - Inst_SOP2__S_BFE_U64::Inst_SOP2__S_BFE_U64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_bfe_u64") - { - setFlag(ALU); - } // Inst_SOP2__S_BFE_U64 - - Inst_SOP2__S_BFE_U64::~Inst_SOP2__S_BFE_U64() - { - } // ~Inst_SOP2__S_BFE_U64 - - // --- description from .arch file --- - // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is - // field width. - // D.u64 = (S0.u64>>S1.u[5:0]) & ((1<> bits(src1.rawData(), 5, 0)) - & ((1 << bits(src1.rawData(), 22, 16)) - 1); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_BFE_I64 class methods --- - - Inst_SOP2__S_BFE_I64::Inst_SOP2__S_BFE_I64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_bfe_i64") - { - setFlag(ALU); - } // Inst_SOP2__S_BFE_I64 - - Inst_SOP2__S_BFE_I64::~Inst_SOP2__S_BFE_I64() - { - } // ~Inst_SOP2__S_BFE_I64 - - // --- description from .arch file --- - // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is - // field width. - // D.i64 = (S0.i64>>S1.u[5:0]) & ((1<> bits(src1.rawData(), 5, 0)) - & ((1 << bits(src1.rawData(), 22, 16)) - 1); - - // Above extracted a signed int of size src1[22:16] bits which needs - // to be signed-extended. Check if the MSB of our src1[22:16]-bit - // integer is 1, and sign extend it is. 
- if (sdst.rawData() >> (bits(src1.rawData(), 22, 16) - 1)) { - sdst = sdst.rawData() - | 0xffffffffffffffff << bits(src1.rawData(), 22, 16); - } - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_CBRANCH_G_FORK class methods --- - - Inst_SOP2__S_CBRANCH_G_FORK::Inst_SOP2__S_CBRANCH_G_FORK(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_cbranch_g_fork") - { - setFlag(Branch); - } // Inst_SOP2__S_CBRANCH_G_FORK - - Inst_SOP2__S_CBRANCH_G_FORK::~Inst_SOP2__S_CBRANCH_G_FORK() - { - } // ~Inst_SOP2__S_CBRANCH_G_FORK - - // --- description from .arch file --- - // mask_pass = S0.u64 & EXEC; - // mask_fail = ~S0.u64 & EXEC; - // if(mask_pass == EXEC) - // PC = S1.u64; - // elsif(mask_fail == EXEC) - // PC += 4; - // elsif(bitcount(mask_fail) < bitcount(mask_pass)) - // EXEC = mask_fail; - // SGPR[CSP*4] = { S1.u64, mask_pass }; - // CSP++; - // PC += 4; - // else - // EXEC = mask_pass; - // SGPR[CSP*4] = { PC + 4, mask_fail }; - // CSP++; - // PC = S1.u64; - // end. - // Conditional branch using branch-stack. - // S0 = compare mask(vcc or any sgpr) and - // S1 = 64-bit byte address of target instruction. - // See also S_CBRANCH_JOIN. - void - Inst_SOP2__S_CBRANCH_G_FORK::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOP2__S_ABSDIFF_I32 class methods --- - - Inst_SOP2__S_ABSDIFF_I32::Inst_SOP2__S_ABSDIFF_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_absdiff_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_ABSDIFF_I32 - - Inst_SOP2__S_ABSDIFF_I32::~Inst_SOP2__S_ABSDIFF_I32() - { - } // ~Inst_SOP2__S_ABSDIFF_I32 - - // --- description from .arch file --- - // D.i = S0.i - S1.i; - // if(D.i < 0) then D.i = -D.i; - // SCC = 1 if result is non-zero. - // Compute the absolute value of difference between two values. - void - Inst_SOP2__S_ABSDIFF_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - sdst = std::abs(src0.rawData() - src1.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_RFE_RESTORE_B64 class methods --- - - Inst_SOP2__S_RFE_RESTORE_B64::Inst_SOP2__S_RFE_RESTORE_B64( - InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_rfe_restore_b64") - { - } // Inst_SOP2__S_RFE_RESTORE_B64 - - Inst_SOP2__S_RFE_RESTORE_B64::~Inst_SOP2__S_RFE_RESTORE_B64() - { - } // ~Inst_SOP2__S_RFE_RESTORE_B64 - - // --- description from .arch file --- - // PRIV = 0; - // PC = S0.u64; - // INST_ATC = S1.u32[0]. - // Return from exception handler and continue, possibly changing the - // --- instruction ATC mode. - // This instruction may only be used within a trap handler. - // Use this instruction when the main program may be in a different memory - // --- space than the trap handler. 
- void - Inst_SOP2__S_RFE_RESTORE_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOP2__S_MUL_HI_U32 class methods --- - - Inst_SOP2__S_MUL_HI_U32::Inst_SOP2__S_MUL_HI_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_mul_hi_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_MUL_HI_U32 - - Inst_SOP2__S_MUL_HI_U32::~Inst_SOP2__S_MUL_HI_U32() - { - } // ~Inst_SOP2__S_MUL_HI_U32 - - // --- description from .arch file --- - // D.u = (S0.u * S1.u) >> 32; - void - Inst_SOP2__S_MUL_HI_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - - src0.read(); - src1.read(); - - VecElemU64 tmp_dst = - ((VecElemU64)src0.rawData() * (VecElemU64)src1.rawData()); - sdst = (tmp_dst >> 32); - - sdst.write(); - } // execute - // --- Inst_SOP2__S_MUL_HI_I32 class methods --- - - Inst_SOP2__S_MUL_HI_I32::Inst_SOP2__S_MUL_HI_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_mul_hi_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_MUL_HI_I32 - - Inst_SOP2__S_MUL_HI_I32::~Inst_SOP2__S_MUL_HI_I32() - { - } // ~Inst_SOP2__S_MUL_HI_I32 - - // --- description from .arch file --- - // D.u = (S0.u * S1.u) >> 32; - void - Inst_SOP2__S_MUL_HI_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src0.read(); - src1.read(); - - VecElemI64 tmp_src0 = - sext::digits>(src0.rawData()); - VecElemI64 tmp_src1 = - sext::digits>(src1.rawData()); - sdst = (VecElemI32)((tmp_src0 * tmp_src1) >> 32); - - sdst.write(); - } // execute - // --- Inst_SOPK__S_MOVK_I32 class methods --- - - Inst_SOPK__S_MOVK_I32::Inst_SOPK__S_MOVK_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_movk_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_MOVK_I32 - - Inst_SOPK__S_MOVK_I32::~Inst_SOPK__S_MOVK_I32() - { - } // ~Inst_SOPK__S_MOVK_I32 - - // --- description from .arch file --- - // D.i = signext(SIMM16) (sign extension). - void - Inst_SOPK__S_MOVK_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - sdst = simm16; - - sdst.write(); - } // execute - // --- Inst_SOPK__S_CMOVK_I32 class methods --- - - Inst_SOPK__S_CMOVK_I32::Inst_SOPK__S_CMOVK_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmovk_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMOVK_I32 - - Inst_SOPK__S_CMOVK_I32::~Inst_SOPK__S_CMOVK_I32() - { - } // ~Inst_SOPK__S_CMOVK_I32 - - // --- description from .arch file --- - // if(SCC) then D.i = signext(SIMM16); - // else NOP. - // Conditional move with sign extension. - void - Inst_SOPK__S_CMOVK_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); - - scc.read(); - - if (scc.rawData()) { - sdst = simm16; - sdst.write(); - } - } // execute - // --- Inst_SOPK__S_CMPK_EQ_I32 class methods --- - - Inst_SOPK__S_CMPK_EQ_I32::Inst_SOPK__S_CMPK_EQ_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_eq_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_EQ_I32 - - Inst_SOPK__S_CMPK_EQ_I32::~Inst_SOPK__S_CMPK_EQ_I32() - { - } // ~Inst_SOPK__S_CMPK_EQ_I32 - - // --- description from .arch file --- - // SCC = (S0.i == signext(SIMM16)). 
- void - Inst_SOPK__S_CMPK_EQ_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() == simm16) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_LG_I32 class methods --- - - Inst_SOPK__S_CMPK_LG_I32::Inst_SOPK__S_CMPK_LG_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_lg_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_LG_I32 - - Inst_SOPK__S_CMPK_LG_I32::~Inst_SOPK__S_CMPK_LG_I32() - { - } // ~Inst_SOPK__S_CMPK_LG_I32 - - // --- description from .arch file --- - // SCC = (S0.i != signext(SIMM16)). - void - Inst_SOPK__S_CMPK_LG_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() != simm16) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_GT_I32 class methods --- - - Inst_SOPK__S_CMPK_GT_I32::Inst_SOPK__S_CMPK_GT_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_gt_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_GT_I32 - - Inst_SOPK__S_CMPK_GT_I32::~Inst_SOPK__S_CMPK_GT_I32() - { - } // ~Inst_SOPK__S_CMPK_GT_I32 - - // --- description from .arch file --- - // SCC = (S0.i > signext(SIMM16)). - void - Inst_SOPK__S_CMPK_GT_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() > simm16) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_GE_I32 class methods --- - - Inst_SOPK__S_CMPK_GE_I32::Inst_SOPK__S_CMPK_GE_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_ge_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_GE_I32 - - Inst_SOPK__S_CMPK_GE_I32::~Inst_SOPK__S_CMPK_GE_I32() - { - } // ~Inst_SOPK__S_CMPK_GE_I32 - - // --- description from .arch file --- - // SCC = (S0.i >= signext(SIMM16)). - void - Inst_SOPK__S_CMPK_GE_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() >= simm16) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_LT_I32 class methods --- - - Inst_SOPK__S_CMPK_LT_I32::Inst_SOPK__S_CMPK_LT_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_lt_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_LT_I32 - - Inst_SOPK__S_CMPK_LT_I32::~Inst_SOPK__S_CMPK_LT_I32() - { - } // ~Inst_SOPK__S_CMPK_LT_I32 - - // --- description from .arch file --- - // SCC = (S0.i < signext(SIMM16)). - void - Inst_SOPK__S_CMPK_LT_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() < simm16) ? 
1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_LE_I32 class methods --- - - Inst_SOPK__S_CMPK_LE_I32::Inst_SOPK__S_CMPK_LE_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_le_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_LE_I32 - - Inst_SOPK__S_CMPK_LE_I32::~Inst_SOPK__S_CMPK_LE_I32() - { - } // ~Inst_SOPK__S_CMPK_LE_I32 - - // --- description from .arch file --- - // SCC = (S0.i <= signext(SIMM16)). - void - Inst_SOPK__S_CMPK_LE_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() <= simm16) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_EQ_U32 class methods --- - - Inst_SOPK__S_CMPK_EQ_U32::Inst_SOPK__S_CMPK_EQ_U32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_eq_u32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_EQ_U32 - - Inst_SOPK__S_CMPK_EQ_U32::~Inst_SOPK__S_CMPK_EQ_U32() - { - } // ~Inst_SOPK__S_CMPK_EQ_U32 - - // --- description from .arch file --- - // SCC = (S0.u == SIMM16). - void - Inst_SOPK__S_CMPK_EQ_U32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; - ConstScalarOperandU32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() == simm16) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_LG_U32 class methods --- - - Inst_SOPK__S_CMPK_LG_U32::Inst_SOPK__S_CMPK_LG_U32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_lg_u32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_LG_U32 - - Inst_SOPK__S_CMPK_LG_U32::~Inst_SOPK__S_CMPK_LG_U32() - { - } // ~Inst_SOPK__S_CMPK_LG_U32 - - // --- description from .arch file --- - // SCC = (S0.u != SIMM16). - void - Inst_SOPK__S_CMPK_LG_U32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; - ConstScalarOperandU32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() != simm16) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_GT_U32 class methods --- - - Inst_SOPK__S_CMPK_GT_U32::Inst_SOPK__S_CMPK_GT_U32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_gt_u32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_GT_U32 - - Inst_SOPK__S_CMPK_GT_U32::~Inst_SOPK__S_CMPK_GT_U32() - { - } // ~Inst_SOPK__S_CMPK_GT_U32 - - // --- description from .arch file --- - // SCC = (S0.u > SIMM16). - void - Inst_SOPK__S_CMPK_GT_U32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; - ConstScalarOperandU32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() > simm16) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_GE_U32 class methods --- - - Inst_SOPK__S_CMPK_GE_U32::Inst_SOPK__S_CMPK_GE_U32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_ge_u32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_GE_U32 - - Inst_SOPK__S_CMPK_GE_U32::~Inst_SOPK__S_CMPK_GE_U32() - { - } // ~Inst_SOPK__S_CMPK_GE_U32 - - // --- description from .arch file --- - // SCC = (S0.u >= SIMM16). - void - Inst_SOPK__S_CMPK_GE_U32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; - ConstScalarOperandU32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() >= simm16) ? 
1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_LT_U32 class methods --- - - Inst_SOPK__S_CMPK_LT_U32::Inst_SOPK__S_CMPK_LT_U32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_lt_u32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_LT_U32 - - Inst_SOPK__S_CMPK_LT_U32::~Inst_SOPK__S_CMPK_LT_U32() - { - } // ~Inst_SOPK__S_CMPK_LT_U32 - - // --- description from .arch file --- - // SCC = (S0.u < SIMM16). - void - Inst_SOPK__S_CMPK_LT_U32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; - ConstScalarOperandU32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() < simm16) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_LE_U32 class methods --- - - Inst_SOPK__S_CMPK_LE_U32::Inst_SOPK__S_CMPK_LE_U32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_le_u32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_LE_U32 - - Inst_SOPK__S_CMPK_LE_U32::~Inst_SOPK__S_CMPK_LE_U32() - { - } // ~Inst_SOPK__S_CMPK_LE_U32 - - // --- description from .arch file --- - // SCC = (S0.u <= SIMM16). - void - Inst_SOPK__S_CMPK_LE_U32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; - ConstScalarOperandU32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() <= simm16) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_ADDK_I32 class methods --- - - Inst_SOPK__S_ADDK_I32::Inst_SOPK__S_ADDK_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_addk_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_ADDK_I32 - - Inst_SOPK__S_ADDK_I32::~Inst_SOPK__S_ADDK_I32() - { - } // ~Inst_SOPK__S_ADDK_I32 - - // --- description from .arch file --- - // D.i = D.i + signext(SIMM16); - // SCC = overflow. - void - Inst_SOPK__S_ADDK_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI16 simm16 = instData.SIMM16; - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = src.rawData() + (ScalarRegI32)sext<16>(simm16); - scc = (bits(src.rawData(), 31) == bits(simm16, 15) - && bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOPK__S_MULK_I32 class methods --- - - Inst_SOPK__S_MULK_I32::Inst_SOPK__S_MULK_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_mulk_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_MULK_I32 - - Inst_SOPK__S_MULK_I32::~Inst_SOPK__S_MULK_I32() - { - } // ~Inst_SOPK__S_MULK_I32 - - // --- description from .arch file --- - // D.i = D.i * signext(SIMM16). 
- void - Inst_SOPK__S_MULK_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI16 simm16 = instData.SIMM16; - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = src.rawData() * (ScalarRegI32)sext<16>(simm16); - - sdst.write(); - } // execute - // --- Inst_SOPK__S_CBRANCH_I_FORK class methods --- - - Inst_SOPK__S_CBRANCH_I_FORK::Inst_SOPK__S_CBRANCH_I_FORK(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cbranch_i_fork") - { - setFlag(Branch); - } // Inst_SOPK__S_CBRANCH_I_FORK - - Inst_SOPK__S_CBRANCH_I_FORK::~Inst_SOPK__S_CBRANCH_I_FORK() - { - } // ~Inst_SOPK__S_CBRANCH_I_FORK - - // --- description from .arch file --- - // mask_pass = S0.u64 & EXEC; - // mask_fail = ~S0.u64 & EXEC; - // target_addr = PC + signext(SIMM16 * 4) + 4; - // if(mask_pass == EXEC) - // PC = target_addr; - // elsif(mask_fail == EXEC) - // PC += 4; - // elsif(bitcount(mask_fail) < bitcount(mask_pass)) - // EXEC = mask_fail; - // SGPR[CSP*4] = { target_addr, mask_pass }; - // CSP++; - // PC += 4; - // else - // EXEC = mask_pass; - // SGPR[CSP*4] = { PC + 4, mask_fail }; - // CSP++; - // PC = target_addr; - // end. - // Conditional branch using branch-stack. - // S0 = compare mask(vcc or any sgpr), and - // SIMM16 = signed DWORD branch offset relative to next instruction. - // See also S_CBRANCH_JOIN. - void - Inst_SOPK__S_CBRANCH_I_FORK::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPK__S_GETREG_B32 class methods --- - - Inst_SOPK__S_GETREG_B32::Inst_SOPK__S_GETREG_B32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_getreg_b32") - { - setFlag(ALU); - } // Inst_SOPK__S_GETREG_B32 - - Inst_SOPK__S_GETREG_B32::~Inst_SOPK__S_GETREG_B32() - { - } // ~Inst_SOPK__S_GETREG_B32 - - // --- description from .arch file --- - // D.u = hardware-reg. Read some or all of a hardware register into the - // LSBs of D. - // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size - // is 1..32. - void - Inst_SOPK__S_GETREG_B32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI16 simm16 = instData.SIMM16; - ScalarRegU32 hwregId = simm16 & 0x3f; - ScalarRegU32 offset = (simm16 >> 6) & 31; - ScalarRegU32 size = ((simm16 >> 11) & 31) + 1; - - ScalarRegU32 hwreg = - gpuDynInst->computeUnit()->shader->getHwReg(hwregId); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - sdst.read(); - - // Store value from hardware to part of the SDST. - ScalarRegU32 mask = (((1U << size) - 1U) << offset); - sdst = (hwreg & mask) >> offset; - sdst.write(); - } // execute - // --- Inst_SOPK__S_SETREG_B32 class methods --- - - Inst_SOPK__S_SETREG_B32::Inst_SOPK__S_SETREG_B32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_setreg_b32") - { - setFlag(ALU); - } // Inst_SOPK__S_SETREG_B32 - - Inst_SOPK__S_SETREG_B32::~Inst_SOPK__S_SETREG_B32() - { - } // ~Inst_SOPK__S_SETREG_B32 - - // --- description from .arch file --- - // hardware-reg = S0.u. Write some or all of the LSBs of D into a hardware - // register. - // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size - // is 1..32. 
- void - Inst_SOPK__S_SETREG_B32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI16 simm16 = instData.SIMM16; - ScalarRegU32 hwregId = simm16 & 0x3f; - ScalarRegU32 offset = (simm16 >> 6) & 31; - ScalarRegU32 size = ((simm16 >> 11) & 31) + 1; - - ScalarRegU32 hwreg = - gpuDynInst->computeUnit()->shader->getHwReg(hwregId); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - sdst.read(); - - // Store value from SDST to part of the hardware register. - ScalarRegU32 mask = (((1U << size) - 1U) << offset); - hwreg = ((hwreg & ~mask) | ((sdst.rawData() << offset) & mask)); - gpuDynInst->computeUnit()->shader->setHwReg(hwregId, hwreg); - - // set MODE register to control the behavior of single precision - // floating-point numbers: denormal mode or round mode - if (hwregId==1 && size==2 - && (offset==4 || offset==0)) { - warn_once("Be cautious that s_setreg_b32 has no real effect " - "on FP modes: %s\n", gpuDynInst->disassemble()); - return; - } - - // panic if not changing MODE of floating-point numbers - panicUnimplemented(); - } // execute - // --- Inst_SOPK__S_SETREG_IMM32_B32 class methods --- - - Inst_SOPK__S_SETREG_IMM32_B32::Inst_SOPK__S_SETREG_IMM32_B32( - InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_setreg_imm32_b32") - { - setFlag(ALU); - } // Inst_SOPK__S_SETREG_IMM32_B32 - - Inst_SOPK__S_SETREG_IMM32_B32::~Inst_SOPK__S_SETREG_IMM32_B32() - { - } // ~Inst_SOPK__S_SETREG_IMM32_B32 - - // --- description from .arch file --- - // Write some or all of the LSBs of IMM32 into a hardware register; this - // --- instruction requires a 32-bit literal constant. - // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size - // is 1..32. - void - Inst_SOPK__S_SETREG_IMM32_B32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI16 simm16 = instData.SIMM16; - ScalarRegU32 hwregId = simm16 & 0x3f; - ScalarRegU32 offset = (simm16 >> 6) & 31; - ScalarRegU32 size = ((simm16 >> 11) & 31) + 1; - - ScalarRegU32 hwreg = - gpuDynInst->computeUnit()->shader->getHwReg(hwregId); - ScalarRegI32 simm32 = extData.imm_u32; - - // Store value from SIMM32 to part of the hardware register. - ScalarRegU32 mask = (((1U << size) - 1U) << offset); - hwreg = ((hwreg & ~mask) | ((simm32 << offset) & mask)); - gpuDynInst->computeUnit()->shader->setHwReg(hwregId, hwreg); - - // set MODE register to control the behavior of single precision - // floating-point numbers: denormal mode or round mode - if (hwregId==HW_REG_MODE && size==2 - && (offset==4 || offset==0)) { - warn_once("Be cautious that s_setreg_imm32_b32 has no real effect " - "on FP modes: %s\n", gpuDynInst->disassemble()); - return; - } - - // panic if not changing modes of single-precision FPs - panicUnimplemented(); - } // execute - // --- Inst_SOP1__S_MOV_B32 class methods --- - - Inst_SOP1__S_MOV_B32::Inst_SOP1__S_MOV_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_mov_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_MOV_B32 - - Inst_SOP1__S_MOV_B32::~Inst_SOP1__S_MOV_B32() - { - } // ~Inst_SOP1__S_MOV_B32 - - // --- description from .arch file --- - // D.u = S0.u. 
- void - Inst_SOP1__S_MOV_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = src.rawData(); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_MOV_B64 class methods --- - - Inst_SOP1__S_MOV_B64::Inst_SOP1__S_MOV_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_mov_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_MOV_B64 - - Inst_SOP1__S_MOV_B64::~Inst_SOP1__S_MOV_B64() - { - } // ~Inst_SOP1__S_MOV_B64 - - // --- description from .arch file --- - // D.u64 = S0.u64. - void - Inst_SOP1__S_MOV_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = src.rawData(); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_CMOV_B32 class methods --- - - Inst_SOP1__S_CMOV_B32::Inst_SOP1__S_CMOV_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_cmov_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_CMOV_B32 - - Inst_SOP1__S_CMOV_B32::~Inst_SOP1__S_CMOV_B32() - { - } // ~Inst_SOP1__S_CMOV_B32 - - // --- description from .arch file --- - // (SCC) then D.u = S0.u; - // else NOP. - // Conditional move. - void - Inst_SOP1__S_CMOV_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - scc.read(); - - if (scc.rawData()) { - sdst = src.rawData(); - sdst.write(); - } - } // execute - // --- Inst_SOP1__S_CMOV_B64 class methods --- - - Inst_SOP1__S_CMOV_B64::Inst_SOP1__S_CMOV_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_cmov_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_CMOV_B64 - - Inst_SOP1__S_CMOV_B64::~Inst_SOP1__S_CMOV_B64() - { - } // ~Inst_SOP1__S_CMOV_B64 - - // --- description from .arch file --- - // if(SCC) then D.u64 = S0.u64; - // else NOP. - // Conditional move. - void - Inst_SOP1__S_CMOV_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - scc.read(); - - if (scc.rawData()) { - sdst = src.rawData(); - sdst.write(); - } - } // execute - // --- Inst_SOP1__S_NOT_B32 class methods --- - - Inst_SOP1__S_NOT_B32::Inst_SOP1__S_NOT_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_not_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_NOT_B32 - - Inst_SOP1__S_NOT_B32::~Inst_SOP1__S_NOT_B32() - { - } // ~Inst_SOP1__S_NOT_B32 - - // --- description from .arch file --- - // D.u = ~S0.u; - // SCC = 1 if result is non-zero. - // Bitwise negation. - void - Inst_SOP1__S_NOT_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = ~src.rawData(); - - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_NOT_B64 class methods --- - - Inst_SOP1__S_NOT_B64::Inst_SOP1__S_NOT_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_not_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_NOT_B64 - - Inst_SOP1__S_NOT_B64::~Inst_SOP1__S_NOT_B64() - { - } // ~Inst_SOP1__S_NOT_B64 - - // --- description from .arch file --- - // D.u64 = ~S0.u64; - // SCC = 1 if result is non-zero. - // Bitwise negation. 
- void - Inst_SOP1__S_NOT_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = ~src.rawData(); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_WQM_B32 class methods --- - - Inst_SOP1__S_WQM_B32::Inst_SOP1__S_WQM_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_wqm_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_WQM_B32 - - Inst_SOP1__S_WQM_B32::~Inst_SOP1__S_WQM_B32() - { - } // ~Inst_SOP1__S_WQM_B32 - - // --- description from .arch file --- - // D[i] = (S0[(i & ~3):(i | 3)] != 0); - // Computes whole quad mode for an active/valid mask. - // SCC = 1 if result is non-zero. - void - Inst_SOP1__S_WQM_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = wholeQuadMode(src.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_WQM_B64 class methods --- - - Inst_SOP1__S_WQM_B64::Inst_SOP1__S_WQM_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_wqm_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_WQM_B64 - - Inst_SOP1__S_WQM_B64::~Inst_SOP1__S_WQM_B64() - { - } // ~Inst_SOP1__S_WQM_B64 - - // --- description from .arch file --- - // D[i] = (S0[(i & ~3):(i | 3)] != 0); - // Computes whole quad mode for an active/valid mask. - // SCC = 1 if result is non-zero. - void - Inst_SOP1__S_WQM_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = wholeQuadMode(src.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_BREV_B32 class methods --- - - Inst_SOP1__S_BREV_B32::Inst_SOP1__S_BREV_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_brev_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_BREV_B32 - - Inst_SOP1__S_BREV_B32::~Inst_SOP1__S_BREV_B32() - { - } // ~Inst_SOP1__S_BREV_B32 - - // --- description from .arch file --- - // D.u[31:0] = S0.u[0:31] (reverse bits). - void - Inst_SOP1__S_BREV_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = reverseBits(src.rawData()); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_BREV_B64 class methods --- - - Inst_SOP1__S_BREV_B64::Inst_SOP1__S_BREV_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_brev_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_BREV_B64 - - Inst_SOP1__S_BREV_B64::~Inst_SOP1__S_BREV_B64() - { - } // ~Inst_SOP1__S_BREV_B64 - - // --- description from .arch file --- - // D.u64[63:0] = S0.u64[0:63] (reverse bits). 
- void - Inst_SOP1__S_BREV_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = reverseBits(src.rawData()); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_BCNT0_I32_B32 class methods --- - - Inst_SOP1__S_BCNT0_I32_B32::Inst_SOP1__S_BCNT0_I32_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_bcnt0_i32_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_BCNT0_I32_B32 - - Inst_SOP1__S_BCNT0_I32_B32::~Inst_SOP1__S_BCNT0_I32_B32() - { - } // ~Inst_SOP1__S_BCNT0_I32_B32 - - // --- description from .arch file --- - // D.i = CountZeroBits(S0.u); - // SCC = 1 if result is non-zero. - void - Inst_SOP1__S_BCNT0_I32_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = countZeroBits(src.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_BCNT0_I32_B64 class methods --- - - Inst_SOP1__S_BCNT0_I32_B64::Inst_SOP1__S_BCNT0_I32_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_bcnt0_i32_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_BCNT0_I32_B64 - - Inst_SOP1__S_BCNT0_I32_B64::~Inst_SOP1__S_BCNT0_I32_B64() - { - } // ~Inst_SOP1__S_BCNT0_I32_B64 - - // --- description from .arch file --- - // D.i = CountZeroBits(S0.u64); - // SCC = 1 if result is non-zero. - void - Inst_SOP1__S_BCNT0_I32_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = countZeroBits(src.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_BCNT1_I32_B32 class methods --- - - Inst_SOP1__S_BCNT1_I32_B32::Inst_SOP1__S_BCNT1_I32_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_bcnt1_i32_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_BCNT1_I32_B32 - - Inst_SOP1__S_BCNT1_I32_B32::~Inst_SOP1__S_BCNT1_I32_B32() - { - } // ~Inst_SOP1__S_BCNT1_I32_B32 - - // --- description from .arch file --- - // D.i = CountOneBits(S0.u); - // SCC = 1 if result is non-zero. - void - Inst_SOP1__S_BCNT1_I32_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = popCount(src.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_BCNT1_I32_B64 class methods --- - - Inst_SOP1__S_BCNT1_I32_B64::Inst_SOP1__S_BCNT1_I32_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_bcnt1_i32_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_BCNT1_I32_B64 - - Inst_SOP1__S_BCNT1_I32_B64::~Inst_SOP1__S_BCNT1_I32_B64() - { - } // ~Inst_SOP1__S_BCNT1_I32_B64 - - // --- description from .arch file --- - // D.i = CountOneBits(S0.u64); - // SCC = 1 if result is non-zero. - void - Inst_SOP1__S_BCNT1_I32_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = popCount(src.rawData()); - scc = sdst.rawData() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_FF0_I32_B32 class methods --- - - Inst_SOP1__S_FF0_I32_B32::Inst_SOP1__S_FF0_I32_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_ff0_i32_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_FF0_I32_B32 - - Inst_SOP1__S_FF0_I32_B32::~Inst_SOP1__S_FF0_I32_B32() - { - } // ~Inst_SOP1__S_FF0_I32_B32 - - // --- description from .arch file --- - // D.i = FindFirstZero(S0.u); - // If no zeros are found, return -1. - // Returns the bit position of the first zero from the LSB. - void - Inst_SOP1__S_FF0_I32_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = findFirstZero(src.rawData()); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_FF0_I32_B64 class methods --- - - Inst_SOP1__S_FF0_I32_B64::Inst_SOP1__S_FF0_I32_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_ff0_i32_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_FF0_I32_B64 - - Inst_SOP1__S_FF0_I32_B64::~Inst_SOP1__S_FF0_I32_B64() - { - } // ~Inst_SOP1__S_FF0_I32_B64 - - // --- description from .arch file --- - // D.i = FindFirstZero(S0.u64); - // If no zeros are found, return -1. - // Returns the bit position of the first zero from the LSB. - void - Inst_SOP1__S_FF0_I32_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = findFirstZero(src.rawData()); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_FF1_I32_B32 class methods --- - - Inst_SOP1__S_FF1_I32_B32::Inst_SOP1__S_FF1_I32_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_ff1_i32_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_FF1_I32_B32 - - Inst_SOP1__S_FF1_I32_B32::~Inst_SOP1__S_FF1_I32_B32() - { - } // ~Inst_SOP1__S_FF1_I32_B32 - - // --- description from .arch file --- - // D.i = FindFirstOne(S0.u); - // If no ones are found, return -1. - // Returns the bit position of the first one from the LSB. - void - Inst_SOP1__S_FF1_I32_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = findFirstOne(src.rawData()); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_FF1_I32_B64 class methods --- - - Inst_SOP1__S_FF1_I32_B64::Inst_SOP1__S_FF1_I32_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_ff1_i32_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_FF1_I32_B64 - - Inst_SOP1__S_FF1_I32_B64::~Inst_SOP1__S_FF1_I32_B64() - { - } // ~Inst_SOP1__S_FF1_I32_B64 - - // --- description from .arch file --- - // D.i = FindFirstOne(S0.u64); - // If no ones are found, return -1. - // Returns the bit position of the first one from the LSB. - void - Inst_SOP1__S_FF1_I32_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = findFirstOne(src.rawData()); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_FLBIT_I32_B32 class methods --- - - Inst_SOP1__S_FLBIT_I32_B32::Inst_SOP1__S_FLBIT_I32_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_flbit_i32_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_FLBIT_I32_B32 - - Inst_SOP1__S_FLBIT_I32_B32::~Inst_SOP1__S_FLBIT_I32_B32() - { - } // ~Inst_SOP1__S_FLBIT_I32_B32 - - // --- description from .arch file --- - // D.i = FindFirstOne(S0.u); - // If no ones are found, return -1. 
- // Counts how many zeros before the first one starting from the MSB. - void - Inst_SOP1__S_FLBIT_I32_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = countZeroBitsMsb(src.rawData()); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_FLBIT_I32_B64 class methods --- - - Inst_SOP1__S_FLBIT_I32_B64::Inst_SOP1__S_FLBIT_I32_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_flbit_i32_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_FLBIT_I32_B64 - - Inst_SOP1__S_FLBIT_I32_B64::~Inst_SOP1__S_FLBIT_I32_B64() - { - } // ~Inst_SOP1__S_FLBIT_I32_B64 - - // --- description from .arch file --- - // D.i = FindFirstOne(S0.u64); - // If no ones are found, return -1. - // Counts how many zeros before the first one starting from the MSB. - void - Inst_SOP1__S_FLBIT_I32_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = countZeroBitsMsb(src.rawData()); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_FLBIT_I32 class methods --- - - Inst_SOP1__S_FLBIT_I32::Inst_SOP1__S_FLBIT_I32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_flbit_i32") - { - setFlag(ALU); - } // Inst_SOP1__S_FLBIT_I32 - - Inst_SOP1__S_FLBIT_I32::~Inst_SOP1__S_FLBIT_I32() - { - } // ~Inst_SOP1__S_FLBIT_I32 - - // --- description from .arch file --- - // D.i = FirstOppositeSignBit(S0.i); - // If S0.i == 0 or S0.i == -1 (all bits are the same), return -1. - // Counts how many bits in a row (from MSB to LSB) are the same as the - // sign bit. - void - Inst_SOP1__S_FLBIT_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = firstOppositeSignBit(src.rawData()); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_FLBIT_I32_I64 class methods --- - - Inst_SOP1__S_FLBIT_I32_I64::Inst_SOP1__S_FLBIT_I32_I64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_flbit_i32_i64") - { - setFlag(ALU); - } // Inst_SOP1__S_FLBIT_I32_I64 - - Inst_SOP1__S_FLBIT_I32_I64::~Inst_SOP1__S_FLBIT_I32_I64() - { - } // ~Inst_SOP1__S_FLBIT_I32_I64 - - // --- description from .arch file --- - // D.i = FirstOppositeSignBit(S0.i64); - // If S0.i == 0 or S0.i == -1 (all bits are the same), return -1. - // Counts how many bits in a row (from MSB to LSB) are the same as the - // sign bit. - void - Inst_SOP1__S_FLBIT_I32_I64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI64 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = firstOppositeSignBit(src.rawData()); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_SEXT_I32_I8 class methods --- - - Inst_SOP1__S_SEXT_I32_I8::Inst_SOP1__S_SEXT_I32_I8(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_sext_i32_i8") - { - setFlag(ALU); - } // Inst_SOP1__S_SEXT_I32_I8 - - Inst_SOP1__S_SEXT_I32_I8::~Inst_SOP1__S_SEXT_I32_I8() - { - } // ~Inst_SOP1__S_SEXT_I32_I8 - - // --- description from .arch file --- - // D.i = signext(S0.i[7:0]) (sign extension). 
-    void
-    Inst_SOP1__S_SEXT_I32_I8::execute(GPUDynInstPtr gpuDynInst)
-    {
-        ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
-        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
-
-        src.read();
-
-        sdst = sext<std::numeric_limits<ScalarRegI8>::digits>(
-            bits(src.rawData(), 7, 0));
-
-        sdst.write();
-    } // execute
-    // --- Inst_SOP1__S_SEXT_I32_I16 class methods ---
-
-    Inst_SOP1__S_SEXT_I32_I16::Inst_SOP1__S_SEXT_I32_I16(InFmt_SOP1 *iFmt)
-        : Inst_SOP1(iFmt, "s_sext_i32_i16")
-    {
-        setFlag(ALU);
-    } // Inst_SOP1__S_SEXT_I32_I16
-
-    Inst_SOP1__S_SEXT_I32_I16::~Inst_SOP1__S_SEXT_I32_I16()
-    {
-    } // ~Inst_SOP1__S_SEXT_I32_I16
-
-    // --- description from .arch file ---
-    // D.i = signext(S0.i[15:0]) (sign extension).
-    void
-    Inst_SOP1__S_SEXT_I32_I16::execute(GPUDynInstPtr gpuDynInst)
-    {
-        ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
-        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
-
-        src.read();
-
-        sdst = sext<std::numeric_limits<ScalarRegI16>::digits>(
-            bits(src.rawData(), 15, 0));
-
-        sdst.write();
-    } // execute
-    // --- Inst_SOP1__S_BITSET0_B32 class methods ---
-
-    Inst_SOP1__S_BITSET0_B32::Inst_SOP1__S_BITSET0_B32(InFmt_SOP1 *iFmt)
-        : Inst_SOP1(iFmt, "s_bitset0_b32")
-    {
-        setFlag(ALU);
-    } // Inst_SOP1__S_BITSET0_B32
-
-    Inst_SOP1__S_BITSET0_B32::~Inst_SOP1__S_BITSET0_B32()
-    {
-    } // ~Inst_SOP1__S_BITSET0_B32
-
-    // --- description from .arch file ---
-    // D.u[S0.u[4:0]] = 0.
-    void
-    Inst_SOP1__S_BITSET0_B32::execute(GPUDynInstPtr gpuDynInst)
-    {
-        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
-        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
-
-        src.read();
-
-        sdst.setBit(bits(src.rawData(), 4, 0), 0);
-
-        sdst.write();
-    } // execute
-    // --- Inst_SOP1__S_BITSET0_B64 class methods ---
-
-    Inst_SOP1__S_BITSET0_B64::Inst_SOP1__S_BITSET0_B64(InFmt_SOP1 *iFmt)
-        : Inst_SOP1(iFmt, "s_bitset0_b64")
-    {
-        setFlag(ALU);
-    } // Inst_SOP1__S_BITSET0_B64
-
-    Inst_SOP1__S_BITSET0_B64::~Inst_SOP1__S_BITSET0_B64()
-    {
-    } // ~Inst_SOP1__S_BITSET0_B64
-
-    // --- description from .arch file ---
-    // D.u64[S0.u[5:0]] = 0.
-    void
-    Inst_SOP1__S_BITSET0_B64::execute(GPUDynInstPtr gpuDynInst)
-    {
-        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
-        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
-
-        src.read();
-
-        sdst.setBit(bits(src.rawData(), 5, 0), 0);
-
-        sdst.write();
-    } // execute
-    // --- Inst_SOP1__S_BITSET1_B32 class methods ---
-
-    Inst_SOP1__S_BITSET1_B32::Inst_SOP1__S_BITSET1_B32(InFmt_SOP1 *iFmt)
-        : Inst_SOP1(iFmt, "s_bitset1_b32")
-    {
-        setFlag(ALU);
-    } // Inst_SOP1__S_BITSET1_B32
-
-    Inst_SOP1__S_BITSET1_B32::~Inst_SOP1__S_BITSET1_B32()
-    {
-    } // ~Inst_SOP1__S_BITSET1_B32
-
-    // --- description from .arch file ---
-    // D.u[S0.u[4:0]] = 1.
-    void
-    Inst_SOP1__S_BITSET1_B32::execute(GPUDynInstPtr gpuDynInst)
-    {
-        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
-        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
-
-        src.read();
-
-        sdst.setBit(bits(src.rawData(), 4, 0), 1);
-
-        sdst.write();
-    } // execute
-    // --- Inst_SOP1__S_BITSET1_B64 class methods ---
-
-    Inst_SOP1__S_BITSET1_B64::Inst_SOP1__S_BITSET1_B64(InFmt_SOP1 *iFmt)
-        : Inst_SOP1(iFmt, "s_bitset1_b64")
-    {
-        setFlag(ALU);
-    } // Inst_SOP1__S_BITSET1_B64
-
-    Inst_SOP1__S_BITSET1_B64::~Inst_SOP1__S_BITSET1_B64()
-    {
-    } // ~Inst_SOP1__S_BITSET1_B64
-
-    // --- description from .arch file ---
-    // D.u64[S0.u[5:0]] = 1.
- void - Inst_SOP1__S_BITSET1_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst.setBit(bits(src.rawData(), 5, 0), 1); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_GETPC_B64 class methods --- - - Inst_SOP1__S_GETPC_B64::Inst_SOP1__S_GETPC_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_getpc_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_GETPC_B64 - - Inst_SOP1__S_GETPC_B64::~Inst_SOP1__S_GETPC_B64() - { - } // ~Inst_SOP1__S_GETPC_B64 - - // --- description from .arch file --- - // D.u64 = PC + 4. - // Destination receives the byte address of the next instruction. - void - Inst_SOP1__S_GETPC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Addr pc = gpuDynInst->pc(); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - - sdst = pc + 4; - - sdst.write(); - } // execute - // --- Inst_SOP1__S_SETPC_B64 class methods --- - - Inst_SOP1__S_SETPC_B64::Inst_SOP1__S_SETPC_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_setpc_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_SETPC_B64 - - Inst_SOP1__S_SETPC_B64::~Inst_SOP1__S_SETPC_B64() - { - } // ~Inst_SOP1__S_SETPC_B64 - - // --- description from .arch file --- - // PC = S0.u64. - // S0.u64 is a byte address of the instruction to jump to. - void - Inst_SOP1__S_SETPC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - - src.read(); - - wf->pc(src.rawData()); - } // execute - // --- Inst_SOP1__S_SWAPPC_B64 class methods --- - - Inst_SOP1__S_SWAPPC_B64::Inst_SOP1__S_SWAPPC_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_swappc_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_SWAPPC_B64 - - Inst_SOP1__S_SWAPPC_B64::~Inst_SOP1__S_SWAPPC_B64() - { - } // ~Inst_SOP1__S_SWAPPC_B64 - - // --- description from .arch file --- - // D.u64 = PC + 4; PC = S0.u64. - // S0.u64 is a byte address of the instruction to jump to. - void - Inst_SOP1__S_SWAPPC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - Addr pc = gpuDynInst->pc(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = pc + 4; - - wf->pc(src.rawData()); - sdst.write(); - } // execute - // --- Inst_SOP1__S_RFE_B64 class methods --- - - Inst_SOP1__S_RFE_B64::Inst_SOP1__S_RFE_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_rfe_b64") - { - } // Inst_SOP1__S_RFE_B64 - - Inst_SOP1__S_RFE_B64::~Inst_SOP1__S_RFE_B64() - { - } // ~Inst_SOP1__S_RFE_B64 - - // --- description from .arch file --- - // PRIV = 0; - // PC = S0.u64. - // Return from exception handler and continue. - // This instruction may only be used within a trap handler. - void - Inst_SOP1__S_RFE_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOP1__S_AND_SAVEEXEC_B64 class methods --- - - Inst_SOP1__S_AND_SAVEEXEC_B64::Inst_SOP1__S_AND_SAVEEXEC_B64( - InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_and_saveexec_b64") - { - setFlag(ALU); - setFlag(ReadsEXEC); - setFlag(WritesEXEC); - } // Inst_SOP1__S_AND_SAVEEXEC_B64 - - Inst_SOP1__S_AND_SAVEEXEC_B64::~Inst_SOP1__S_AND_SAVEEXEC_B64() - { - } // ~Inst_SOP1__S_AND_SAVEEXEC_B64 - - // --- description from .arch file --- - // D.u64 = EXEC; - // EXEC = S0.u64 & EXEC; - // SCC = 1 if the new value of EXEC is non-zero. 
- void - Inst_SOP1__S_AND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = wf->execMask().to_ullong(); - wf->execMask() = src.rawData() & wf->execMask().to_ullong(); - scc = wf->execMask().any() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_OR_SAVEEXEC_B64 class methods --- - - Inst_SOP1__S_OR_SAVEEXEC_B64::Inst_SOP1__S_OR_SAVEEXEC_B64( - InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_or_saveexec_b64") - { - setFlag(ALU); - setFlag(ReadsEXEC); - setFlag(WritesEXEC); - } // Inst_SOP1__S_OR_SAVEEXEC_B64 - - Inst_SOP1__S_OR_SAVEEXEC_B64::~Inst_SOP1__S_OR_SAVEEXEC_B64() - { - } // ~Inst_SOP1__S_OR_SAVEEXEC_B64 - - // --- description from .arch file --- - // D.u64 = EXEC; - // EXEC = S0.u64 | EXEC; - // SCC = 1 if the new value of EXEC is non-zero. - void - Inst_SOP1__S_OR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = wf->execMask().to_ullong(); - wf->execMask() = src.rawData() | wf->execMask().to_ullong(); - scc = wf->execMask().any() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_XOR_SAVEEXEC_B64 class methods --- - - Inst_SOP1__S_XOR_SAVEEXEC_B64::Inst_SOP1__S_XOR_SAVEEXEC_B64( - InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_xor_saveexec_b64") - { - setFlag(ALU); - setFlag(ReadsEXEC); - setFlag(WritesEXEC); - } // Inst_SOP1__S_XOR_SAVEEXEC_B64 - - Inst_SOP1__S_XOR_SAVEEXEC_B64::~Inst_SOP1__S_XOR_SAVEEXEC_B64() - { - } // ~Inst_SOP1__S_XOR_SAVEEXEC_B64 - - // --- description from .arch file --- - // D.u64 = EXEC; - // EXEC = S0.u64 ^ EXEC; - // SCC = 1 if the new value of EXEC is non-zero. - void - Inst_SOP1__S_XOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = wf->execMask().to_ullong(); - wf->execMask() = src.rawData() ^ wf->execMask().to_ullong(); - scc = wf->execMask().any() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_ANDN2_SAVEEXEC_B64 class methods --- - - Inst_SOP1__S_ANDN2_SAVEEXEC_B64::Inst_SOP1__S_ANDN2_SAVEEXEC_B64( - InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_andn2_saveexec_b64") - { - setFlag(ALU); - setFlag(ReadsEXEC); - setFlag(WritesEXEC); - } // Inst_SOP1__S_ANDN2_SAVEEXEC_B64 - - Inst_SOP1__S_ANDN2_SAVEEXEC_B64::~Inst_SOP1__S_ANDN2_SAVEEXEC_B64() - { - } // ~Inst_SOP1__S_ANDN2_SAVEEXEC_B64 - - // --- description from .arch file --- - // D.u64 = EXEC; - // EXEC = S0.u64 & ~EXEC; - // SCC = 1 if the new value of EXEC is non-zero. - void - Inst_SOP1__S_ANDN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = wf->execMask().to_ullong(); - wf->execMask() = src.rawData() &~ wf->execMask().to_ullong(); - scc = wf->execMask().any() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_ORN2_SAVEEXEC_B64 class methods --- - - Inst_SOP1__S_ORN2_SAVEEXEC_B64::Inst_SOP1__S_ORN2_SAVEEXEC_B64( - InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_orn2_saveexec_b64") - { - setFlag(ALU); - setFlag(ReadsEXEC); - setFlag(WritesEXEC); - } // Inst_SOP1__S_ORN2_SAVEEXEC_B64 - - Inst_SOP1__S_ORN2_SAVEEXEC_B64::~Inst_SOP1__S_ORN2_SAVEEXEC_B64() - { - } // ~Inst_SOP1__S_ORN2_SAVEEXEC_B64 - - // --- description from .arch file --- - // D.u64 = EXEC; - // EXEC = S0.u64 | ~EXEC; - // SCC = 1 if the new value of EXEC is non-zero. - void - Inst_SOP1__S_ORN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = wf->execMask().to_ullong(); - wf->execMask() = src.rawData() |~ wf->execMask().to_ullong(); - scc = wf->execMask().any() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_NAND_SAVEEXEC_B64 class methods --- - - Inst_SOP1__S_NAND_SAVEEXEC_B64::Inst_SOP1__S_NAND_SAVEEXEC_B64( - InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_nand_saveexec_b64") - { - setFlag(ALU); - setFlag(ReadsEXEC); - setFlag(WritesEXEC); - } // Inst_SOP1__S_NAND_SAVEEXEC_B64 - - Inst_SOP1__S_NAND_SAVEEXEC_B64::~Inst_SOP1__S_NAND_SAVEEXEC_B64() - { - } // ~Inst_SOP1__S_NAND_SAVEEXEC_B64 - - // --- description from .arch file --- - // D.u64 = EXEC; - // EXEC = ~(S0.u64 & EXEC); - // SCC = 1 if the new value of EXEC is non-zero. - void - Inst_SOP1__S_NAND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = wf->execMask().to_ullong(); - wf->execMask() = ~(src.rawData() & wf->execMask().to_ullong()); - scc = wf->execMask().any() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_NOR_SAVEEXEC_B64 class methods --- - - Inst_SOP1__S_NOR_SAVEEXEC_B64::Inst_SOP1__S_NOR_SAVEEXEC_B64( - InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_nor_saveexec_b64") - { - setFlag(ALU); - setFlag(ReadsEXEC); - setFlag(WritesEXEC); - } // Inst_SOP1__S_NOR_SAVEEXEC_B64 - - Inst_SOP1__S_NOR_SAVEEXEC_B64::~Inst_SOP1__S_NOR_SAVEEXEC_B64() - { - } // ~Inst_SOP1__S_NOR_SAVEEXEC_B64 - - // --- description from .arch file --- - // D.u64 = EXEC; - // EXEC = ~(S0.u64 | EXEC); - // SCC = 1 if the new value of EXEC is non-zero. - void - Inst_SOP1__S_NOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = wf->execMask().to_ullong(); - wf->execMask() = ~(src.rawData() | wf->execMask().to_ullong()); - scc = wf->execMask().any() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_XNOR_SAVEEXEC_B64 class methods --- - - Inst_SOP1__S_XNOR_SAVEEXEC_B64::Inst_SOP1__S_XNOR_SAVEEXEC_B64( - InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_xnor_saveexec_b64") - { - setFlag(ALU); - setFlag(ReadsEXEC); - setFlag(WritesEXEC); - } // Inst_SOP1__S_XNOR_SAVEEXEC_B64 - - Inst_SOP1__S_XNOR_SAVEEXEC_B64::~Inst_SOP1__S_XNOR_SAVEEXEC_B64() - { - } // ~Inst_SOP1__S_XNOR_SAVEEXEC_B64 - - // --- description from .arch file --- - // D.u64 = EXEC; - // EXEC = ~(S0.u64 ^ EXEC); - // SCC = 1 if the new value of EXEC is non-zero. - void - Inst_SOP1__S_XNOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = wf->execMask().to_ullong(); - wf->execMask() = ~(src.rawData() ^ wf->execMask().to_ullong()); - scc = wf->execMask().any() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_QUADMASK_B32 class methods --- - - Inst_SOP1__S_QUADMASK_B32::Inst_SOP1__S_QUADMASK_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_quadmask_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_QUADMASK_B32 - - Inst_SOP1__S_QUADMASK_B32::~Inst_SOP1__S_QUADMASK_B32() - { - } // ~Inst_SOP1__S_QUADMASK_B32 - - // --- description from .arch file --- - // D.u = QuadMask(S0.u): - // D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[31:8] = 0; - // SCC = 1 if result is non-zero. - void - Inst_SOP1__S_QUADMASK_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = quadMask(src.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_QUADMASK_B64 class methods --- - - Inst_SOP1__S_QUADMASK_B64::Inst_SOP1__S_QUADMASK_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_quadmask_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_QUADMASK_B64 - - Inst_SOP1__S_QUADMASK_B64::~Inst_SOP1__S_QUADMASK_B64() - { - } // ~Inst_SOP1__S_QUADMASK_B64 - - // --- description from .arch file --- - // D.u64 = QuadMask(S0.u64): - // D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[63:16] = 0; - // SCC = 1 if result is non-zero. - void - Inst_SOP1__S_QUADMASK_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = quadMask(src.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_MOVRELS_B32 class methods --- - - Inst_SOP1__S_MOVRELS_B32::Inst_SOP1__S_MOVRELS_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_movrels_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_MOVRELS_B32 - - Inst_SOP1__S_MOVRELS_B32::~Inst_SOP1__S_MOVRELS_B32() - { - } // ~Inst_SOP1__S_MOVRELS_B32 - - // --- description from .arch file --- - // D.u = SGPR[S0.u + M0.u].u (move from relative source). 
- void - Inst_SOP1__S_MOVRELS_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 m0(gpuDynInst, REG_M0); - m0.read(); - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0 + m0.rawData()); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = src.rawData(); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_MOVRELS_B64 class methods --- - - Inst_SOP1__S_MOVRELS_B64::Inst_SOP1__S_MOVRELS_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_movrels_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_MOVRELS_B64 - - Inst_SOP1__S_MOVRELS_B64::~Inst_SOP1__S_MOVRELS_B64() - { - } // ~Inst_SOP1__S_MOVRELS_B64 - - // --- description from .arch file --- - // D.u64 = SGPR[S0.u + M0.u].u64 (move from relative source). - // The index in M0.u must be even for this operation. - void - Inst_SOP1__S_MOVRELS_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 m0(gpuDynInst, REG_M0); - m0.read(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0 + m0.rawData()); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = src.rawData(); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_MOVRELD_B32 class methods --- - - Inst_SOP1__S_MOVRELD_B32::Inst_SOP1__S_MOVRELD_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_movreld_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_MOVRELD_B32 - - Inst_SOP1__S_MOVRELD_B32::~Inst_SOP1__S_MOVRELD_B32() - { - } // ~Inst_SOP1__S_MOVRELD_B32 - - // --- description from .arch file --- - // SGPR[D.u + M0.u].u = S0.u (move to relative destination). - void - Inst_SOP1__S_MOVRELD_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 m0(gpuDynInst, REG_M0); - m0.read(); - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST + m0.rawData()); - - src.read(); - - sdst = src.rawData(); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_MOVRELD_B64 class methods --- - - Inst_SOP1__S_MOVRELD_B64::Inst_SOP1__S_MOVRELD_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_movreld_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_MOVRELD_B64 - - Inst_SOP1__S_MOVRELD_B64::~Inst_SOP1__S_MOVRELD_B64() - { - } // ~Inst_SOP1__S_MOVRELD_B64 - - // --- description from .arch file --- - // SGPR[D.u + M0.u].u64 = S0.u64 (move to relative destination). - // The index in M0.u must be even for this operation. - void - Inst_SOP1__S_MOVRELD_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 m0(gpuDynInst, REG_M0); - m0.read(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST + m0.rawData()); - - src.read(); - - sdst = src.rawData(); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_CBRANCH_JOIN class methods --- - - Inst_SOP1__S_CBRANCH_JOIN::Inst_SOP1__S_CBRANCH_JOIN(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_cbranch_join") - { - setFlag(Branch); - setFlag(WritesEXEC); - } // Inst_SOP1__S_CBRANCH_JOIN - - Inst_SOP1__S_CBRANCH_JOIN::~Inst_SOP1__S_CBRANCH_JOIN() - { - } // ~Inst_SOP1__S_CBRANCH_JOIN - - // --- description from .arch file --- - // saved_csp = S0.u; - // if(CSP == saved_csp) then - // PC += 4; // Second time to JOIN: continue with program. - // else - // CSP -= 1; // First time to JOIN; jump to other FORK path. - // {PC, EXEC} = SGPR[CSP * 4]; // Read 128 bits from 4 consecutive - // SGPRs. - // end - // Conditional branch join point (end of conditional branch block). S0 is - // saved CSP value. - // See S_CBRANCH_G_FORK and S_CBRANCH_I_FORK for related instructions. 
- void - Inst_SOP1__S_CBRANCH_JOIN::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOP1__S_ABS_I32 class methods --- - - Inst_SOP1__S_ABS_I32::Inst_SOP1__S_ABS_I32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_abs_i32") - { - setFlag(ALU); - } // Inst_SOP1__S_ABS_I32 - - Inst_SOP1__S_ABS_I32::~Inst_SOP1__S_ABS_I32() - { - } // ~Inst_SOP1__S_ABS_I32 - - // --- description from .arch file --- - // if(S.i < 0) then D.i = -S.i; - // else D.i = S.i; - // SCC = 1 if result is non-zero. - // Integer absolute value. - void - Inst_SOP1__S_ABS_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = std::abs(src.rawData()); - - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_MOV_FED_B32 class methods --- - - Inst_SOP1__S_MOV_FED_B32::Inst_SOP1__S_MOV_FED_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_mov_fed_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_MOV_FED_B32 - - Inst_SOP1__S_MOV_FED_B32::~Inst_SOP1__S_MOV_FED_B32() - { - } // ~Inst_SOP1__S_MOV_FED_B32 - - // --- description from .arch file --- - // D.u = S0.u. Introduce an EDC double-detect error on write to the - // destination SGPR. - void - Inst_SOP1__S_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOP1__S_SET_GPR_IDX_IDX class methods --- - - Inst_SOP1__S_SET_GPR_IDX_IDX::Inst_SOP1__S_SET_GPR_IDX_IDX( - InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_set_gpr_idx_idx") - { - } // Inst_SOP1__S_SET_GPR_IDX_IDX - - Inst_SOP1__S_SET_GPR_IDX_IDX::~Inst_SOP1__S_SET_GPR_IDX_IDX() - { - } // ~Inst_SOP1__S_SET_GPR_IDX_IDX - - // --- description from .arch file --- - // M0[7:0] = S0.u[7:0]. - // Modify the index used in vector GPR indexing. - void - Inst_SOP1__S_SET_GPR_IDX_IDX::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPC__S_CMP_EQ_I32 class methods --- - - Inst_SOPC__S_CMP_EQ_I32::Inst_SOPC__S_CMP_EQ_I32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_eq_i32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_EQ_I32 - - Inst_SOPC__S_CMP_EQ_I32::~Inst_SOPC__S_CMP_EQ_I32() - { - } // ~Inst_SOPC__S_CMP_EQ_I32 - - // --- description from .arch file --- - // SCC = (S0.i == S1.i). - void - Inst_SOPC__S_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() == src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_LG_I32 class methods --- - - Inst_SOPC__S_CMP_LG_I32::Inst_SOPC__S_CMP_LG_I32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_lg_i32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_LG_I32 - - Inst_SOPC__S_CMP_LG_I32::~Inst_SOPC__S_CMP_LG_I32() - { - } // ~Inst_SOPC__S_CMP_LG_I32 - - // --- description from .arch file --- - // SCC = (S0.i != S1.i). - void - Inst_SOPC__S_CMP_LG_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() != src1.rawData()) ? 
1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_GT_I32 class methods --- - - Inst_SOPC__S_CMP_GT_I32::Inst_SOPC__S_CMP_GT_I32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_gt_i32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_GT_I32 - - Inst_SOPC__S_CMP_GT_I32::~Inst_SOPC__S_CMP_GT_I32() - { - } // ~Inst_SOPC__S_CMP_GT_I32 - - // --- description from .arch file --- - // SCC = (S0.i > S1.i). - void - Inst_SOPC__S_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() > src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_GE_I32 class methods --- - - Inst_SOPC__S_CMP_GE_I32::Inst_SOPC__S_CMP_GE_I32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_ge_i32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_GE_I32 - - Inst_SOPC__S_CMP_GE_I32::~Inst_SOPC__S_CMP_GE_I32() - { - } // ~Inst_SOPC__S_CMP_GE_I32 - - // --- description from .arch file --- - // SCC = (S0.i >= S1.i). - void - Inst_SOPC__S_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() >= src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_LT_I32 class methods --- - - Inst_SOPC__S_CMP_LT_I32::Inst_SOPC__S_CMP_LT_I32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_lt_i32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_LT_I32 - - Inst_SOPC__S_CMP_LT_I32::~Inst_SOPC__S_CMP_LT_I32() - { - } // ~Inst_SOPC__S_CMP_LT_I32 - - // --- description from .arch file --- - // SCC = (S0.i < S1.i). - void - Inst_SOPC__S_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() < src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_LE_I32 class methods --- - - Inst_SOPC__S_CMP_LE_I32::Inst_SOPC__S_CMP_LE_I32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_le_i32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_LE_I32 - - Inst_SOPC__S_CMP_LE_I32::~Inst_SOPC__S_CMP_LE_I32() - { - } // ~Inst_SOPC__S_CMP_LE_I32 - - // --- description from .arch file --- - // SCC = (S0.i <= S1.i). - void - Inst_SOPC__S_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() <= src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_EQ_U32 class methods --- - - Inst_SOPC__S_CMP_EQ_U32::Inst_SOPC__S_CMP_EQ_U32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_eq_u32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_EQ_U32 - - Inst_SOPC__S_CMP_EQ_U32::~Inst_SOPC__S_CMP_EQ_U32() - { - } // ~Inst_SOPC__S_CMP_EQ_U32 - - // --- description from .arch file --- - // SCC = (S0.u == S1.u). 
- void - Inst_SOPC__S_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() == src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_LG_U32 class methods --- - - Inst_SOPC__S_CMP_LG_U32::Inst_SOPC__S_CMP_LG_U32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_lg_u32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_LG_U32 - - Inst_SOPC__S_CMP_LG_U32::~Inst_SOPC__S_CMP_LG_U32() - { - } // ~Inst_SOPC__S_CMP_LG_U32 - - // --- description from .arch file --- - // SCC = (S0.u != S1.u). - void - Inst_SOPC__S_CMP_LG_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() != src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_GT_U32 class methods --- - - Inst_SOPC__S_CMP_GT_U32::Inst_SOPC__S_CMP_GT_U32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_gt_u32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_GT_U32 - - Inst_SOPC__S_CMP_GT_U32::~Inst_SOPC__S_CMP_GT_U32() - { - } // ~Inst_SOPC__S_CMP_GT_U32 - - // --- description from .arch file --- - // SCC = (S0.u > S1.u). - void - Inst_SOPC__S_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() > src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_GE_U32 class methods --- - - Inst_SOPC__S_CMP_GE_U32::Inst_SOPC__S_CMP_GE_U32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_ge_u32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_GE_U32 - - Inst_SOPC__S_CMP_GE_U32::~Inst_SOPC__S_CMP_GE_U32() - { - } // ~Inst_SOPC__S_CMP_GE_U32 - - // --- description from .arch file --- - // SCC = (S0.u >= S1.u). - void - Inst_SOPC__S_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() >= src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_LT_U32 class methods --- - - Inst_SOPC__S_CMP_LT_U32::Inst_SOPC__S_CMP_LT_U32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_lt_u32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_LT_U32 - - Inst_SOPC__S_CMP_LT_U32::~Inst_SOPC__S_CMP_LT_U32() - { - } // ~Inst_SOPC__S_CMP_LT_U32 - - // --- description from .arch file --- - // SCC = (S0.u < S1.u). - void - Inst_SOPC__S_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() < src1.rawData()) ? 
1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_LE_U32 class methods --- - - Inst_SOPC__S_CMP_LE_U32::Inst_SOPC__S_CMP_LE_U32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_le_u32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_LE_U32 - - Inst_SOPC__S_CMP_LE_U32::~Inst_SOPC__S_CMP_LE_U32() - { - } // ~Inst_SOPC__S_CMP_LE_U32 - - // --- description from .arch file --- - // SCC = (S0.u <= S1.u). - void - Inst_SOPC__S_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() <= src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_BITCMP0_B32 class methods --- - - Inst_SOPC__S_BITCMP0_B32::Inst_SOPC__S_BITCMP0_B32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_bitcmp0_b32") - { - setFlag(ALU); - } // Inst_SOPC__S_BITCMP0_B32 - - Inst_SOPC__S_BITCMP0_B32::~Inst_SOPC__S_BITCMP0_B32() - { - } // ~Inst_SOPC__S_BITCMP0_B32 - - // --- description from .arch file --- - // SCC = (S0.u[S1.u[4:0]] == 0). - void - Inst_SOPC__S_BITCMP0_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = !bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_BITCMP1_B32 class methods --- - - Inst_SOPC__S_BITCMP1_B32::Inst_SOPC__S_BITCMP1_B32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_bitcmp1_b32") - { - setFlag(ALU); - } // Inst_SOPC__S_BITCMP1_B32 - - Inst_SOPC__S_BITCMP1_B32::~Inst_SOPC__S_BITCMP1_B32() - { - } // ~Inst_SOPC__S_BITCMP1_B32 - - // --- description from .arch file --- - // SCC = (S0.u[S1.u[4:0]] == 1). - void - Inst_SOPC__S_BITCMP1_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_BITCMP0_B64 class methods --- - - Inst_SOPC__S_BITCMP0_B64::Inst_SOPC__S_BITCMP0_B64(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_bitcmp0_b64") - { - setFlag(ALU); - } // Inst_SOPC__S_BITCMP0_B64 - - Inst_SOPC__S_BITCMP0_B64::~Inst_SOPC__S_BITCMP0_B64() - { - } // ~Inst_SOPC__S_BITCMP0_B64 - - // --- description from .arch file --- - // SCC = (S0.u64[S1.u[5:0]] == 0). - void - Inst_SOPC__S_BITCMP0_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = !bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_BITCMP1_B64 class methods --- - - Inst_SOPC__S_BITCMP1_B64::Inst_SOPC__S_BITCMP1_B64(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_bitcmp1_b64") - { - setFlag(ALU); - } // Inst_SOPC__S_BITCMP1_B64 - - Inst_SOPC__S_BITCMP1_B64::~Inst_SOPC__S_BITCMP1_B64() - { - } // ~Inst_SOPC__S_BITCMP1_B64 - - // --- description from .arch file --- - // SCC = (S0.u64[S1.u[5:0]] == 1). 
- void - Inst_SOPC__S_BITCMP1_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_SETVSKIP class methods --- - - Inst_SOPC__S_SETVSKIP::Inst_SOPC__S_SETVSKIP(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_setvskip") - { - } // Inst_SOPC__S_SETVSKIP - - Inst_SOPC__S_SETVSKIP::~Inst_SOPC__S_SETVSKIP() - { - } // ~Inst_SOPC__S_SETVSKIP - - // --- description from .arch file --- - // VSKIP = S0.u[S1.u[4:0]]. - // Enables and disables VSKIP mode. - // When VSKIP is enabled, no VOP*/M*BUF/MIMG/DS/FLAT/EXP instuctions are - // issued. - // If any vector operations are outstanding, S_WAITCNT must be issued - // before executing. - // This instruction requires one waitstate after executing (e.g. S_NOP 0). - // Example: - // s_waitcnt 0 - // s_setvskip 1, 0 // Enable vskip mode. - // s_nop 1 - void - Inst_SOPC__S_SETVSKIP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPC__S_SET_GPR_IDX_ON class methods --- - - Inst_SOPC__S_SET_GPR_IDX_ON::Inst_SOPC__S_SET_GPR_IDX_ON(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_set_gpr_idx_on") - { - } // Inst_SOPC__S_SET_GPR_IDX_ON - - Inst_SOPC__S_SET_GPR_IDX_ON::~Inst_SOPC__S_SET_GPR_IDX_ON() - { - } // ~Inst_SOPC__S_SET_GPR_IDX_ON - - // --- description from .arch file --- - // MODE.gpr_idx_en = 1; - // M0[7:0] = S0.u[7:0]; - // M0[15:12] = SIMM4 (direct contents of S1 field); - // // Remaining bits of M0 are unmodified. - // Enable GPR indexing mode. Vector operations after this will perform - // relative GPR addressing based on the contents of M0. The structure - // SQ_M0_GPR_IDX_WORD may be used to decode M0. - // The raw contents of the S1 field are read and used to set the enable - // bits. S1[0] = VSRC0_REL, S1[1] = VSRC1_REL, S1[2] = VSRC2_REL and - // S1[3] = VDST_REL. - void - Inst_SOPC__S_SET_GPR_IDX_ON::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPC__S_CMP_EQ_U64 class methods --- - - Inst_SOPC__S_CMP_EQ_U64::Inst_SOPC__S_CMP_EQ_U64(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_eq_u64") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_EQ_U64 - - Inst_SOPC__S_CMP_EQ_U64::~Inst_SOPC__S_CMP_EQ_U64() - { - } // ~Inst_SOPC__S_CMP_EQ_U64 - - // --- description from .arch file --- - // SCC = (S0.i64 == S1.i64). - void - Inst_SOPC__S_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() == src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_LG_U64 class methods --- - - Inst_SOPC__S_CMP_LG_U64::Inst_SOPC__S_CMP_LG_U64(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_lg_u64") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_LG_U64 - - Inst_SOPC__S_CMP_LG_U64::~Inst_SOPC__S_CMP_LG_U64() - { - } // ~Inst_SOPC__S_CMP_LG_U64 - - // --- description from .arch file --- - // SCC = (S0.i64 != S1.i64). 
- void - Inst_SOPC__S_CMP_LG_U64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() != src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPP__S_NOP class methods --- - - Inst_SOPP__S_NOP::Inst_SOPP__S_NOP(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_nop") - { - setFlag(Nop); - } // Inst_SOPP__S_NOP - - Inst_SOPP__S_NOP::~Inst_SOPP__S_NOP() - { - } // ~Inst_SOPP__S_NOP - - // --- description from .arch file --- - // Do nothing. Repeat NOP 1..8 times based on SIMM16[2:0] -- 0 = 1 time, - // 7 = 8 times. - // This instruction may be used to introduce wait states to resolve - // hazards; see the shader programming guide for details. Compare with - // S_SLEEP. - void - Inst_SOPP__S_NOP::execute(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_SOPP__S_ENDPGM class methods --- - - Inst_SOPP__S_ENDPGM::Inst_SOPP__S_ENDPGM(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_endpgm") - { - setFlag(EndOfKernel); - } // Inst_SOPP__S_ENDPGM - - Inst_SOPP__S_ENDPGM::~Inst_SOPP__S_ENDPGM() - { - } // ~Inst_SOPP__S_ENDPGM - - // --- description from .arch file --- - // End of program; terminate wavefront. - // The hardware implicitly executes S_WAITCNT 0 before executing this - // --- instruction. - // See S_ENDPGM_SAVED for the context-switch version of this instruction. - void - Inst_SOPP__S_ENDPGM::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ComputeUnit *cu = gpuDynInst->computeUnit(); - - // delete extra instructions fetched for completed work-items - wf->instructionBuffer.erase(wf->instructionBuffer.begin() + 1, - wf->instructionBuffer.end()); - - if (wf->pendingFetch) { - wf->dropFetch = true; - } - - wf->computeUnit->fetchStage.fetchUnit(wf->simdId) - .flushBuf(wf->wfSlotId); - wf->setStatus(Wavefront::S_STOPPED); - - int refCount = wf->computeUnit->getLds() - .decreaseRefCounter(wf->dispatchId, wf->wgId); - - /** - * The parent WF of this instruction is exiting, therefore - * it should not participate in this barrier any longer. This - * prevents possible deadlock issues if WFs exit early. - */ - int bar_id = WFBarrier::InvalidID; - if (wf->hasBarrier()) { - assert(wf->getStatus() != Wavefront::S_BARRIER); - bar_id = wf->barrierId(); - assert(bar_id != WFBarrier::InvalidID); - wf->releaseBarrier(); - cu->decMaxBarrierCnt(bar_id); - DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Exiting the " - "program and decrementing max barrier count for " - "barrier Id%d. 
New max count: %d.\n", cu->cu_id, - wf->simdId, wf->wfSlotId, wf->wfDynId, bar_id, - cu->maxBarrierCnt(bar_id)); - } - - DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n", - wf->computeUnit->cu_id, wf->wgId, refCount); - - wf->computeUnit->registerManager->freeRegisters(wf); - wf->computeUnit->stats.completedWfs++; - wf->computeUnit->activeWaves--; - - panic_if(wf->computeUnit->activeWaves < 0, "CU[%d] Active waves less " - "than zero\n", wf->computeUnit->cu_id); - - DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n", - wf->computeUnit->cu_id, wf->simdId, wf->wfSlotId, wf->wfDynId); - - for (int i = 0; i < wf->vecReads.size(); i++) { - if (wf->rawDist.find(i) != wf->rawDist.end()) { - wf->stats.readsPerWrite.sample(wf->vecReads.at(i)); - } - } - wf->vecReads.clear(); - wf->rawDist.clear(); - wf->lastInstExec = 0; - - if (!refCount) { - /** - * If all WFs have finished, and hence the WG has finished, - * then we can free up the barrier belonging to the parent - * WG, but only if we actually used a barrier (i.e., more - * than one WF in the WG). - */ - if (bar_id != WFBarrier::InvalidID) { - DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - All waves are " - "now complete. Releasing barrier Id%d.\n", cu->cu_id, - wf->simdId, wf->wfSlotId, wf->wfDynId, - wf->barrierId()); - cu->releaseBarrier(bar_id); - } - - /** - * Last wavefront of the workgroup has executed return. If the - * workgroup is not the final one in the kernel, then simply - * retire it; however, if it is the final one, i.e., indicating - * the kernel end, then release operation (i.e., GL2 WB) is - * needed - */ - - //check whether the workgroup is indicating the kernel end, i.e., - //the last workgroup in the kernel - bool kernelEnd = - wf->computeUnit->shader->dispatcher().isReachingKernelEnd(wf); - - bool relNeeded = - wf->computeUnit->shader->impl_kern_end_rel; - - //if it is not a kernel end, then retire the workgroup directly - if (!kernelEnd || !relNeeded) { - wf->computeUnit->shader->dispatcher().notifyWgCompl(wf); - wf->setStatus(Wavefront::S_STOPPED); - wf->computeUnit->stats.completedWGs++; - - return; - } - - /** - * if it is a kernel end, inject a memory sync, i.e., GL2 WB, and - * retire the workgroup after receving response. - * note that GL0V and GL1 are read only, and they just forward GL2 - * WB request. When forwarding, GL1 send the request to all GL2 in - * the complex - */ - setFlag(MemSync); - setFlag(GlobalSegment); - // Notify Memory System of Kernel Completion - // Kernel End = isKernel + isMemSync - wf->setStatus(Wavefront::S_RETURNING); - gpuDynInst->simdId = wf->simdId; - gpuDynInst->wfSlotId = wf->wfSlotId; - gpuDynInst->wfDynId = wf->wfDynId; - - DPRINTF(GPUExec, "inject global memory fence for CU%d: " - "WF[%d][%d][%d]\n", wf->computeUnit->cu_id, - wf->simdId, wf->wfSlotId, wf->wfDynId); - - // call shader to prepare the flush operations - wf->computeUnit->shader->prepareFlush(gpuDynInst); - - wf->computeUnit->stats.completedWGs++; - } else { - wf->computeUnit->shader->dispatcher().scheduleDispatch(); - } - } // execute - - // --- Inst_SOPP__S_BRANCH class methods --- - - Inst_SOPP__S_BRANCH::Inst_SOPP__S_BRANCH(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_branch") - { - setFlag(Branch); - } // Inst_SOPP__S_BRANCH - - Inst_SOPP__S_BRANCH::~Inst_SOPP__S_BRANCH() - { - } // ~Inst_SOPP__S_BRANCH - - // --- description from .arch file --- - // PC = PC + signext(SIMM16 * 4) + 4 (short jump). - // For a long jump, use S_SETPC. 
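A quick sketch of the branch-target arithmetic described above and used by s_branch and the s_cbranch_* variants that follow: SIMM16 is a signed dword offset taken relative to the instruction after the branch, hence the extra 4 bytes. This is a standalone illustration, not patch content:

    #include <cstdint>

    // PC = PC + signext(SIMM16 * 4) + 4 (short jump).
    uint64_t branchTarget(uint64_t pc, int16_t simm16)
    {
        return pc + static_cast<int64_t>(simm16) * 4 + 4;
    }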
- void - Inst_SOPP__S_BRANCH::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - Addr pc = gpuDynInst->pc(); - ScalarRegI16 simm16 = instData.SIMM16; - - pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; - - wf->pc(pc); - } // execute - // --- Inst_SOPP__S_WAKEUP class methods --- - - Inst_SOPP__S_WAKEUP::Inst_SOPP__S_WAKEUP(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_wakeup") - { - } // Inst_SOPP__S_WAKEUP - - Inst_SOPP__S_WAKEUP::~Inst_SOPP__S_WAKEUP() - { - } // ~Inst_SOPP__S_WAKEUP - - // --- description from .arch file --- - // Allow a wave to 'ping' all the other waves in its threadgroup to force - // them to wake up immediately from an S_SLEEP instruction. The ping is - // ignored if the waves are not sleeping. - // This allows for more efficient polling on a memory location. The waves - // which are polling can sit in a long S_SLEEP between memory reads, but - // the wave which writes the value can tell them all to wake up early now - // that the data is available. This is useful for fBarrier implementations - // (speedup). - // This method is also safe from races because if any wave misses the ping, - // everything still works fine (whoever missed it just completes their - // normal S_SLEEP). - void - Inst_SOPP__S_WAKEUP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPP__S_CBRANCH_SCC0 class methods --- - - Inst_SOPP__S_CBRANCH_SCC0::Inst_SOPP__S_CBRANCH_SCC0(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_scc0") - { - setFlag(Branch); - } // Inst_SOPP__S_CBRANCH_SCC0 - - Inst_SOPP__S_CBRANCH_SCC0::~Inst_SOPP__S_CBRANCH_SCC0() - { - } // ~Inst_SOPP__S_CBRANCH_SCC0 - - // --- description from .arch file --- - // if(SCC == 0) then PC = PC + signext(SIMM16 * 4) + 4; - // else NOP. - void - Inst_SOPP__S_CBRANCH_SCC0::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - Addr pc = gpuDynInst->pc(); - ScalarRegI16 simm16 = instData.SIMM16; - ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); - - scc.read(); - - if (!scc.rawData()) { - pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; - } - - wf->pc(pc); - } // execute - // --- Inst_SOPP__S_CBRANCH_SCC1 class methods --- - - Inst_SOPP__S_CBRANCH_SCC1::Inst_SOPP__S_CBRANCH_SCC1(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_scc1") - { - setFlag(Branch); - } // Inst_SOPP__S_CBRANCH_SCC1 - - Inst_SOPP__S_CBRANCH_SCC1::~Inst_SOPP__S_CBRANCH_SCC1() - { - } // ~Inst_SOPP__S_CBRANCH_SCC1 - - // --- description from .arch file --- - // if(SCC == 1) then PC = PC + signext(SIMM16 * 4) + 4; - // else NOP. - void - Inst_SOPP__S_CBRANCH_SCC1::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - Addr pc = gpuDynInst->pc(); - ScalarRegI16 simm16 = instData.SIMM16; - ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); - - scc.read(); - - if (scc.rawData()) { - pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; - } - - wf->pc(pc); - } // execute - // --- Inst_SOPP__S_CBRANCH_VCCZ class methods --- - - Inst_SOPP__S_CBRANCH_VCCZ::Inst_SOPP__S_CBRANCH_VCCZ(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_vccz") - { - setFlag(Branch); - setFlag(ReadsVCC); - } // Inst_SOPP__S_CBRANCH_VCCZ - - Inst_SOPP__S_CBRANCH_VCCZ::~Inst_SOPP__S_CBRANCH_VCCZ() - { - } // ~Inst_SOPP__S_CBRANCH_VCCZ - - // --- description from .arch file --- - // if(VCC == 0) then PC = PC + signext(SIMM16 * 4) + 4; - // else NOP. 
- void - Inst_SOPP__S_CBRANCH_VCCZ::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - Addr pc = gpuDynInst->pc(); - ScalarRegI16 simm16 = instData.SIMM16; - - vcc.read(); - - if (!vcc.rawData()) { - pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; - } - - wf->pc(pc); - } // execute - // --- Inst_SOPP__S_CBRANCH_VCCNZ class methods --- - - Inst_SOPP__S_CBRANCH_VCCNZ::Inst_SOPP__S_CBRANCH_VCCNZ(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_vccnz") - { - setFlag(Branch); - setFlag(ReadsVCC); - } // Inst_SOPP__S_CBRANCH_VCCNZ - - Inst_SOPP__S_CBRANCH_VCCNZ::~Inst_SOPP__S_CBRANCH_VCCNZ() - { - } // ~Inst_SOPP__S_CBRANCH_VCCNZ - - // --- description from .arch file --- - // if(VCC != 0) then PC = PC + signext(SIMM16 * 4) + 4; - // else NOP. - void - Inst_SOPP__S_CBRANCH_VCCNZ::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - vcc.read(); - - if (vcc.rawData()) { - Addr pc = gpuDynInst->pc(); - ScalarRegI16 simm16 = instData.SIMM16; - pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; - wf->pc(pc); - } - } // execute - // --- Inst_SOPP__S_CBRANCH_EXECZ class methods --- - - Inst_SOPP__S_CBRANCH_EXECZ::Inst_SOPP__S_CBRANCH_EXECZ(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_execz") - { - setFlag(Branch); - setFlag(ReadsEXEC); - } // Inst_SOPP__S_CBRANCH_EXECZ - - Inst_SOPP__S_CBRANCH_EXECZ::~Inst_SOPP__S_CBRANCH_EXECZ() - { - } // ~Inst_SOPP__S_CBRANCH_EXECZ - - // --- description from .arch file --- - // if(EXEC == 0) then PC = PC + signext(SIMM16 * 4) + 4; - // else NOP. - void - Inst_SOPP__S_CBRANCH_EXECZ::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (wf->execMask().none()) { - Addr pc = gpuDynInst->pc(); - ScalarRegI16 simm16 = instData.SIMM16; - pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; - wf->pc(pc); - } - } // execute - // --- Inst_SOPP__S_CBRANCH_EXECNZ class methods --- - - Inst_SOPP__S_CBRANCH_EXECNZ::Inst_SOPP__S_CBRANCH_EXECNZ(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_execnz") - { - setFlag(Branch); - setFlag(ReadsEXEC); - } // Inst_SOPP__S_CBRANCH_EXECNZ - - Inst_SOPP__S_CBRANCH_EXECNZ::~Inst_SOPP__S_CBRANCH_EXECNZ() - { - } // ~Inst_SOPP__S_CBRANCH_EXECNZ - - // --- description from .arch file --- - // if(EXEC != 0) then PC = PC + signext(SIMM16 * 4) + 4; - // else NOP. - void - Inst_SOPP__S_CBRANCH_EXECNZ::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (wf->execMask().any()) { - Addr pc = gpuDynInst->pc(); - ScalarRegI16 simm16 = instData.SIMM16; - pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; - wf->pc(pc); - } - } // execute - // --- Inst_SOPP__S_BARRIER class methods --- - - Inst_SOPP__S_BARRIER::Inst_SOPP__S_BARRIER(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_barrier") - { - setFlag(MemBarrier); - } // Inst_SOPP__S_BARRIER - - Inst_SOPP__S_BARRIER::~Inst_SOPP__S_BARRIER() - { - } // ~Inst_SOPP__S_BARRIER - - // --- description from .arch file --- - // Synchronize waves within a threadgroup. - // If not all waves of the threadgroup have been created yet, waits for - // entire group before proceeding. - // If some waves in the threadgroup have already terminated, this waits on - // only the surviving waves. - // Barriers are legal inside trap handlers. 
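The conditional-branch hunks above differ only in the predicate they test before applying the branch arithmetic shown earlier. As a compact summary (a sketch, not patch content), the taken/not-taken decision reduces to:

    #include <cstdint>

    // SCC0/SCC1 test the 1-bit scalar condition code; VCCZ/VCCNZ and
    // EXECZ/EXECNZ test whether the 64-bit VCC / EXEC masks are all zero.
    bool takenScc0(uint32_t scc)    { return scc == 0; }
    bool takenScc1(uint32_t scc)    { return scc != 0; }
    bool takenVccz(uint64_t vcc)    { return vcc == 0; }
    bool takenVccnz(uint64_t vcc)   { return vcc != 0; }
    bool takenExecz(uint64_t exec)  { return exec == 0; }
    bool takenExecnz(uint64_t exec) { return exec != 0; }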
- void - Inst_SOPP__S_BARRIER::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ComputeUnit *cu = gpuDynInst->computeUnit(); - - if (wf->hasBarrier()) { - int bar_id = wf->barrierId(); - assert(wf->getStatus() == Wavefront::S_BARRIER); - cu->incNumAtBarrier(bar_id); - DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Stalling at " - "barrier Id%d. %d waves now at barrier, %d waves " - "remain.\n", cu->cu_id, wf->simdId, wf->wfSlotId, - wf->wfDynId, bar_id, cu->numAtBarrier(bar_id), - cu->numYetToReachBarrier(bar_id)); - } - } // execute - // --- Inst_SOPP__S_SETKILL class methods --- - - Inst_SOPP__S_SETKILL::Inst_SOPP__S_SETKILL(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_setkill") - { - } // Inst_SOPP__S_SETKILL - - Inst_SOPP__S_SETKILL::~Inst_SOPP__S_SETKILL() - { - } // ~Inst_SOPP__S_SETKILL - - // --- description from .arch file --- - // set KILL bit to value of SIMM16[0]. - // Used primarily for debugging kill wave host command behavior. - void - Inst_SOPP__S_SETKILL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPP__S_WAITCNT class methods --- - - Inst_SOPP__S_WAITCNT::Inst_SOPP__S_WAITCNT(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_waitcnt") - { - setFlag(ALU); - setFlag(Waitcnt); - } // Inst_SOPP__S_WAITCNT - - Inst_SOPP__S_WAITCNT::~Inst_SOPP__S_WAITCNT() - { - } // ~Inst_SOPP__S_WAITCNT - - // --- description from .arch file --- - // Wait for the counts of outstanding lds, vector-memory and - // --- export/vmem-write-data to be at or below the specified levels. - // SIMM16[3:0] = vmcount (vector memory operations), - // SIMM16[6:4] = export/mem-write-data count, - // SIMM16[12:8] = LGKM_cnt (scalar-mem/GDS/LDS count). - void - Inst_SOPP__S_WAITCNT::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 vm_cnt = 0; - ScalarRegI32 exp_cnt = 0; - ScalarRegI32 lgkm_cnt = 0; - vm_cnt = bits(instData.SIMM16, 3, 0); - exp_cnt = bits(instData.SIMM16, 6, 4); - lgkm_cnt = bits(instData.SIMM16, 12, 8); - gpuDynInst->wavefront()->setStatus(Wavefront::S_WAITCNT); - gpuDynInst->wavefront()->setWaitCnts(vm_cnt, exp_cnt, lgkm_cnt); - } // execute - // --- Inst_SOPP__S_SETHALT class methods --- - - Inst_SOPP__S_SETHALT::Inst_SOPP__S_SETHALT(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_sethalt") - { - } // Inst_SOPP__S_SETHALT - - Inst_SOPP__S_SETHALT::~Inst_SOPP__S_SETHALT() - { - } // ~Inst_SOPP__S_SETHALT - - // --- description from .arch file --- - // Set HALT bit to value of SIMM16[0]; 1 = halt, 0 = resume. - // The halt flag is ignored while PRIV == 1 (inside trap handlers) but the - // shader will halt immediately after the handler returns if HALT is still - // set at that time. - void - Inst_SOPP__S_SETHALT::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPP__S_SLEEP class methods --- - - Inst_SOPP__S_SLEEP::Inst_SOPP__S_SLEEP(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_sleep") - { - setFlag(ALU); - setFlag(Sleep); - } // Inst_SOPP__S_SLEEP - - Inst_SOPP__S_SLEEP::~Inst_SOPP__S_SLEEP() - { - } // ~Inst_SOPP__S_SLEEP - - // --- description from .arch file --- - // Cause a wave to sleep for (64 * SIMM16[2:0] + 1..64) clocks. - // The exact amount of delay is approximate. Compare with S_NOP. 
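For reference, the s_waitcnt hunk above unpacks three independent counters from SIMM16 before stalling the wavefront. A self-contained sketch of that decoding, with the field layout taken from the comment and code above:

    #include <cstdint>

    struct WaitCnts { uint32_t vmCnt, expCnt, lgkmCnt; };

    // SIMM16[3:0] = vector-memory count, SIMM16[6:4] = export/mem-write-data
    // count, SIMM16[12:8] = LGKM (scalar-mem/GDS/LDS) count.
    WaitCnts decodeWaitcnt(uint16_t simm16)
    {
        return { simm16 & 0xfu,
                 (simm16 >> 4) & 0x7u,
                 (simm16 >> 8) & 0x1fu };
    }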
- void - Inst_SOPP__S_SLEEP::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; - gpuDynInst->wavefront()->setStatus(Wavefront::S_STALLED_SLEEP); - // sleep duration is specified in multiples of 64 cycles - gpuDynInst->wavefront()->setSleepTime(64 * simm16); - } // execute - // --- Inst_SOPP__S_SETPRIO class methods --- - - Inst_SOPP__S_SETPRIO::Inst_SOPP__S_SETPRIO(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_setprio") - { - setFlag(ALU); - } // Inst_SOPP__S_SETPRIO - - Inst_SOPP__S_SETPRIO::~Inst_SOPP__S_SETPRIO() - { - } // ~Inst_SOPP__S_SETPRIO - - // --- description from .arch file --- - // User settable wave priority is set to SIMM16[1:0]. 0 = lowest, - // 3 = highest. - // The overall wave priority is {SPIPrio[1:0] + UserPrio[1:0], - // WaveAge[3:0]}. - void - Inst_SOPP__S_SETPRIO::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegU16 simm16 = instData.SIMM16; - ScalarRegU32 userPrio = simm16 & 0x3; - - warn_once("S_SETPRIO ignored -- Requested priority %d\n", userPrio); - } // execute - // --- Inst_SOPP__S_SENDMSG class methods --- - - Inst_SOPP__S_SENDMSG::Inst_SOPP__S_SENDMSG(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_sendmsg") - { - } // Inst_SOPP__S_SENDMSG - - Inst_SOPP__S_SENDMSG::~Inst_SOPP__S_SENDMSG() - { - } // ~Inst_SOPP__S_SENDMSG - - // --- description from .arch file --- - // Send a message upstream to VGT or the interrupt handler. - // SIMM16[9:0] contains the message type and is documented in the shader - // --- programming guide. - void - Inst_SOPP__S_SENDMSG::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPP__S_SENDMSGHALT class methods --- - - Inst_SOPP__S_SENDMSGHALT::Inst_SOPP__S_SENDMSGHALT(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_sendmsghalt") - { - } // Inst_SOPP__S_SENDMSGHALT - - Inst_SOPP__S_SENDMSGHALT::~Inst_SOPP__S_SENDMSGHALT() - { - } // ~Inst_SOPP__S_SENDMSGHALT - - // --- description from .arch file --- - // Send a message and then HALT the wavefront; see S_SENDMSG for details. - void - Inst_SOPP__S_SENDMSGHALT::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPP__S_TRAP class methods --- - - Inst_SOPP__S_TRAP::Inst_SOPP__S_TRAP(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_trap") - { - } // Inst_SOPP__S_TRAP - - Inst_SOPP__S_TRAP::~Inst_SOPP__S_TRAP() - { - } // ~Inst_SOPP__S_TRAP - - // --- description from .arch file --- - // TrapID = SIMM16[7:0]; - // Wait for all instructions to complete; - // set {TTMP1, TTMP0} = {3'h0, PCRewind[3:0], HT[0], TrapID[7:0], - // PC[47:0]}; - // PC = TBA (trap base address); - // PRIV = 1. - // Enter the trap handler. This instruction may be generated internally as - // well in response to a host trap (HT = 1) or an exception. - // TrapID 0 is reserved for hardware use and should not be used in a - // shader-generated trap. - void - Inst_SOPP__S_TRAP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPP__S_ICACHE_INV class methods --- - - Inst_SOPP__S_ICACHE_INV::Inst_SOPP__S_ICACHE_INV(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_icache_inv") - { - } // Inst_SOPP__S_ICACHE_INV - - Inst_SOPP__S_ICACHE_INV::~Inst_SOPP__S_ICACHE_INV() - { - } // ~Inst_SOPP__S_ICACHE_INV - - // --- description from .arch file --- - // Invalidate entire L1 instruction cache. - // You must have 12 separate S_NOP instructions or a jump/branch - // instruction after this instruction - // to ensure the SQ instruction buffer is purged. 
- void - Inst_SOPP__S_ICACHE_INV::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPP__S_INCPERFLEVEL class methods --- - - Inst_SOPP__S_INCPERFLEVEL::Inst_SOPP__S_INCPERFLEVEL(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_incperflevel") - { - } // Inst_SOPP__S_INCPERFLEVEL - - Inst_SOPP__S_INCPERFLEVEL::~Inst_SOPP__S_INCPERFLEVEL() - { - } // ~Inst_SOPP__S_INCPERFLEVEL - - // --- description from .arch file --- - // Increment performance counter specified in SIMM16[3:0] by 1. - void - Inst_SOPP__S_INCPERFLEVEL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPP__S_DECPERFLEVEL class methods --- - - Inst_SOPP__S_DECPERFLEVEL::Inst_SOPP__S_DECPERFLEVEL(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_decperflevel") - { - } // Inst_SOPP__S_DECPERFLEVEL - - Inst_SOPP__S_DECPERFLEVEL::~Inst_SOPP__S_DECPERFLEVEL() - { - } // ~Inst_SOPP__S_DECPERFLEVEL - - // --- description from .arch file --- - // Decrement performance counter specified in SIMM16[3:0] by 1. - void - Inst_SOPP__S_DECPERFLEVEL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPP__S_TTRACEDATA class methods --- - - Inst_SOPP__S_TTRACEDATA::Inst_SOPP__S_TTRACEDATA(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_ttracedata") - { - } // Inst_SOPP__S_TTRACEDATA - - Inst_SOPP__S_TTRACEDATA::~Inst_SOPP__S_TTRACEDATA() - { - } // ~Inst_SOPP__S_TTRACEDATA - - // --- description from .arch file --- - // Send M0 as user data to the thread trace stream. - void - Inst_SOPP__S_TTRACEDATA::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPP__S_CBRANCH_CDBGSYS class methods --- - - Inst_SOPP__S_CBRANCH_CDBGSYS::Inst_SOPP__S_CBRANCH_CDBGSYS( - InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_cdbgsys") - { - setFlag(Branch); - } // Inst_SOPP__S_CBRANCH_CDBGSYS - - Inst_SOPP__S_CBRANCH_CDBGSYS::~Inst_SOPP__S_CBRANCH_CDBGSYS() - { - } // ~Inst_SOPP__S_CBRANCH_CDBGSYS - - // --- description from .arch file --- - // if(conditional_debug_system != 0) then PC = PC + signext(SIMM16 * 4) - // + 4; - // else NOP. - void - Inst_SOPP__S_CBRANCH_CDBGSYS::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPP__S_CBRANCH_CDBGUSER class methods --- - - Inst_SOPP__S_CBRANCH_CDBGUSER::Inst_SOPP__S_CBRANCH_CDBGUSER( - InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_cdbguser") - { - setFlag(Branch); - } // Inst_SOPP__S_CBRANCH_CDBGUSER - - Inst_SOPP__S_CBRANCH_CDBGUSER::~Inst_SOPP__S_CBRANCH_CDBGUSER() - { - } // ~Inst_SOPP__S_CBRANCH_CDBGUSER - - // --- description from .arch file --- - // if(conditional_debug_user != 0) then PC = PC + signext(SIMM16 * 4) + 4; - // else NOP. - void - Inst_SOPP__S_CBRANCH_CDBGUSER::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER class methods --- - - Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER( - InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_or_user") - { - setFlag(Branch); - } // Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER - - Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER:: - ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER() - { - } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER - - // --- description from .arch file --- - // if(conditional_debug_system || conditional_debug_user) then PC = PC + - // --- signext(SIMM16 * 4) + 4; - // else NOP. 
- void - Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER class methods --- - - Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER:: - Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_and_user") - { - setFlag(Branch); - } // Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER - - Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER:: - ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER() - { - } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER - - // --- description from .arch file --- - // if(conditional_debug_system && conditional_debug_user) then PC = PC + - // --- signext(SIMM16 * 4) + 4; - // else NOP. - void - Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPP__S_ENDPGM_SAVED class methods --- - - Inst_SOPP__S_ENDPGM_SAVED::Inst_SOPP__S_ENDPGM_SAVED(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_endpgm_saved") - { - } // Inst_SOPP__S_ENDPGM_SAVED - - Inst_SOPP__S_ENDPGM_SAVED::~Inst_SOPP__S_ENDPGM_SAVED() - { - } // ~Inst_SOPP__S_ENDPGM_SAVED - - // --- description from .arch file --- - // End of program; signal that a wave has been saved by the context-switch - // trap handler and terminate wavefront. - // The hardware implicitly executes S_WAITCNT 0 before executing this - // instruction. - // Use S_ENDPGM in all cases unless you are executing the context-switch - // save handler. - void - Inst_SOPP__S_ENDPGM_SAVED::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPP__S_SET_GPR_IDX_OFF class methods --- - - Inst_SOPP__S_SET_GPR_IDX_OFF::Inst_SOPP__S_SET_GPR_IDX_OFF( - InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_set_gpr_idx_off") - { - } // Inst_SOPP__S_SET_GPR_IDX_OFF - - Inst_SOPP__S_SET_GPR_IDX_OFF::~Inst_SOPP__S_SET_GPR_IDX_OFF() - { - } // ~Inst_SOPP__S_SET_GPR_IDX_OFF - - // --- description from .arch file --- - // MODE.gpr_idx_en = 0. - // Clear GPR indexing mode. Vector operations after this will not perform - // --- relative GPR addressing regardless of the contents of M0. This - // --- instruction does not modify M0. - void - Inst_SOPP__S_SET_GPR_IDX_OFF::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPP__S_SET_GPR_IDX_MODE class methods --- - - Inst_SOPP__S_SET_GPR_IDX_MODE::Inst_SOPP__S_SET_GPR_IDX_MODE( - InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_set_gpr_idx_mode") - { - } // Inst_SOPP__S_SET_GPR_IDX_MODE - - Inst_SOPP__S_SET_GPR_IDX_MODE::~Inst_SOPP__S_SET_GPR_IDX_MODE() - { - } // ~Inst_SOPP__S_SET_GPR_IDX_MODE - - // --- description from .arch file --- - // M0[15:12] = SIMM4. - // Modify the mode used for vector GPR indexing. - // The raw contents of the source field are read and used to set the enable - // bits. SIMM4[0] = VSRC0_REL, SIMM4[1] = VSRC1_REL, SIMM4[2] = VSRC2_REL - // and SIMM4[3] = VDST_REL. - void - Inst_SOPP__S_SET_GPR_IDX_MODE::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SMEM__S_LOAD_DWORD class methods --- - - Inst_SMEM__S_LOAD_DWORD::Inst_SMEM__S_LOAD_DWORD(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_load_dword") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_SMEM__S_LOAD_DWORD - - Inst_SMEM__S_LOAD_DWORD::~Inst_SMEM__S_LOAD_DWORD() - { - } // ~Inst_SMEM__S_LOAD_DWORD - - /** - * Read 1 dword from scalar data cache. If the offset is specified as an - * sgpr, the sgpr contains an unsigned byte offset (the 2 LSBs are - * ignored). 
If the offset is specified as an immediate 20-bit constant, - * the constant is an unsigned byte offset. - */ - void - Inst_SMEM__S_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); - - addr.read(); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, addr, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe - .issueRequest(gpuDynInst); - } // execute - - void - Inst_SMEM__S_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<1>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) - { - ScalarOperandU32 sdst(gpuDynInst, instData.SDATA); - sdst.write(); - } // completeAcc - // --- Inst_SMEM__S_LOAD_DWORDX2 class methods --- - - Inst_SMEM__S_LOAD_DWORDX2::Inst_SMEM__S_LOAD_DWORDX2(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_load_dwordx2") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_SMEM__S_LOAD_DWORDX2 - - Inst_SMEM__S_LOAD_DWORDX2::~Inst_SMEM__S_LOAD_DWORDX2() - { - } // ~Inst_SMEM__S_LOAD_DWORDX2 - - /** - * Read 2 dwords from scalar data cache. See s_load_dword for details on - * the offset input. - */ - void - Inst_SMEM__S_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); - - addr.read(); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, addr, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe. - issueRequest(gpuDynInst); - } // execute - - void - Inst_SMEM__S_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<2>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) - { - ScalarOperandU64 sdst(gpuDynInst, instData.SDATA); - sdst.write(); - } // completeAcc - // --- Inst_SMEM__S_LOAD_DWORDX4 class methods --- - - Inst_SMEM__S_LOAD_DWORDX4::Inst_SMEM__S_LOAD_DWORDX4(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_load_dwordx4") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_SMEM__S_LOAD_DWORDX4 - - Inst_SMEM__S_LOAD_DWORDX4::~Inst_SMEM__S_LOAD_DWORDX4() - { - } // ~Inst_SMEM__S_LOAD_DWORDX4 - - // --- description from .arch file --- - // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on - // the offset input. 
- void - Inst_SMEM__S_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); - - addr.read(); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, addr, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe. - issueRequest(gpuDynInst); - } // execute - - void - Inst_SMEM__S_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<4>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - ScalarOperandU128 sdst(gpuDynInst, instData.SDATA); - sdst.write(); - } // completeAcc - // --- Inst_SMEM__S_LOAD_DWORDX8 class methods --- - - Inst_SMEM__S_LOAD_DWORDX8::Inst_SMEM__S_LOAD_DWORDX8(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_load_dwordx8") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_SMEM__S_LOAD_DWORDX8 - - Inst_SMEM__S_LOAD_DWORDX8::~Inst_SMEM__S_LOAD_DWORDX8() - { - } // ~Inst_SMEM__S_LOAD_DWORDX8 - - // --- description from .arch file --- - // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on - // the offset input. - void - Inst_SMEM__S_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); - - addr.read(); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, addr, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe. - issueRequest(gpuDynInst); - } // execute - - void - Inst_SMEM__S_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<8>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst) - { - ScalarOperandU256 sdst(gpuDynInst, instData.SDATA); - sdst.write(); - } // completeAcc - // --- Inst_SMEM__S_LOAD_DWORDX16 class methods --- - - Inst_SMEM__S_LOAD_DWORDX16::Inst_SMEM__S_LOAD_DWORDX16(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_load_dwordx16") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_SMEM__S_LOAD_DWORDX16 - - Inst_SMEM__S_LOAD_DWORDX16::~Inst_SMEM__S_LOAD_DWORDX16() - { - } // ~Inst_SMEM__S_LOAD_DWORDX16 - - // --- description from .arch file --- - // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on - // the offset input. 
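All of the s_load_dword* and s_buffer_load_dword* hunks share the same offset selection: an immediate unsigned byte offset when IMM is set, otherwise the value of the SGPR named by the OFFSET field. A minimal sketch of that selection, assuming the effective address is simply base plus byte offset (the actual address math lives in calcAddr(), and the names below are illustrative):

    #include <cstdint>

    // offsetField is the raw OFFSET encoding; offsetSgprVal is the value of
    // the SGPR it names when IMM == 0. Both offsets are unsigned byte counts.
    uint64_t smemEffectiveAddr(uint64_t base, bool imm,
                               uint32_t offsetField, uint32_t offsetSgprVal)
    {
        uint32_t offset = imm ? offsetField : offsetSgprVal;
        return base + offset;
    }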
- void - Inst_SMEM__S_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); - - addr.read(); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, addr, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe. - issueRequest(gpuDynInst); - } // execute - - void - Inst_SMEM__S_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<16>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst) - { - ScalarOperandU512 sdst(gpuDynInst, instData.SDATA); - sdst.write(); - } // completeAcc - // --- Inst_SMEM__S_BUFFER_LOAD_DWORD class methods --- - - Inst_SMEM__S_BUFFER_LOAD_DWORD::Inst_SMEM__S_BUFFER_LOAD_DWORD( - InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_buffer_load_dword") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_SMEM__S_BUFFER_LOAD_DWORD - - Inst_SMEM__S_BUFFER_LOAD_DWORD::~Inst_SMEM__S_BUFFER_LOAD_DWORD() - { - } // ~Inst_SMEM__S_BUFFER_LOAD_DWORD - - // --- description from .arch file --- - // Read 1 dword from scalar data cache. See S_LOAD_DWORD for details on the - // --- offset input. - void - Inst_SMEM__S_BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE); - - rsrcDesc.read(); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, rsrcDesc, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe - .issueRequest(gpuDynInst); - } // execute - - void - Inst_SMEM__S_BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<1>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) - { - // 1 request, size 32 - ScalarOperandU32 sdst(gpuDynInst, instData.SDATA); - sdst.write(); - } // completeAcc - // --- Inst_SMEM__S_BUFFER_LOAD_DWORDX2 class methods --- - - Inst_SMEM__S_BUFFER_LOAD_DWORDX2::Inst_SMEM__S_BUFFER_LOAD_DWORDX2( - InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_buffer_load_dwordx2") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_SMEM__S_BUFFER_LOAD_DWORDX2 - - Inst_SMEM__S_BUFFER_LOAD_DWORDX2::~Inst_SMEM__S_BUFFER_LOAD_DWORDX2() - { - } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX2 - - // --- description from .arch file --- - // Read 2 dwords from scalar data cache. See S_LOAD_DWORD for details on - // the offset input. 
- void - Inst_SMEM__S_BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE); - - rsrcDesc.read(); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, rsrcDesc, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe - .issueRequest(gpuDynInst); - } // execute - - void - Inst_SMEM__S_BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<2>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) - { - // use U64 because 2 requests, each size 32 - ScalarOperandU64 sdst(gpuDynInst, instData.SDATA); - sdst.write(); - } // completeAcc - // --- Inst_SMEM__S_BUFFER_LOAD_DWORDX4 class methods --- - - Inst_SMEM__S_BUFFER_LOAD_DWORDX4::Inst_SMEM__S_BUFFER_LOAD_DWORDX4( - InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_buffer_load_dwordx4") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_SMEM__S_BUFFER_LOAD_DWORDX4 - - Inst_SMEM__S_BUFFER_LOAD_DWORDX4::~Inst_SMEM__S_BUFFER_LOAD_DWORDX4() - { - } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX4 - - // --- description from .arch file --- - // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on - // the offset input. - void - Inst_SMEM__S_BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE); - - rsrcDesc.read(); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, rsrcDesc, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe - .issueRequest(gpuDynInst); - } // execute - - void - Inst_SMEM__S_BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<4>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - // 4 requests, each size 32 - ScalarOperandU128 sdst(gpuDynInst, instData.SDATA); - sdst.write(); - } // completeAcc - // --- Inst_SMEM__S_BUFFER_LOAD_DWORDX8 class methods --- - - Inst_SMEM__S_BUFFER_LOAD_DWORDX8::Inst_SMEM__S_BUFFER_LOAD_DWORDX8( - InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_buffer_load_dwordx8") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_SMEM__S_BUFFER_LOAD_DWORDX8 - - Inst_SMEM__S_BUFFER_LOAD_DWORDX8::~Inst_SMEM__S_BUFFER_LOAD_DWORDX8() - { - } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX8 - - // --- description from .arch file --- - // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on - // the offset input. 
- void - Inst_SMEM__S_BUFFER_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE); - - rsrcDesc.read(); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, rsrcDesc, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe - .issueRequest(gpuDynInst); - } // execute - - void - Inst_SMEM__S_BUFFER_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<8>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_BUFFER_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst) - { - // 8 requests, each size 32 - ScalarOperandU256 sdst(gpuDynInst, instData.SDATA); - sdst.write(); - } // completeAcc - // --- Inst_SMEM__S_BUFFER_LOAD_DWORDX16 class methods --- - - Inst_SMEM__S_BUFFER_LOAD_DWORDX16::Inst_SMEM__S_BUFFER_LOAD_DWORDX16( - InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_buffer_load_dwordx16") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_SMEM__S_BUFFER_LOAD_DWORDX16 - - Inst_SMEM__S_BUFFER_LOAD_DWORDX16::~Inst_SMEM__S_BUFFER_LOAD_DWORDX16() - { - } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX16 - - // --- description from .arch file --- - // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on - // the offset input. - void - Inst_SMEM__S_BUFFER_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE); - - rsrcDesc.read(); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, rsrcDesc, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe - .issueRequest(gpuDynInst); - } // execute - - void - Inst_SMEM__S_BUFFER_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<16>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_BUFFER_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst) - { - // 16 requests, each size 32 - ScalarOperandU512 sdst(gpuDynInst, instData.SDATA); - sdst.write(); - } // completeAcc - // --- Inst_SMEM__S_STORE_DWORD class methods --- - - Inst_SMEM__S_STORE_DWORD::Inst_SMEM__S_STORE_DWORD(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_store_dword") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_SMEM__S_STORE_DWORD - - Inst_SMEM__S_STORE_DWORD::~Inst_SMEM__S_STORE_DWORD() - { - } // ~Inst_SMEM__S_STORE_DWORD - - // --- description from .arch file --- - // Write 1 dword to scalar data cache. - // If the offset is specified as an SGPR, the SGPR contains an unsigned - // BYTE offset (the 2 LSBs are ignored). - // If the offset is specified as an immediate 20-bit constant, the - // constant is an unsigned BYTE offset. 
- void - Inst_SMEM__S_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); - ConstScalarOperandU32 sdata(gpuDynInst, instData.SDATA); - - addr.read(); - sdata.read(); - - std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(), - sizeof(ScalarRegU32)); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, addr, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe. - issueRequest(gpuDynInst); - } // execute - - void - Inst_SMEM__S_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<1>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_SMEM__S_STORE_DWORDX2 class methods --- - - Inst_SMEM__S_STORE_DWORDX2::Inst_SMEM__S_STORE_DWORDX2(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_store_dwordx2") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_SMEM__S_STORE_DWORDX2 - - Inst_SMEM__S_STORE_DWORDX2::~Inst_SMEM__S_STORE_DWORDX2() - { - } // ~Inst_SMEM__S_STORE_DWORDX2 - - // --- description from .arch file --- - // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on - // the offset input. - void - Inst_SMEM__S_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); - ConstScalarOperandU64 sdata(gpuDynInst, instData.SDATA); - - addr.read(); - sdata.read(); - - std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(), - sizeof(ScalarRegU64)); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, addr, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe. - issueRequest(gpuDynInst); - } // execute - - void - Inst_SMEM__S_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<2>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_SMEM__S_STORE_DWORDX4 class methods --- - - Inst_SMEM__S_STORE_DWORDX4::Inst_SMEM__S_STORE_DWORDX4(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_store_dwordx4") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_SMEM__S_STORE_DWORDX4 - - Inst_SMEM__S_STORE_DWORDX4::~Inst_SMEM__S_STORE_DWORDX4() - { - } // ~Inst_SMEM__S_STORE_DWORDX4 - - // --- description from .arch file --- - // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on - // the offset input. 
- void - Inst_SMEM__S_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); - ConstScalarOperandU64 sdata(gpuDynInst, instData.SDATA); - - addr.read(); - sdata.read(); - - std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(), - sizeof(gpuDynInst->scalar_data)); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, addr, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe. - issueRequest(gpuDynInst); - } // execute - - void - Inst_SMEM__S_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<4>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_SMEM__S_BUFFER_STORE_DWORD class methods --- - - Inst_SMEM__S_BUFFER_STORE_DWORD::Inst_SMEM__S_BUFFER_STORE_DWORD( - InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_buffer_store_dword") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_SMEM__S_BUFFER_STORE_DWORD - - Inst_SMEM__S_BUFFER_STORE_DWORD::~Inst_SMEM__S_BUFFER_STORE_DWORD() - { - } // ~Inst_SMEM__S_BUFFER_STORE_DWORD - - // --- description from .arch file --- - // Write 1 dword to scalar data cache. See S_STORE_DWORD for details on the - // --- offset input. - void - Inst_SMEM__S_BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_SMEM__S_BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_SMEM__S_BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_SMEM__S_BUFFER_STORE_DWORDX2 class methods --- - - Inst_SMEM__S_BUFFER_STORE_DWORDX2::Inst_SMEM__S_BUFFER_STORE_DWORDX2( - InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_buffer_store_dwordx2") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_SMEM__S_BUFFER_STORE_DWORDX2 - - Inst_SMEM__S_BUFFER_STORE_DWORDX2::~Inst_SMEM__S_BUFFER_STORE_DWORDX2() - { - } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX2 - - // --- description from .arch file --- - // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on - // the offset input. - void - Inst_SMEM__S_BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_SMEM__S_BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_SMEM__S_BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_SMEM__S_BUFFER_STORE_DWORDX4 class methods --- - - Inst_SMEM__S_BUFFER_STORE_DWORDX4::Inst_SMEM__S_BUFFER_STORE_DWORDX4( - InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_buffer_store_dwordx4") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_SMEM__S_BUFFER_STORE_DWORDX4 - - Inst_SMEM__S_BUFFER_STORE_DWORDX4::~Inst_SMEM__S_BUFFER_STORE_DWORDX4() - { - } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX4 - - // --- description from .arch file --- - // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on - // the offset input. 
- void - Inst_SMEM__S_BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_SMEM__S_BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_SMEM__S_BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_SMEM__S_DCACHE_INV class methods --- - - Inst_SMEM__S_DCACHE_INV::Inst_SMEM__S_DCACHE_INV(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_dcache_inv") - { - } // Inst_SMEM__S_DCACHE_INV - - Inst_SMEM__S_DCACHE_INV::~Inst_SMEM__S_DCACHE_INV() - { - } // ~Inst_SMEM__S_DCACHE_INV - - // --- description from .arch file --- - // Invalidate the scalar data cache. - void - Inst_SMEM__S_DCACHE_INV::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SMEM__S_DCACHE_WB class methods --- - - Inst_SMEM__S_DCACHE_WB::Inst_SMEM__S_DCACHE_WB(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_dcache_wb") - { - } // Inst_SMEM__S_DCACHE_WB - - Inst_SMEM__S_DCACHE_WB::~Inst_SMEM__S_DCACHE_WB() - { - } // ~Inst_SMEM__S_DCACHE_WB - - // --- description from .arch file --- - // Write back dirty data in the scalar data cache. - void - Inst_SMEM__S_DCACHE_WB::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SMEM__S_DCACHE_INV_VOL class methods --- - - Inst_SMEM__S_DCACHE_INV_VOL::Inst_SMEM__S_DCACHE_INV_VOL(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_dcache_inv_vol") - { - } // Inst_SMEM__S_DCACHE_INV_VOL - - Inst_SMEM__S_DCACHE_INV_VOL::~Inst_SMEM__S_DCACHE_INV_VOL() - { - } // ~Inst_SMEM__S_DCACHE_INV_VOL - - // --- description from .arch file --- - // Invalidate the scalar data cache volatile lines. - void - Inst_SMEM__S_DCACHE_INV_VOL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SMEM__S_DCACHE_WB_VOL class methods --- - - Inst_SMEM__S_DCACHE_WB_VOL::Inst_SMEM__S_DCACHE_WB_VOL(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_dcache_wb_vol") - { - } // Inst_SMEM__S_DCACHE_WB_VOL - - Inst_SMEM__S_DCACHE_WB_VOL::~Inst_SMEM__S_DCACHE_WB_VOL() - { - } // ~Inst_SMEM__S_DCACHE_WB_VOL - - // --- description from .arch file --- - // Write back dirty data in the scalar data cache volatile lines. - void - Inst_SMEM__S_DCACHE_WB_VOL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SMEM__S_MEMTIME class methods --- - - Inst_SMEM__S_MEMTIME::Inst_SMEM__S_MEMTIME(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_memtime") - { - // s_memtime does not issue a memory request - setFlag(ALU); - } // Inst_SMEM__S_MEMTIME - - Inst_SMEM__S_MEMTIME::~Inst_SMEM__S_MEMTIME() - { - } // ~Inst_SMEM__S_MEMTIME - - // --- description from .arch file --- - // Return current 64-bit timestamp. - void - Inst_SMEM__S_MEMTIME::execute(GPUDynInstPtr gpuDynInst) - { - ScalarOperandU64 sdst(gpuDynInst, instData.SDATA); - sdst = (ScalarRegU64)gpuDynInst->computeUnit()->curCycle(); - sdst.write(); - } // execute - // --- Inst_SMEM__S_MEMREALTIME class methods --- - - Inst_SMEM__S_MEMREALTIME::Inst_SMEM__S_MEMREALTIME(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_memrealtime") - { - } // Inst_SMEM__S_MEMREALTIME - - Inst_SMEM__S_MEMREALTIME::~Inst_SMEM__S_MEMREALTIME() - { - } // ~Inst_SMEM__S_MEMREALTIME - - // --- description from .arch file --- - // Return current 64-bit RTC. 
-    void
-    Inst_SMEM__S_MEMREALTIME::execute(GPUDynInstPtr gpuDynInst)
-    {
-        panicUnimplemented();
-    } // execute
-    // --- Inst_SMEM__S_ATC_PROBE class methods ---
-
-    Inst_SMEM__S_ATC_PROBE::Inst_SMEM__S_ATC_PROBE(InFmt_SMEM *iFmt)
-        : Inst_SMEM(iFmt, "s_atc_probe")
-    {
-    } // Inst_SMEM__S_ATC_PROBE
-
-    Inst_SMEM__S_ATC_PROBE::~Inst_SMEM__S_ATC_PROBE()
-    {
-    } // ~Inst_SMEM__S_ATC_PROBE
-
-    // --- description from .arch file ---
-    // Probe or prefetch an address into the SQC data cache.
-    void
-    Inst_SMEM__S_ATC_PROBE::execute(GPUDynInstPtr gpuDynInst)
-    {
-        panicUnimplemented();
-    } // execute
-    // --- Inst_SMEM__S_ATC_PROBE_BUFFER class methods ---
-
-    Inst_SMEM__S_ATC_PROBE_BUFFER::Inst_SMEM__S_ATC_PROBE_BUFFER(
-        InFmt_SMEM *iFmt)
-        : Inst_SMEM(iFmt, "s_atc_probe_buffer")
-    {
-    } // Inst_SMEM__S_ATC_PROBE_BUFFER
-
-    Inst_SMEM__S_ATC_PROBE_BUFFER::~Inst_SMEM__S_ATC_PROBE_BUFFER()
-    {
-    } // ~Inst_SMEM__S_ATC_PROBE_BUFFER
-
-    // --- description from .arch file ---
-    // Probe or prefetch an address into the SQC data cache.
-    void
-    Inst_SMEM__S_ATC_PROBE_BUFFER::execute(GPUDynInstPtr gpuDynInst)
-    {
-        panicUnimplemented();
-    } // execute
-    // --- Inst_VOP2__V_CNDMASK_B32 class methods ---
-
-    Inst_VOP2__V_CNDMASK_B32::Inst_VOP2__V_CNDMASK_B32(InFmt_VOP2 *iFmt)
-        : Inst_VOP2(iFmt, "v_cndmask_b32")
-    {
-        setFlag(ALU);
-        setFlag(ReadsVCC);
-    } // Inst_VOP2__V_CNDMASK_B32
-
-    Inst_VOP2__V_CNDMASK_B32::~Inst_VOP2__V_CNDMASK_B32()
-    {
-    } // ~Inst_VOP2__V_CNDMASK_B32
-
-    // --- description from .arch file ---
-    // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC
-    // as a scalar GPR in S2.
-    void
-    Inst_VOP2__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst)
-    {
-        Wavefront *wf = gpuDynInst->wavefront();
-        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
-        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
-        VecOperandU32 vdst(gpuDynInst, instData.VDST);
-        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
-
-        src0.readSrc();
-        src1.read();
-        vcc.read();
-
-        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-            if (wf->execMask(lane)) {
-                vdst[lane]
-                    = bits(vcc.rawData(), lane) ? src1[lane] : src0[lane];
-            }
-        }
-
-        vdst.write();
-    } // execute
-    // --- Inst_VOP2__V_ADD_F32 class methods ---
-
-    Inst_VOP2__V_ADD_F32::Inst_VOP2__V_ADD_F32(InFmt_VOP2 *iFmt)
-        : Inst_VOP2(iFmt, "v_add_f32")
-    {
-        setFlag(ALU);
-        setFlag(F32);
-    } // Inst_VOP2__V_ADD_F32
-
-    Inst_VOP2__V_ADD_F32::~Inst_VOP2__V_ADD_F32()
-    {
-    } // ~Inst_VOP2__V_ADD_F32
-
-    // --- description from .arch file ---
-    // D.f = S0.f + S1.f.
-    void
-    Inst_VOP2__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
-    {
-        Wavefront *wf = gpuDynInst->wavefront();
-        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
-        VecOperandF32 src1(gpuDynInst, instData.VSRC1);
-        VecOperandF32 vdst(gpuDynInst, instData.VDST);
-
-        src0.readSrc();
-        src1.read();
-
-        if (isDPPInst()) {
-            VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
-            src0_dpp.read();
-
-            DPRINTF(VEGA, "Handling V_ADD_F32 SRC DPP. SRC0: register v[%d], "
-                    "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
-                    "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
-                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
-                    extData.iFmt_VOP_DPP.DPP_CTRL,
-                    extData.iFmt_VOP_DPP.SRC0_ABS,
-                    extData.iFmt_VOP_DPP.SRC0_NEG,
-                    extData.iFmt_VOP_DPP.SRC1_ABS,
-                    extData.iFmt_VOP_DPP.SRC1_NEG,
-                    extData.iFmt_VOP_DPP.BC,
-                    extData.iFmt_VOP_DPP.BANK_MASK,
-                    extData.iFmt_VOP_DPP.ROW_MASK);
-
-            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);
-
-            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-                if (wf->execMask(lane)) {
-                    vdst[lane] = src0_dpp[lane] + src1[lane];
-                }
-            }
-        } else {
-            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-                if (wf->execMask(lane)) {
-                    vdst[lane] = src0[lane] + src1[lane];
-                }
-            }
-        }
-
-        vdst.write();
-    } // execute
-    // --- Inst_VOP2__V_SUB_F32 class methods ---
-
-    Inst_VOP2__V_SUB_F32::Inst_VOP2__V_SUB_F32(InFmt_VOP2 *iFmt)
-        : Inst_VOP2(iFmt, "v_sub_f32")
-    {
-        setFlag(ALU);
-        setFlag(F32);
-    } // Inst_VOP2__V_SUB_F32
-
-    Inst_VOP2__V_SUB_F32::~Inst_VOP2__V_SUB_F32()
-    {
-    } // ~Inst_VOP2__V_SUB_F32
-
-    // --- description from .arch file ---
-    // D.f = S0.f - S1.f.
-    // SQ translates to V_ADD_F32.
-    void
-    Inst_VOP2__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst)
-    {
-        Wavefront *wf = gpuDynInst->wavefront();
-        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
-        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
-        VecOperandF32 vdst(gpuDynInst, instData.VDST);
-
-        src0.readSrc();
-        src1.read();
-
-        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-            if (wf->execMask(lane)) {
-                vdst[lane] = src0[lane] - src1[lane];
-            }
-        }
-
-        vdst.write();
-    } // execute
-    // --- Inst_VOP2__V_SUBREV_F32 class methods ---
-
-    Inst_VOP2__V_SUBREV_F32::Inst_VOP2__V_SUBREV_F32(InFmt_VOP2 *iFmt)
-        : Inst_VOP2(iFmt, "v_subrev_f32")
-    {
-        setFlag(ALU);
-        setFlag(F32);
-    } // Inst_VOP2__V_SUBREV_F32
-
-    Inst_VOP2__V_SUBREV_F32::~Inst_VOP2__V_SUBREV_F32()
-    {
-    } // ~Inst_VOP2__V_SUBREV_F32
-
-    // --- description from .arch file ---
-    // D.f = S1.f - S0.f.
-    // SQ translates to V_ADD_F32.
-    void
-    Inst_VOP2__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst)
-    {
-        Wavefront *wf = gpuDynInst->wavefront();
-        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
-        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
-        VecOperandF32 vdst(gpuDynInst, instData.VDST);
-
-        src0.readSrc();
-        src1.read();
-
-        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-            if (wf->execMask(lane)) {
-                vdst[lane] = src1[lane] - src0[lane];
-            }
-        }
-
-        vdst.write();
-    } // execute
-    // --- Inst_VOP2__V_MUL_LEGACY_F32 class methods ---
-
-    Inst_VOP2__V_MUL_LEGACY_F32::Inst_VOP2__V_MUL_LEGACY_F32(InFmt_VOP2 *iFmt)
-        : Inst_VOP2(iFmt, "v_mul_legacy_f32")
-    {
-        setFlag(ALU);
-        setFlag(F32);
-    } // Inst_VOP2__V_MUL_LEGACY_F32
-
-    Inst_VOP2__V_MUL_LEGACY_F32::~Inst_VOP2__V_MUL_LEGACY_F32()
-    {
-    } // ~Inst_VOP2__V_MUL_LEGACY_F32
-
-    // --- description from .arch file ---
-    // D.f = S0.f * S1.f (DX9 rules, 0.0*x = 0.0).
- void - Inst_VOP2__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] * src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MUL_F32 class methods --- - - Inst_VOP2__V_MUL_F32::Inst_VOP2__V_MUL_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mul_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP2__V_MUL_F32 - - Inst_VOP2__V_MUL_F32::~Inst_VOP2__V_MUL_F32() - { - } // ~Inst_VOP2__V_MUL_F32 - - // --- description from .arch file --- - // D.f = S0.f * S1.f. - void - Inst_VOP2__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isnan(src0[lane]) || - std::isnan(src1[lane])) { - vdst[lane] = NAN; - } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || - std::fpclassify(src0[lane]) == FP_ZERO) && - !std::signbit(src0[lane])) { - if (std::isinf(src1[lane])) { - vdst[lane] = NAN; - } else if (!std::signbit(src1[lane])) { - vdst[lane] = +0.0; - } else { - vdst[lane] = -0.0; - } - } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || - std::fpclassify(src0[lane]) == FP_ZERO) && - std::signbit(src0[lane])) { - if (std::isinf(src1[lane])) { - vdst[lane] = NAN; - } else if (std::signbit(src1[lane])) { - vdst[lane] = +0.0; - } else { - vdst[lane] = -0.0; - } - } else if (std::isinf(src0[lane]) && - !std::signbit(src0[lane])) { - if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || - std::fpclassify(src1[lane]) == FP_ZERO) { - vdst[lane] = NAN; - } else if (!std::signbit(src1[lane])) { - vdst[lane] = +INFINITY; - } else { - vdst[lane] = -INFINITY; - } - } else if (std::isinf(src0[lane]) && - std::signbit(src0[lane])) { - if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || - std::fpclassify(src1[lane]) == FP_ZERO) { - vdst[lane] = NAN; - } else if (std::signbit(src1[lane])) { - vdst[lane] = +INFINITY; - } else { - vdst[lane] = -INFINITY; - } - } else { - vdst[lane] = src0[lane] * src1[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MUL_I32_I24 class methods --- - - Inst_VOP2__V_MUL_I32_I24::Inst_VOP2__V_MUL_I32_I24(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mul_i32_i24") - { - setFlag(ALU); - } // Inst_VOP2__V_MUL_I32_I24 - - Inst_VOP2__V_MUL_I32_I24::~Inst_VOP2__V_MUL_I32_I24() - { - } // ~Inst_VOP2__V_MUL_I32_I24 - - // --- description from .arch file --- - // D.i = S0.i[23:0] * S1.i[23:0]. 
- void - Inst_VOP2__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = sext<24>(bits(src0[lane], 23, 0)) - * sext<24>(bits(src1[lane], 23, 0)); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MUL_HI_I32_I24 class methods --- - - Inst_VOP2__V_MUL_HI_I32_I24::Inst_VOP2__V_MUL_HI_I32_I24(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mul_hi_i32_i24") - { - setFlag(ALU); - } // Inst_VOP2__V_MUL_HI_I32_I24 - - Inst_VOP2__V_MUL_HI_I32_I24::~Inst_VOP2__V_MUL_HI_I32_I24() - { - } // ~Inst_VOP2__V_MUL_HI_I32_I24 - - // --- description from .arch file --- - // D.i = (S0.i[23:0] * S1.i[23:0])>>32. - void - Inst_VOP2__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemI64 tmp_src0 - = (VecElemI64)sext<24>(bits(src0[lane], 23, 0)); - VecElemI64 tmp_src1 - = (VecElemI64)sext<24>(bits(src1[lane], 23, 0)); - - vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MUL_U32_U24 class methods --- - - Inst_VOP2__V_MUL_U32_U24::Inst_VOP2__V_MUL_U32_U24(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mul_u32_u24") - { - setFlag(ALU); - } // Inst_VOP2__V_MUL_U32_U24 - - Inst_VOP2__V_MUL_U32_U24::~Inst_VOP2__V_MUL_U32_U24() - { - } // ~Inst_VOP2__V_MUL_U32_U24 - - // --- description from .arch file --- - // D.u = S0.u[23:0] * S1.u[23:0]. - void - Inst_VOP2__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst) - { - auto opImpl = [](VecOperandU32& src0, VecOperandU32& src1, - VecOperandU32& vdst, Wavefront* wf) { - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = bits(src0[lane], 23, 0) * - bits(src1[lane], 23, 0); - } - } - }; - - vop2Helper(gpuDynInst, opImpl); - } // execute - // --- Inst_VOP2__V_MUL_HI_U32_U24 class methods --- - - Inst_VOP2__V_MUL_HI_U32_U24::Inst_VOP2__V_MUL_HI_U32_U24(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mul_hi_u32_u24") - { - setFlag(ALU); - } // Inst_VOP2__V_MUL_HI_U32_U24 - - Inst_VOP2__V_MUL_HI_U32_U24::~Inst_VOP2__V_MUL_HI_U32_U24() - { - } // ~Inst_VOP2__V_MUL_HI_U32_U24 - - // --- description from .arch file --- - // D.i = (S0.u[23:0] * S1.u[23:0])>>32. 
- void - Inst_VOP2__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0); - VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0); - vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MIN_F32 class methods --- - - Inst_VOP2__V_MIN_F32::Inst_VOP2__V_MIN_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_min_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP2__V_MIN_F32 - - Inst_VOP2__V_MIN_F32::~Inst_VOP2__V_MIN_F32() - { - } // ~Inst_VOP2__V_MIN_F32 - - // --- description from .arch file --- - // D.f = (S0.f < S1.f ? S0.f : S1.f). - void - Inst_VOP2__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fmin(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MAX_F32 class methods --- - - Inst_VOP2__V_MAX_F32::Inst_VOP2__V_MAX_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_max_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP2__V_MAX_F32 - - Inst_VOP2__V_MAX_F32::~Inst_VOP2__V_MAX_F32() - { - } // ~Inst_VOP2__V_MAX_F32 - - // --- description from .arch file --- - // D.f = (S0.f >= S1.f ? S0.f : S1.f). - void - Inst_VOP2__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fmax(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MIN_I32 class methods --- - - Inst_VOP2__V_MIN_I32::Inst_VOP2__V_MIN_I32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_min_i32") - { - setFlag(ALU); - } // Inst_VOP2__V_MIN_I32 - - Inst_VOP2__V_MIN_I32::~Inst_VOP2__V_MIN_I32() - { - } // ~Inst_VOP2__V_MIN_I32 - - // --- description from .arch file --- - // D.i = min(S0.i, S1.i). - void - Inst_VOP2__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MAX_I32 class methods --- - - Inst_VOP2__V_MAX_I32::Inst_VOP2__V_MAX_I32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_max_i32") - { - setFlag(ALU); - } // Inst_VOP2__V_MAX_I32 - - Inst_VOP2__V_MAX_I32::~Inst_VOP2__V_MAX_I32() - { - } // ~Inst_VOP2__V_MAX_I32 - - // --- description from .arch file --- - // D.i = max(S0.i, S1.i). 
- void - Inst_VOP2__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MIN_U32 class methods --- - - Inst_VOP2__V_MIN_U32::Inst_VOP2__V_MIN_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_min_u32") - { - setFlag(ALU); - } // Inst_VOP2__V_MIN_U32 - - Inst_VOP2__V_MIN_U32::~Inst_VOP2__V_MIN_U32() - { - } // ~Inst_VOP2__V_MIN_U32 - - // --- description from .arch file --- - // D.u = min(S0.u, S1.u). - void - Inst_VOP2__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MAX_U32 class methods --- - - Inst_VOP2__V_MAX_U32::Inst_VOP2__V_MAX_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_max_u32") - { - setFlag(ALU); - } // Inst_VOP2__V_MAX_U32 - - Inst_VOP2__V_MAX_U32::~Inst_VOP2__V_MAX_U32() - { - } // ~Inst_VOP2__V_MAX_U32 - - // --- description from .arch file --- - // D.u = max(S0.u, S1.u). - void - Inst_VOP2__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_LSHRREV_B32 class methods --- - - Inst_VOP2__V_LSHRREV_B32::Inst_VOP2__V_LSHRREV_B32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_lshrrev_b32") - { - setFlag(ALU); - } // Inst_VOP2__V_LSHRREV_B32 - - Inst_VOP2__V_LSHRREV_B32::~Inst_VOP2__V_LSHRREV_B32() - { - } // ~Inst_VOP2__V_LSHRREV_B32 - - // --- description from .arch file --- - // D.u = S1.u >> S0.u[4:0]. - // The vacated bits are set to zero. - // SQ translates this to an internal SP opcode. - void - Inst_VOP2__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_ASHRREV_I32 class methods --- - - Inst_VOP2__V_ASHRREV_I32::Inst_VOP2__V_ASHRREV_I32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_ashrrev_i32") - { - setFlag(ALU); - } // Inst_VOP2__V_ASHRREV_I32 - - Inst_VOP2__V_ASHRREV_I32::~Inst_VOP2__V_ASHRREV_I32() - { - } // ~Inst_VOP2__V_ASHRREV_I32 - - // --- description from .arch file --- - // D.i = signext(S1.i) >> S0.i[4:0]. 
- // The vacated bits are set to the sign bit of the input value. - // SQ translates this to an internal SP opcode. - void - Inst_VOP2__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_LSHLREV_B32 class methods --- - - Inst_VOP2__V_LSHLREV_B32::Inst_VOP2__V_LSHLREV_B32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_lshlrev_b32") - { - setFlag(ALU); - } // Inst_VOP2__V_LSHLREV_B32 - - Inst_VOP2__V_LSHLREV_B32::~Inst_VOP2__V_LSHLREV_B32() - { - } // ~Inst_VOP2__V_LSHLREV_B32 - - // --- description from .arch file --- - // D.u = S1.u << S0.u[4:0]. - // SQ translates this to an internal SP opcode. - void - Inst_VOP2__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - VecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - if (isSDWAInst()) { - VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); - // use copies of original src0, src1, and vdst during selecting - VecOperandU32 origSrc0_sdwa(gpuDynInst, - extData.iFmt_VOP_SDWA.SRC0); - VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1); - VecOperandU32 origVdst(gpuDynInst, instData.VDST); - - src0_sdwa.read(); - origSrc0_sdwa.read(); - origSrc1.read(); - - DPRINTF(VEGA, "Handling V_LSHLREV_B32 SRC SDWA. SRC0: register " - "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: " - "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: " - "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", - extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, - extData.iFmt_VOP_SDWA.DST_U, - extData.iFmt_VOP_SDWA.CLMP, - extData.iFmt_VOP_SDWA.SRC0_SEL, - extData.iFmt_VOP_SDWA.SRC0_SEXT, - extData.iFmt_VOP_SDWA.SRC0_NEG, - extData.iFmt_VOP_SDWA.SRC0_ABS, - extData.iFmt_VOP_SDWA.SRC1_SEL, - extData.iFmt_VOP_SDWA.SRC1_SEXT, - extData.iFmt_VOP_SDWA.SRC1_NEG, - extData.iFmt_VOP_SDWA.SRC1_ABS); - - processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa, - src1, origSrc1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] << bits(src0_sdwa[lane], 4, 0); - origVdst[lane] = vdst[lane]; // keep copy consistent - } - } - - processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst); - } else { - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] << bits(src0[lane], 4, 0); - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_AND_B32 class methods --- - - Inst_VOP2__V_AND_B32::Inst_VOP2__V_AND_B32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_and_b32") - { - setFlag(ALU); - } // Inst_VOP2__V_AND_B32 - - Inst_VOP2__V_AND_B32::~Inst_VOP2__V_AND_B32() - { - } // ~Inst_VOP2__V_AND_B32 - - // --- description from .arch file --- - // D.u = S0.u & S1.u. - // Input and output modifiers not supported. 
- void - Inst_VOP2__V_AND_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - VecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - if (isDPPInst()) { - VecOperandU32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); - src0_dpp.read(); - - DPRINTF(VEGA, "Handling V_AND_B32 SRC DPP. SRC0: register v[%d], " - "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, " - "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, " - "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0, - extData.iFmt_VOP_DPP.DPP_CTRL, - extData.iFmt_VOP_DPP.SRC0_ABS, - extData.iFmt_VOP_DPP.SRC0_NEG, - extData.iFmt_VOP_DPP.SRC1_ABS, - extData.iFmt_VOP_DPP.SRC1_NEG, - extData.iFmt_VOP_DPP.BC, - extData.iFmt_VOP_DPP.BANK_MASK, - extData.iFmt_VOP_DPP.ROW_MASK); - - processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0_dpp[lane] & src1[lane]; - } - } - } else { - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] & src1[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_OR_B32 class methods --- - - Inst_VOP2__V_OR_B32::Inst_VOP2__V_OR_B32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_or_b32") - { - setFlag(ALU); - } // Inst_VOP2__V_OR_B32 - - Inst_VOP2__V_OR_B32::~Inst_VOP2__V_OR_B32() - { - } // ~Inst_VOP2__V_OR_B32 - - // --- description from .arch file --- - // D.u = S0.u | S1.u. - // Input and output modifiers not supported. - void - Inst_VOP2__V_OR_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - VecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - if (isSDWAInst()) { - VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); - // use copies of original src0, src1, and dest during selecting - VecOperandU32 origSrc0_sdwa(gpuDynInst, - extData.iFmt_VOP_SDWA.SRC0); - VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1); - VecOperandU32 origVdst(gpuDynInst, instData.VDST); - - src0_sdwa.read(); - origSrc0_sdwa.read(); - origSrc1.read(); - - DPRINTF(VEGA, "Handling V_OR_B32 SRC SDWA. 
SRC0: register v[%d], " - "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, " - "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, " - "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", - extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, - extData.iFmt_VOP_SDWA.DST_U, - extData.iFmt_VOP_SDWA.CLMP, - extData.iFmt_VOP_SDWA.SRC0_SEL, - extData.iFmt_VOP_SDWA.SRC0_SEXT, - extData.iFmt_VOP_SDWA.SRC0_NEG, - extData.iFmt_VOP_SDWA.SRC0_ABS, - extData.iFmt_VOP_SDWA.SRC1_SEL, - extData.iFmt_VOP_SDWA.SRC1_SEXT, - extData.iFmt_VOP_SDWA.SRC1_NEG, - extData.iFmt_VOP_SDWA.SRC1_ABS); - - processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa, - src1, origSrc1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0_sdwa[lane] | src1[lane]; - origVdst[lane] = vdst[lane]; // keep copy consistent - } - } - - processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst); - } else { - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] | src1[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_XOR_B32 class methods --- - - Inst_VOP2__V_XOR_B32::Inst_VOP2__V_XOR_B32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_xor_b32") - { - setFlag(ALU); - } // Inst_VOP2__V_XOR_B32 - - Inst_VOP2__V_XOR_B32::~Inst_VOP2__V_XOR_B32() - { - } // ~Inst_VOP2__V_XOR_B32 - - // --- description from .arch file --- - // D.u = S0.u ^ S1.u. - // Input and output modifiers not supported. - void - Inst_VOP2__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] ^ src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MAC_F32 class methods --- - - Inst_VOP2__V_MAC_F32::Inst_VOP2__V_MAC_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mac_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(MAC); - } // Inst_VOP2__V_MAC_F32 - - Inst_VOP2__V_MAC_F32::~Inst_VOP2__V_MAC_F32() - { - } // ~Inst_VOP2__V_MAC_F32 - - // --- description from .arch file --- - // D.f = S0.f * S1.f + D.f. - // SQ translates to V_MAD_F32. - void - Inst_VOP2__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - VecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - vdst.read(); - - if (isDPPInst()) { - VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); - src0_dpp.read(); - - DPRINTF(VEGA, "Handling V_MAC_F32 SRC DPP. 
SRC0: register v[%d], " - "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, " - "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, " - "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0, - extData.iFmt_VOP_DPP.DPP_CTRL, - extData.iFmt_VOP_DPP.SRC0_ABS, - extData.iFmt_VOP_DPP.SRC0_NEG, - extData.iFmt_VOP_DPP.SRC1_ABS, - extData.iFmt_VOP_DPP.SRC1_NEG, - extData.iFmt_VOP_DPP.BC, - extData.iFmt_VOP_DPP.BANK_MASK, - extData.iFmt_VOP_DPP.ROW_MASK); - - processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0_dpp[lane], src1[lane], - vdst[lane]); - } - } - } else { - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]); - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MADMK_F32 class methods --- - - Inst_VOP2__V_MADMK_F32::Inst_VOP2__V_MADMK_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_madmk_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(MAD); - } // Inst_VOP2__V_MADMK_F32 - - Inst_VOP2__V_MADMK_F32::~Inst_VOP2__V_MADMK_F32() - { - } // ~Inst_VOP2__V_MADMK_F32 - - // --- description from .arch file --- - // D.f = S0.f * K + S1.f; K is a 32-bit inline constant. - // This opcode cannot use the VOP3 encoding and cannot use input/output - // --- modifiers. - // SQ translates to V_MAD_F32. - void - Inst_VOP2__V_MADMK_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - VecElemF32 k = extData.imm_f32; - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], k, src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MADAK_F32 class methods --- - - Inst_VOP2__V_MADAK_F32::Inst_VOP2__V_MADAK_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_madak_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(MAD); - } // Inst_VOP2__V_MADAK_F32 - - Inst_VOP2__V_MADAK_F32::~Inst_VOP2__V_MADAK_F32() - { - } // ~Inst_VOP2__V_MADAK_F32 - - // --- description from .arch file --- - // D.f = S0.f * S1.f + K; K is a 32-bit inline constant. - // This opcode cannot use the VOP3 encoding and cannot use input/output - // --- modifiers. - // SQ translates to V_MAD_F32. - void - Inst_VOP2__V_MADAK_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - VecElemF32 k = extData.imm_f32; - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], src1[lane], k); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_ADD_CO_U32 class methods --- - - Inst_VOP2__V_ADD_CO_U32::Inst_VOP2__V_ADD_CO_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_add_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - } // Inst_VOP2__V_ADD_CO_U32 - - Inst_VOP2__V_ADD_CO_U32::~Inst_VOP2__V_ADD_CO_U32() - { - } // ~Inst_VOP2__V_ADD_CO_U32 - - // --- description from .arch file --- - // D.u = S0.u + S1.u; - // VCC[threadId] = (S0.u + S1.u >= 0x800000000ULL ? 1 : 0) is an UNSIGNED - // --- overflow or carry-out for V_ADDC_U32. 
- // In VOP3 the VCC destination may be an arbitrary SGPR-pair. - void - Inst_VOP2__V_ADD_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - VecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - if (isSDWAInst()) { - VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); - // use copies of original src0, src1, and dest during selecting - VecOperandU32 origSrc0_sdwa(gpuDynInst, - extData.iFmt_VOP_SDWA.SRC0); - VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1); - VecOperandU32 origVdst(gpuDynInst, instData.VDST); - - src0_sdwa.read(); - origSrc0_sdwa.read(); - origSrc1.read(); - - DPRINTF(VEGA, "Handling V_ADD_CO_U32 SRC SDWA. SRC0: register " - "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, " - "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, " - "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", - extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, - extData.iFmt_VOP_SDWA.DST_U, - extData.iFmt_VOP_SDWA.CLMP, - extData.iFmt_VOP_SDWA.SRC0_SEL, - extData.iFmt_VOP_SDWA.SRC0_SEXT, - extData.iFmt_VOP_SDWA.SRC0_NEG, - extData.iFmt_VOP_SDWA.SRC0_ABS, - extData.iFmt_VOP_SDWA.SRC1_SEL, - extData.iFmt_VOP_SDWA.SRC1_SEXT, - extData.iFmt_VOP_SDWA.SRC1_NEG, - extData.iFmt_VOP_SDWA.SRC1_ABS); - - processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa, - src1, origSrc1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0_sdwa[lane] + src1[lane]; - origVdst[lane] = vdst[lane]; // keep copy consistent - vcc.setBit(lane, ((VecElemU64)src0_sdwa[lane] - + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0); - } - } - - processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst); - } else { - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane]; - vcc.setBit(lane, ((VecElemU64)src0[lane] - + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0); - } - } - } - - vcc.write(); - vdst.write(); - } // execute - // --- Inst_VOP2__V_SUB_CO_U32 class methods --- - - Inst_VOP2__V_SUB_CO_U32::Inst_VOP2__V_SUB_CO_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_sub_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - } // Inst_VOP2__V_SUB_CO_U32 - - Inst_VOP2__V_SUB_CO_U32::~Inst_VOP2__V_SUB_CO_U32() - { - } // ~Inst_VOP2__V_SUB_CO_U32 - - // --- description from .arch file --- - // D.u = S0.u - S1.u; - // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or - // carry-out for V_SUBB_U32. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair. - void - Inst_VOP2__V_SUB_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] - src1[lane]; - vcc.setBit(lane, src1[lane] > src0[lane] ? 
1 : 0); - } - } - - vdst.write(); - vcc.write(); - } // execute - // --- Inst_VOP2__V_SUBREV_CO_U32 class methods --- - - Inst_VOP2__V_SUBREV_CO_U32::Inst_VOP2__V_SUBREV_CO_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subrev_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - } // Inst_VOP2__V_SUBREV_CO_U32 - - Inst_VOP2__V_SUBREV_CO_U32::~Inst_VOP2__V_SUBREV_CO_U32() - { - } // ~Inst_VOP2__V_SUBREV_CO_U32 - - // --- description from .arch file --- - // D.u = S1.u - S0.u; - // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or - // carry-out for V_SUBB_U32. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair. - void - Inst_VOP2__V_SUBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] - src0[lane]; - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vdst.write(); - vcc.write(); - } // execute - // --- Inst_VOP2__V_ADDC_CO_U32 class methods --- - - Inst_VOP2__V_ADDC_CO_U32::Inst_VOP2__V_ADDC_CO_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_addc_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(ReadsVCC); - } // Inst_VOP2__V_ADDC_CO_U32 - - Inst_VOP2__V_ADDC_CO_U32::~Inst_VOP2__V_ADDC_CO_U32() - { - } // ~Inst_VOP2__V_ADDC_CO_U32 - - // --- description from .arch file --- - // D.u = S0.u + S1.u + VCC[threadId]; - // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x800000000ULL ? 1 : 0) - // is an UNSIGNED overflow. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC - // source comes from the SGPR-pair at S2.u. - void - Inst_VOP2__V_ADDC_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - vcc.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane] - + bits(vcc.rawData(), lane); - vcc.setBit(lane, ((VecElemU64)src0[lane] - + (VecElemU64)src1[lane] - + (VecElemU64)bits(vcc.rawData(), lane, lane)) - >= 0x100000000 ? 1 : 0); - } - } - - vdst.write(); - vcc.write(); - } // execute - // --- Inst_VOP2__V_SUBB_CO_U32 class methods --- - - Inst_VOP2__V_SUBB_CO_U32::Inst_VOP2__V_SUBB_CO_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subb_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(ReadsVCC); - } // Inst_VOP2__V_SUBB_CO_U32 - - Inst_VOP2__V_SUBB_CO_U32::~Inst_VOP2__V_SUBB_CO_U32() - { - } // ~Inst_VOP2__V_SUBB_CO_U32 - - // --- description from .arch file --- - // D.u = S0.u - S1.u - VCC[threadId]; - // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED - // --- overflow. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC - // --- source comes from the SGPR-pair at S2.u. 
- void - Inst_VOP2__V_SUBB_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - vcc.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] - = src0[lane] - src1[lane] - bits(vcc.rawData(), lane); - vcc.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane)) - > src0[lane] ? 1 : 0); - } - } - - vdst.write(); - vcc.write(); - } // execute - // --- Inst_VOP2__V_SUBBREV_CO_U32 class methods --- - - Inst_VOP2__V_SUBBREV_CO_U32::Inst_VOP2__V_SUBBREV_CO_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subbrev_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(ReadsVCC); - } // Inst_VOP2__V_SUBBREV_CO_U32 - - Inst_VOP2__V_SUBBREV_CO_U32::~Inst_VOP2__V_SUBBREV_CO_U32() - { - } // ~Inst_VOP2__V_SUBBREV_CO_U32 - - // --- description from .arch file --- - // D.u = S1.u - S0.u - VCC[threadId]; - // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED - // overflow. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC - // source comes from the SGPR-pair at S2.u. SQ translates to V_SUBB_U32. - // SQ translates this to V_SUBREV_U32 with reversed operands. - void - Inst_VOP2__V_SUBBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - vcc.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] - = src1[lane] - src0[lane] - bits(vcc.rawData(), lane); - vcc.setBit(lane, (src0[lane] + bits(vcc.rawData(), lane)) - > src1[lane] ? 1 : 0); - } - } - - vdst.write(); - vcc.write(); - } // execute - // --- Inst_VOP2__V_ADD_F16 class methods --- - - Inst_VOP2__V_ADD_F16::Inst_VOP2__V_ADD_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_add_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_ADD_F16 - - Inst_VOP2__V_ADD_F16::~Inst_VOP2__V_ADD_F16() - { - } // ~Inst_VOP2__V_ADD_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 + S1.f16. - // Supports denormals, round mode, exception flags, saturation. - void - Inst_VOP2__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP2__V_SUB_F16 class methods --- - - Inst_VOP2__V_SUB_F16::Inst_VOP2__V_SUB_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_sub_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_SUB_F16 - - Inst_VOP2__V_SUB_F16::~Inst_VOP2__V_SUB_F16() - { - } // ~Inst_VOP2__V_SUB_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 - S1.f16. - // Supports denormals, round mode, exception flags, saturation. - // SQ translates to V_ADD_F16. 
- void - Inst_VOP2__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP2__V_SUBREV_F16 class methods --- - - Inst_VOP2__V_SUBREV_F16::Inst_VOP2__V_SUBREV_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subrev_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_SUBREV_F16 - - Inst_VOP2__V_SUBREV_F16::~Inst_VOP2__V_SUBREV_F16() - { - } // ~Inst_VOP2__V_SUBREV_F16 - - // --- description from .arch file --- - // D.f16 = S1.f16 - S0.f16. - // Supports denormals, round mode, exception flags, saturation. - // SQ translates to V_ADD_F16. - void - Inst_VOP2__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP2__V_MUL_F16 class methods --- - - Inst_VOP2__V_MUL_F16::Inst_VOP2__V_MUL_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mul_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_MUL_F16 - - Inst_VOP2__V_MUL_F16::~Inst_VOP2__V_MUL_F16() - { - } // ~Inst_VOP2__V_MUL_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 * S1.f16. - // Supports denormals, round mode, exception flags, saturation. - void - Inst_VOP2__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP2__V_MAC_F16 class methods --- - - Inst_VOP2__V_MAC_F16::Inst_VOP2__V_MAC_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mac_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(MAC); - } // Inst_VOP2__V_MAC_F16 - - Inst_VOP2__V_MAC_F16::~Inst_VOP2__V_MAC_F16() - { - } // ~Inst_VOP2__V_MAC_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 * S1.f16 + D.f16. - // Supports round mode, exception flags, saturation. - // SQ translates this to V_MAD_F16. - void - Inst_VOP2__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP2__V_MADMK_F16 class methods --- - - Inst_VOP2__V_MADMK_F16::Inst_VOP2__V_MADMK_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_madmk_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(MAD); - } // Inst_VOP2__V_MADMK_F16 - - Inst_VOP2__V_MADMK_F16::~Inst_VOP2__V_MADMK_F16() - { - } // ~Inst_VOP2__V_MADMK_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 * K.f16 + S1.f16; K is a 16-bit inline constant stored - // in the following literal DWORD. - // This opcode cannot use the VOP3 encoding and cannot use input/output - // modifiers. Supports round mode, exception flags, saturation. - // SQ translates this to V_MAD_F16. - void - Inst_VOP2__V_MADMK_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP2__V_MADAK_F16 class methods --- - - Inst_VOP2__V_MADAK_F16::Inst_VOP2__V_MADAK_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_madak_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(MAD); - } // Inst_VOP2__V_MADAK_F16 - - Inst_VOP2__V_MADAK_F16::~Inst_VOP2__V_MADAK_F16() - { - } // ~Inst_VOP2__V_MADAK_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 * S1.f16 + K.f16; K is a 16-bit inline constant stored - // in the following literal DWORD. - // This opcode cannot use the VOP3 encoding and cannot use input/output - // modifiers. Supports round mode, exception flags, saturation. - // SQ translates this to V_MAD_F16. 
- void - Inst_VOP2__V_MADAK_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP2__V_ADD_U16 class methods --- - - Inst_VOP2__V_ADD_U16::Inst_VOP2__V_ADD_U16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_add_u16") - { - setFlag(ALU); - } // Inst_VOP2__V_ADD_U16 - - Inst_VOP2__V_ADD_U16::~Inst_VOP2__V_ADD_U16() - { - } // ~Inst_VOP2__V_ADD_U16 - - // --- description from .arch file --- - // D.u16 = S0.u16 + S1.u16. - // Supports saturation (unsigned 16-bit integer domain). - void - Inst_VOP2__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_SUB_U16 class methods --- - - Inst_VOP2__V_SUB_U16::Inst_VOP2__V_SUB_U16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_sub_u16") - { - setFlag(ALU); - } // Inst_VOP2__V_SUB_U16 - - Inst_VOP2__V_SUB_U16::~Inst_VOP2__V_SUB_U16() - { - } // ~Inst_VOP2__V_SUB_U16 - - // --- description from .arch file --- - // D.u16 = S0.u16 - S1.u16. - // Supports saturation (unsigned 16-bit integer domain). - void - Inst_VOP2__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] - src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_SUBREV_U16 class methods --- - - Inst_VOP2__V_SUBREV_U16::Inst_VOP2__V_SUBREV_U16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subrev_u16") - { - setFlag(ALU); - } // Inst_VOP2__V_SUBREV_U16 - - Inst_VOP2__V_SUBREV_U16::~Inst_VOP2__V_SUBREV_U16() - { - } // ~Inst_VOP2__V_SUBREV_U16 - - // --- description from .arch file --- - // D.u16 = S1.u16 - S0.u16. - // Supports saturation (unsigned 16-bit integer domain). - // SQ translates this to V_SUB_U16 with reversed operands. - void - Inst_VOP2__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] - src0[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MUL_LO_U16 class methods --- - - Inst_VOP2__V_MUL_LO_U16::Inst_VOP2__V_MUL_LO_U16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mul_lo_u16") - { - setFlag(ALU); - } // Inst_VOP2__V_MUL_LO_U16 - - Inst_VOP2__V_MUL_LO_U16::~Inst_VOP2__V_MUL_LO_U16() - { - } // ~Inst_VOP2__V_MUL_LO_U16 - - // --- description from .arch file --- - // D.u16 = S0.u16 * S1.u16. - // Supports saturation (unsigned 16-bit integer domain). 
- void - Inst_VOP2__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] * src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_LSHLREV_B16 class methods --- - - Inst_VOP2__V_LSHLREV_B16::Inst_VOP2__V_LSHLREV_B16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_lshlrev_b16") - { - setFlag(ALU); - } // Inst_VOP2__V_LSHLREV_B16 - - Inst_VOP2__V_LSHLREV_B16::~Inst_VOP2__V_LSHLREV_B16() - { - } // ~Inst_VOP2__V_LSHLREV_B16 - - // --- description from .arch file --- - // D.u[15:0] = S1.u[15:0] << S0.u[3:0]. - // SQ translates this to an internal SP opcode. - void - Inst_VOP2__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] << bits(src0[lane], 3, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_LSHRREV_B16 class methods --- - - Inst_VOP2__V_LSHRREV_B16::Inst_VOP2__V_LSHRREV_B16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_lshrrev_b16") - { - setFlag(ALU); - } // Inst_VOP2__V_LSHRREV_B16 - - Inst_VOP2__V_LSHRREV_B16::~Inst_VOP2__V_LSHRREV_B16() - { - } // ~Inst_VOP2__V_LSHRREV_B16 - - // --- description from .arch file --- - // D.u[15:0] = S1.u[15:0] >> S0.u[3:0]. - // The vacated bits are set to zero. - // SQ translates this to an internal SP opcode. - void - Inst_VOP2__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> src0[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_ASHRREV_I16 class methods --- - - Inst_VOP2__V_ASHRREV_I16::Inst_VOP2__V_ASHRREV_I16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_ashrrev_i16") - { - setFlag(ALU); - } // Inst_VOP2__V_ASHRREV_I16 - - Inst_VOP2__V_ASHRREV_I16::~Inst_VOP2__V_ASHRREV_I16() - { - } // ~Inst_VOP2__V_ASHRREV_I16 - - // --- description from .arch file --- - // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0]. - // The vacated bits are set to the sign bit of the input value. - // SQ translates this to an internal SP opcode. 
- void - Inst_VOP2__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - VecOperandI16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> src0[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MAX_F16 class methods --- - - Inst_VOP2__V_MAX_F16::Inst_VOP2__V_MAX_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_max_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_MAX_F16 - - Inst_VOP2__V_MAX_F16::~Inst_VOP2__V_MAX_F16() - { - } // ~Inst_VOP2__V_MAX_F16 - - // --- description from .arch file --- - // D.f16 = max(S0.f16, S1.f16). - // IEEE compliant. Supports denormals, round mode, exception flags, - // saturation. - void - Inst_VOP2__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP2__V_MIN_F16 class methods --- - - Inst_VOP2__V_MIN_F16::Inst_VOP2__V_MIN_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_min_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_MIN_F16 - - Inst_VOP2__V_MIN_F16::~Inst_VOP2__V_MIN_F16() - { - } // ~Inst_VOP2__V_MIN_F16 - - // --- description from .arch file --- - // D.f16 = min(S0.f16, S1.f16). - // IEEE compliant. Supports denormals, round mode, exception flags, - // saturation. - void - Inst_VOP2__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP2__V_MAX_U16 class methods --- - - Inst_VOP2__V_MAX_U16::Inst_VOP2__V_MAX_U16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_max_u16") - { - setFlag(ALU); - } // Inst_VOP2__V_MAX_U16 - - Inst_VOP2__V_MAX_U16::~Inst_VOP2__V_MAX_U16() - { - } // ~Inst_VOP2__V_MAX_U16 - - // --- description from .arch file --- - // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]). - void - Inst_VOP2__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MAX_I16 class methods --- - - Inst_VOP2__V_MAX_I16::Inst_VOP2__V_MAX_I16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_max_i16") - { - setFlag(ALU); - } // Inst_VOP2__V_MAX_I16 - - Inst_VOP2__V_MAX_I16::~Inst_VOP2__V_MAX_I16() - { - } // ~Inst_VOP2__V_MAX_I16 - - // --- description from .arch file --- - // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]). 
- void - Inst_VOP2__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - VecOperandI16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MIN_U16 class methods --- - - Inst_VOP2__V_MIN_U16::Inst_VOP2__V_MIN_U16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_min_u16") - { - setFlag(ALU); - } // Inst_VOP2__V_MIN_U16 - - Inst_VOP2__V_MIN_U16::~Inst_VOP2__V_MIN_U16() - { - } // ~Inst_VOP2__V_MIN_U16 - - // --- description from .arch file --- - // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]). - void - Inst_VOP2__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MIN_I16 class methods --- - - Inst_VOP2__V_MIN_I16::Inst_VOP2__V_MIN_I16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_min_i16") - { - setFlag(ALU); - } // Inst_VOP2__V_MIN_I16 - - Inst_VOP2__V_MIN_I16::~Inst_VOP2__V_MIN_I16() - { - } // ~Inst_VOP2__V_MIN_I16 - - // --- description from .arch file --- - // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]). - void - Inst_VOP2__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - VecOperandI16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_LDEXP_F16 class methods --- - - Inst_VOP2__V_LDEXP_F16::Inst_VOP2__V_LDEXP_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_ldexp_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_LDEXP_F16 - - Inst_VOP2__V_LDEXP_F16::~Inst_VOP2__V_LDEXP_F16() - { - } // ~Inst_VOP2__V_LDEXP_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 * (2 ** S1.i16). 
- void - Inst_VOP2__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP2__V_ADD_U32 class methods --- - - Inst_VOP2__V_ADD_U32::Inst_VOP2__V_ADD_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_add_u32") - { - setFlag(ALU); - } // Inst_VOP2__V_ADD_U32 - - Inst_VOP2__V_ADD_U32::~Inst_VOP2__V_ADD_U32() - { - } // ~Inst_VOP2__V_ADD_U32 - - // --- description from .arch file --- - // D.u = S0.u + S1.u; - void - Inst_VOP2__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - VecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - if (isSDWAInst()) { - VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); - // use copies of original src0, src1, and dest during selecting - VecOperandU32 origSrc0_sdwa(gpuDynInst, - extData.iFmt_VOP_SDWA.SRC0); - VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1); - VecOperandU32 origVdst(gpuDynInst, instData.VDST); - - src0_sdwa.read(); - origSrc0_sdwa.read(); - origSrc1.read(); - - DPRINTF(VEGA, "Handling V_ADD_U32 SRC SDWA. SRC0: register v[%d], " - "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, " - "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, " - "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", - extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, - extData.iFmt_VOP_SDWA.DST_U, - extData.iFmt_VOP_SDWA.CLMP, - extData.iFmt_VOP_SDWA.SRC0_SEL, - extData.iFmt_VOP_SDWA.SRC0_SEXT, - extData.iFmt_VOP_SDWA.SRC0_NEG, - extData.iFmt_VOP_SDWA.SRC0_ABS, - extData.iFmt_VOP_SDWA.SRC1_SEL, - extData.iFmt_VOP_SDWA.SRC1_SEXT, - extData.iFmt_VOP_SDWA.SRC1_NEG, - extData.iFmt_VOP_SDWA.SRC1_ABS); - - processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa, - src1, origSrc1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0_sdwa[lane] + src1[lane]; - origVdst[lane] = vdst[lane]; // keep copy consistent - } - } - - processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst); - } else { - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_SUB_U32 class methods --- - - Inst_VOP2__V_SUB_U32::Inst_VOP2__V_SUB_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_sub_u32") - { - setFlag(ALU); - } // Inst_VOP2__V_SUB_U32 - - Inst_VOP2__V_SUB_U32::~Inst_VOP2__V_SUB_U32() - { - } // ~Inst_VOP2__V_SUB_U32 - - // --- description from .arch file --- - // D.u = S0.u - S1.u; - void - Inst_VOP2__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] - src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_SUBREV_U32 class methods --- - - Inst_VOP2__V_SUBREV_U32::Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subrev_u32") - { - setFlag(ALU); - } // Inst_VOP2__V_SUBREV_U32 - - Inst_VOP2__V_SUBREV_U32::~Inst_VOP2__V_SUBREV_U32() - { - } // ~Inst_VOP2__V_SUBREV_U32 - - // --- description from .arch file --- - // D.u = S1.u - S0.u; - void - 
Inst_VOP2__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] - src0[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_FMAC_F32 class methods --- - - Inst_VOP2__V_FMAC_F32::Inst_VOP2__V_FMAC_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_fmac_f32") - { - setFlag(ALU); - } // Inst_VOP2__V_FMAC_F32 - - Inst_VOP2__V_FMAC_F32::~Inst_VOP2__V_FMAC_F32() - { - } // ~Inst_VOP2__V_FMAC_F32 - - // --- description from .arch file --- - // D.u = S1.u - S0.u; - void - Inst_VOP2__V_FMAC_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - vdst.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_NOP class methods --- - - Inst_VOP1__V_NOP::Inst_VOP1__V_NOP(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_nop") - { - setFlag(Nop); - setFlag(ALU); - } // Inst_VOP1__V_NOP - - Inst_VOP1__V_NOP::~Inst_VOP1__V_NOP() - { - } // ~Inst_VOP1__V_NOP - - // --- description from .arch file --- - // Do nothing. - void - Inst_VOP1__V_NOP::execute(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_VOP1__V_MOV_B32 class methods --- - - Inst_VOP1__V_MOV_B32::Inst_VOP1__V_MOV_B32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_mov_b32") - { - setFlag(ALU); - } // Inst_VOP1__V_MOV_B32 - - Inst_VOP1__V_MOV_B32::~Inst_VOP1__V_MOV_B32() - { - } // ~Inst_VOP1__V_MOV_B32 - - // --- description from .arch file --- - // D.u = S0.u. - // Input and output modifiers not supported; this is an untyped operation. - void - Inst_VOP1__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (isDPPInst()) { - VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); - src_dpp.read(); - - DPRINTF(VEGA, "Handling V_MOV_B32 SRC DPP. 
SRC0: register v[%d], " - "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, " - "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, " - "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0, - extData.iFmt_VOP_DPP.DPP_CTRL, - extData.iFmt_VOP_DPP.SRC0_ABS, - extData.iFmt_VOP_DPP.SRC0_NEG, - extData.iFmt_VOP_DPP.SRC1_ABS, - extData.iFmt_VOP_DPP.SRC1_NEG, - extData.iFmt_VOP_DPP.BC, - extData.iFmt_VOP_DPP.BANK_MASK, - extData.iFmt_VOP_DPP.ROW_MASK); - - // NOTE: For VOP1, there is no SRC1, so make sure we're not trying - // to negate it or take the absolute value of it - assert(!extData.iFmt_VOP_DPP.SRC1_ABS); - assert(!extData.iFmt_VOP_DPP.SRC1_NEG); - processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src_dpp); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src_dpp[lane]; - } - } - } else { - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_READFIRSTLANE_B32 class methods --- - - Inst_VOP1__V_READFIRSTLANE_B32::Inst_VOP1__V_READFIRSTLANE_B32( - InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_readfirstlane_b32") - { - setFlag(ALU); - } // Inst_VOP1__V_READFIRSTLANE_B32 - - Inst_VOP1__V_READFIRSTLANE_B32::~Inst_VOP1__V_READFIRSTLANE_B32() - { - } // ~Inst_VOP1__V_READFIRSTLANE_B32 - - // --- description from .arch file --- - // Copy one VGPR value to one SGPR. D = SGPR destination, S0 = source data - // (VGPR# or M0 for lds direct access), Lane# = FindFirst1fromLSB(exec) - // (Lane# = 0 if exec is zero). Ignores exec mask for the access. SQ - // translates to V_READLANE_B32. - // Input and output modifiers not supported; this is an untyped operation. - void - Inst_VOP1__V_READFIRSTLANE_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarRegI32 src_lane(0); - ScalarRegU64 exec_mask = wf->execMask().to_ullong(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - ScalarOperandU32 sdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (exec_mask) { - src_lane = findLsbSet(exec_mask); - } - - sdst = src[src_lane]; - - sdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_I32_F64 class methods --- - - Inst_VOP1__V_CVT_I32_F64::Inst_VOP1__V_CVT_I32_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_i32_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_CVT_I32_F64 - - Inst_VOP1__V_CVT_I32_F64::~Inst_VOP1__V_CVT_I32_F64() - { - } // ~Inst_VOP1__V_CVT_I32_F64 - - // --- description from .arch file --- - // D.i = (int)S0.d. - // Out-of-range floating point values (including infinity) saturate. NaN is - // --- converted to 0. 
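For reference, the saturation rule described above (and implemented below with a std::frexp-based range check) can be captured by a small standalone helper. This is an illustrative sketch with a hypothetical name, not the simulator's code path:

    #include <cmath>
    #include <cstdint>
    #include <limits>

    // NaN -> 0, out-of-range values (including infinity) saturate, otherwise
    // truncate toward zero -- the semantics the .arch text describes.
    int32_t
    cvtI32F64(double s)    // hypothetical helper, not part of gem5
    {
        if (std::isnan(s))
            return 0;
        if (s >= 2147483648.0)
            return std::numeric_limits<int32_t>::max();
        if (s < -2147483648.0)
            return std::numeric_limits<int32_t>::min();
        return static_cast<int32_t>(s);
    }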
- void - Inst_VOP1__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - int exp; - std::frexp(src[lane],&exp); - if (std::isnan(src[lane])) { - vdst[lane] = 0; - } else if (std::isinf(src[lane]) || exp > 30) { - if (std::signbit(src[lane])) { - vdst[lane] = INT_MIN; - } else { - vdst[lane] = INT_MAX; - } - } else { - vdst[lane] = (VecElemI32)src[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_F64_I32 class methods --- - - Inst_VOP1__V_CVT_F64_I32::Inst_VOP1__V_CVT_F64_I32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f64_i32") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_CVT_F64_I32 - - Inst_VOP1__V_CVT_F64_I32::~Inst_VOP1__V_CVT_F64_I32() - { - } // ~Inst_VOP1__V_CVT_F64_I32 - - // --- description from .arch file --- - // D.d = (double)S0.i. - void - Inst_VOP1__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF64)src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_F32_I32 class methods --- - - Inst_VOP1__V_CVT_F32_I32::Inst_VOP1__V_CVT_F32_I32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f32_i32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_F32_I32 - - Inst_VOP1__V_CVT_F32_I32::~Inst_VOP1__V_CVT_F32_I32() - { - } // ~Inst_VOP1__V_CVT_F32_I32 - - // --- description from .arch file --- - // D.f = (float)S0.i. - void - Inst_VOP1__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_F32_U32 class methods --- - - Inst_VOP1__V_CVT_F32_U32::Inst_VOP1__V_CVT_F32_U32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f32_u32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_F32_U32 - - Inst_VOP1__V_CVT_F32_U32::~Inst_VOP1__V_CVT_F32_U32() - { - } // ~Inst_VOP1__V_CVT_F32_U32 - - // --- description from .arch file --- - // D.f = (float)S0.u. - void - Inst_VOP1__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_U32_F32 class methods --- - - Inst_VOP1__V_CVT_U32_F32::Inst_VOP1__V_CVT_U32_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_u32_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_U32_F32 - - Inst_VOP1__V_CVT_U32_F32::~Inst_VOP1__V_CVT_U32_F32() - { - } // ~Inst_VOP1__V_CVT_U32_F32 - - // --- description from .arch file --- - // D.u = (unsigned)S0.f. - // Out-of-range floating point values (including infinity) saturate. NaN is - // --- converted to 0. 
- void - Inst_VOP1__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - int exp; - std::frexp(src[lane],&exp); - if (std::isnan(src[lane])) { - vdst[lane] = 0; - } else if (std::isinf(src[lane])) { - if (std::signbit(src[lane])) { - vdst[lane] = 0; - } else { - vdst[lane] = UINT_MAX; - } - } else if (exp > 31) { - vdst[lane] = UINT_MAX; - } else { - vdst[lane] = (VecElemU32)src[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_I32_F32 class methods --- - - Inst_VOP1__V_CVT_I32_F32::Inst_VOP1__V_CVT_I32_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_i32_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_I32_F32 - - Inst_VOP1__V_CVT_I32_F32::~Inst_VOP1__V_CVT_I32_F32() - { - } // ~Inst_VOP1__V_CVT_I32_F32 - - // --- description from .arch file --- - // D.i = (int)S0.f. - // Out-of-range floating point values (including infinity) saturate. NaN is - // --- converted to 0. - void - Inst_VOP1__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - int exp; - std::frexp(src[lane],&exp); - if (std::isnan(src[lane])) { - vdst[lane] = 0; - } else if (std::isinf(src[lane]) || exp > 30) { - if (std::signbit(src[lane])) { - vdst[lane] = INT_MIN; - } else { - vdst[lane] = INT_MAX; - } - } else { - vdst[lane] = (VecElemI32)src[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_MOV_FED_B32 class methods --- - - Inst_VOP1__V_MOV_FED_B32::Inst_VOP1__V_MOV_FED_B32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_mov_fed_b32") - { - setFlag(ALU); - } // Inst_VOP1__V_MOV_FED_B32 - - Inst_VOP1__V_MOV_FED_B32::~Inst_VOP1__V_MOV_FED_B32() - { - } // ~Inst_VOP1__V_MOV_FED_B32 - - // --- description from .arch file --- - // D.u = S0.u; - // Introduce EDC double error upon write to dest vgpr without causing an - // --- exception. - // Input and output modifiers not supported; this is an untyped operation. - void - Inst_VOP1__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_CVT_F16_F32 class methods --- - - Inst_VOP1__V_CVT_F16_F32::Inst_VOP1__V_CVT_F16_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f16_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_F16_F32 - - Inst_VOP1__V_CVT_F16_F32::~Inst_VOP1__V_CVT_F16_F32() - { - } // ~Inst_VOP1__V_CVT_F16_F32 - - // --- description from .arch file --- - // D.f16 = flt32_to_flt16(S0.f). - // Supports input modifiers and creates FP16 denormals when appropriate. - void - Inst_VOP1__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_CVT_F32_F16 class methods --- - - Inst_VOP1__V_CVT_F32_F16::Inst_VOP1__V_CVT_F32_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f32_f16") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_F32_F16 - - Inst_VOP1__V_CVT_F32_F16::~Inst_VOP1__V_CVT_F32_F16() - { - } // ~Inst_VOP1__V_CVT_F32_F16 - - // --- description from .arch file --- - // D.f = flt16_to_flt32(S0.f16). - // FP16 denormal inputs are always accepted. 
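Both f16 conversions are left unimplemented here (their execute() bodies call panicUnimplemented()). For orientation only, expanding an IEEE binary16 payload to binary32, including the denormal inputs the description says must be accepted, could look like the following standalone sketch; the helper name and structure are assumptions, not gem5 code:

    #include <cstdint>
    #include <cstring>

    float
    halfToFloat(uint16_t h)    // hypothetical helper, not part of gem5
    {
        uint32_t sign = (h >> 15) & 0x1;
        uint32_t exp  = (h >> 10) & 0x1f;
        uint32_t mant = h & 0x3ff;
        uint32_t out;

        if (exp == 0) {
            if (mant == 0) {
                out = sign << 31;                           // +/- zero
            } else {
                // Denormal input: renormalize the mantissa for binary32.
                int shifts = 0;
                do { ++shifts; mant <<= 1; } while (!(mant & 0x400));
                out = (sign << 31) | ((uint32_t)(113 - shifts) << 23)
                    | ((mant & 0x3ff) << 13);
            }
        } else if (exp == 0x1f) {
            out = (sign << 31) | 0x7f800000 | (mant << 13); // inf or NaN
        } else {
            out = (sign << 31) | ((exp + 112) << 23) | (mant << 13);
        }

        float f;
        std::memcpy(&f, &out, sizeof(f));
        return f;
    }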
- void - Inst_VOP1__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_CVT_RPI_I32_F32 class methods --- - - Inst_VOP1__V_CVT_RPI_I32_F32::Inst_VOP1__V_CVT_RPI_I32_F32( - InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_rpi_i32_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_RPI_I32_F32 - - Inst_VOP1__V_CVT_RPI_I32_F32::~Inst_VOP1__V_CVT_RPI_I32_F32() - { - } // ~Inst_VOP1__V_CVT_RPI_I32_F32 - - // --- description from .arch file --- - // D.i = (int)floor(S0.f + 0.5). - void - Inst_VOP1__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_FLR_I32_F32 class methods --- - - Inst_VOP1__V_CVT_FLR_I32_F32::Inst_VOP1__V_CVT_FLR_I32_F32( - InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_flr_i32_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_FLR_I32_F32 - - Inst_VOP1__V_CVT_FLR_I32_F32::~Inst_VOP1__V_CVT_FLR_I32_F32() - { - } // ~Inst_VOP1__V_CVT_FLR_I32_F32 - - // --- description from .arch file --- - // D.i = (int)floor(S0.f). - void - Inst_VOP1__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemI32)std::floor(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_OFF_F32_I4 class methods --- - - Inst_VOP1__V_CVT_OFF_F32_I4::Inst_VOP1__V_CVT_OFF_F32_I4(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_off_f32_i4") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_OFF_F32_I4 - - Inst_VOP1__V_CVT_OFF_F32_I4::~Inst_VOP1__V_CVT_OFF_F32_I4() - { - } // ~Inst_VOP1__V_CVT_OFF_F32_I4 - - // --- description from .arch file --- - // 4-bit signed int to 32-bit float. Used for interpolation in shader. - void - Inst_VOP1__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst) - { - // Could not parse sq_uc.arch desc field - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_CVT_F32_F64 class methods --- - - Inst_VOP1__V_CVT_F32_F64::Inst_VOP1__V_CVT_F32_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f32_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_CVT_F32_F64 - - Inst_VOP1__V_CVT_F32_F64::~Inst_VOP1__V_CVT_F32_F64() - { - } // ~Inst_VOP1__V_CVT_F32_F64 - - // --- description from .arch file --- - // D.f = (float)S0.d. 
- void - Inst_VOP1__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_F64_F32 class methods --- - - Inst_VOP1__V_CVT_F64_F32::Inst_VOP1__V_CVT_F64_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f64_f32") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_CVT_F64_F32 - - Inst_VOP1__V_CVT_F64_F32::~Inst_VOP1__V_CVT_F64_F32() - { - } // ~Inst_VOP1__V_CVT_F64_F32 - - // --- description from .arch file --- - // D.d = (double)S0.f. - void - Inst_VOP1__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF64)src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_F32_UBYTE0 class methods --- - - Inst_VOP1__V_CVT_F32_UBYTE0::Inst_VOP1__V_CVT_F32_UBYTE0(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f32_ubyte0") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_F32_UBYTE0 - - Inst_VOP1__V_CVT_F32_UBYTE0::~Inst_VOP1__V_CVT_F32_UBYTE0() - { - } // ~Inst_VOP1__V_CVT_F32_UBYTE0 - - // --- description from .arch file --- - // D.f = (float)(S0.u[7:0]). - void - Inst_VOP1__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0)); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_F32_UBYTE1 class methods --- - - Inst_VOP1__V_CVT_F32_UBYTE1::Inst_VOP1__V_CVT_F32_UBYTE1(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f32_ubyte1") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_F32_UBYTE1 - - Inst_VOP1__V_CVT_F32_UBYTE1::~Inst_VOP1__V_CVT_F32_UBYTE1() - { - } // ~Inst_VOP1__V_CVT_F32_UBYTE1 - - // --- description from .arch file --- - // D.f = (float)(S0.u[15:8]). - void - Inst_VOP1__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8)); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_F32_UBYTE2 class methods --- - - Inst_VOP1__V_CVT_F32_UBYTE2::Inst_VOP1__V_CVT_F32_UBYTE2(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f32_ubyte2") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_F32_UBYTE2 - - Inst_VOP1__V_CVT_F32_UBYTE2::~Inst_VOP1__V_CVT_F32_UBYTE2() - { - } // ~Inst_VOP1__V_CVT_F32_UBYTE2 - - // --- description from .arch file --- - // D.f = (float)(S0.u[23:16]). 
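The four UBYTE variants differ only in which byte of the source dword they select before converting; the bits(src, hi, lo) calls above and below are equivalent to a shift-and-mask. A minimal sketch (the helper name is illustrative):

    #include <cstdint>

    float
    cvtF32UByte(uint32_t src, unsigned byteSel)   // byteSel in [0, 3]
    {
        return static_cast<float>((src >> (8 * byteSel)) & 0xff);
    }
    // e.g. cvtF32UByte(0x11223344, 2) == 34.0f -- selects the 0x22 byte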
- void - Inst_VOP1__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16)); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_F32_UBYTE3 class methods --- - - Inst_VOP1__V_CVT_F32_UBYTE3::Inst_VOP1__V_CVT_F32_UBYTE3(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f32_ubyte3") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_F32_UBYTE3 - - Inst_VOP1__V_CVT_F32_UBYTE3::~Inst_VOP1__V_CVT_F32_UBYTE3() - { - } // ~Inst_VOP1__V_CVT_F32_UBYTE3 - - // --- description from .arch file --- - // D.f = (float)(S0.u[31:24]). - void - Inst_VOP1__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24)); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_U32_F64 class methods --- - - Inst_VOP1__V_CVT_U32_F64::Inst_VOP1__V_CVT_U32_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_u32_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_CVT_U32_F64 - - Inst_VOP1__V_CVT_U32_F64::~Inst_VOP1__V_CVT_U32_F64() - { - } // ~Inst_VOP1__V_CVT_U32_F64 - - // --- description from .arch file --- - // D.u = (unsigned)S0.d. - // Out-of-range floating point values (including infinity) saturate. NaN is - // --- converted to 0. - void - Inst_VOP1__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - int exp; - std::frexp(src[lane],&exp); - if (std::isnan(src[lane])) { - vdst[lane] = 0; - } else if (std::isinf(src[lane])) { - if (std::signbit(src[lane])) { - vdst[lane] = 0; - } else { - vdst[lane] = UINT_MAX; - } - } else if (exp > 31) { - vdst[lane] = UINT_MAX; - } else { - vdst[lane] = (VecElemU32)src[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_F64_U32 class methods --- - - Inst_VOP1__V_CVT_F64_U32::Inst_VOP1__V_CVT_F64_U32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f64_u32") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_CVT_F64_U32 - - Inst_VOP1__V_CVT_F64_U32::~Inst_VOP1__V_CVT_F64_U32() - { - } // ~Inst_VOP1__V_CVT_F64_U32 - - // --- description from .arch file --- - // D.d = (double)S0.u. 
- void - Inst_VOP1__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF64)src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_TRUNC_F64 class methods --- - - Inst_VOP1__V_TRUNC_F64::Inst_VOP1__V_TRUNC_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_trunc_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_TRUNC_F64 - - Inst_VOP1__V_TRUNC_F64::~Inst_VOP1__V_TRUNC_F64() - { - } // ~Inst_VOP1__V_TRUNC_F64 - - // --- description from .arch file --- - // D.d = trunc(S0.d), return integer part of S0.d. - void - Inst_VOP1__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::trunc(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CEIL_F64 class methods --- - - Inst_VOP1__V_CEIL_F64::Inst_VOP1__V_CEIL_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_ceil_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_CEIL_F64 - - Inst_VOP1__V_CEIL_F64::~Inst_VOP1__V_CEIL_F64() - { - } // ~Inst_VOP1__V_CEIL_F64 - - // --- description from .arch file --- - // D.d = trunc(S0.d); - // if(S0.d > 0.0 && S0.d != D.d) then D.d += 1.0. - void - Inst_VOP1__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::ceil(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_RNDNE_F64 class methods --- - - Inst_VOP1__V_RNDNE_F64::Inst_VOP1__V_RNDNE_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rndne_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_RNDNE_F64 - - Inst_VOP1__V_RNDNE_F64::~Inst_VOP1__V_RNDNE_F64() - { - } // ~Inst_VOP1__V_RNDNE_F64 - - // --- description from .arch file --- - // D.d = round_nearest_even(S0.d). - void - Inst_VOP1__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = roundNearestEven(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_FLOOR_F64 class methods --- - - Inst_VOP1__V_FLOOR_F64::Inst_VOP1__V_FLOOR_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_floor_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_FLOOR_F64 - - Inst_VOP1__V_FLOOR_F64::~Inst_VOP1__V_FLOOR_F64() - { - } // ~Inst_VOP1__V_FLOOR_F64 - - // --- description from .arch file --- - // D.d = trunc(S0.d); - // if(S0.d < 0.0 && S0.d != D.d) then D.d += -1.0. 
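The .arch pseudocode defines floor (and ceil) as trunc plus a conditional fix-up, while the simulator simply calls std::floor / std::ceil. A quick standalone check, with hypothetical names, that the two formulations agree:

    #include <cassert>
    #include <cmath>

    double
    floorFromArchPseudocode(double s)   // illustrative only
    {
        double d = std::trunc(s);
        if (s < 0.0 && s != d)
            d += -1.0;
        return d;
    }

    int
    main()
    {
        assert(floorFromArchPseudocode(-2.5) == std::floor(-2.5)); // both -3.0
        assert(floorFromArchPseudocode(2.5) == std::floor(2.5));   // both 2.0
        return 0;
    }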
- void - Inst_VOP1__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::floor(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_FRACT_F32 class methods --- - - Inst_VOP1__V_FRACT_F32::Inst_VOP1__V_FRACT_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_fract_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_FRACT_F32 - - Inst_VOP1__V_FRACT_F32::~Inst_VOP1__V_FRACT_F32() - { - } // ~Inst_VOP1__V_FRACT_F32 - - // --- description from .arch file --- - // D.f = S0.f - floor(S0.f). - void - Inst_VOP1__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemF32 int_part(0.0); - vdst[lane] = std::modf(src[lane], &int_part); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_TRUNC_F32 class methods --- - - Inst_VOP1__V_TRUNC_F32::Inst_VOP1__V_TRUNC_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_trunc_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_TRUNC_F32 - - Inst_VOP1__V_TRUNC_F32::~Inst_VOP1__V_TRUNC_F32() - { - } // ~Inst_VOP1__V_TRUNC_F32 - - // --- description from .arch file --- - // D.f = trunc(S0.f), return integer part of S0.f. - void - Inst_VOP1__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst (gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::trunc(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CEIL_F32 class methods --- - - Inst_VOP1__V_CEIL_F32::Inst_VOP1__V_CEIL_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_ceil_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CEIL_F32 - - Inst_VOP1__V_CEIL_F32::~Inst_VOP1__V_CEIL_F32() - { - } // ~Inst_VOP1__V_CEIL_F32 - - // --- description from .arch file --- - // D.f = trunc(S0.f); - // if(S0.f > 0.0 && S0.f != D.f) then D.f += 1.0. - void - Inst_VOP1__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::ceil(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_RNDNE_F32 class methods --- - - Inst_VOP1__V_RNDNE_F32::Inst_VOP1__V_RNDNE_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rndne_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_RNDNE_F32 - - Inst_VOP1__V_RNDNE_F32::~Inst_VOP1__V_RNDNE_F32() - { - } // ~Inst_VOP1__V_RNDNE_F32 - - // --- description from .arch file --- - // D.f = round_nearest_even(S0.f). 
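round_nearest_even resolves ties toward the even neighbor rather than rounding half up; the simulator uses its own roundNearestEven() helper for this. In terms of the standard library, the same behavior can be sketched as follows (illustrative only, relying on the FE_TONEAREST rounding mode):

    #include <cfenv>
    #include <cmath>

    float
    rndne(float s)    // hypothetical helper, not part of gem5
    {
        std::fesetround(FE_TONEAREST);  // round-to-nearest, ties to even
        return std::nearbyint(s);
    }
    // e.g. rndne(0.5f) == 0.0f and rndne(1.5f) == 2.0f -- ties go to even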
- void - Inst_VOP1__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = roundNearestEven(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_FLOOR_F32 class methods --- - - Inst_VOP1__V_FLOOR_F32::Inst_VOP1__V_FLOOR_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_floor_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_FLOOR_F32 - - Inst_VOP1__V_FLOOR_F32::~Inst_VOP1__V_FLOOR_F32() - { - } // ~Inst_VOP1__V_FLOOR_F32 - - // --- description from .arch file --- - // D.f = trunc(S0.f); - // if(S0.f < 0.0 && S0.f != D.f) then D.f += -1.0. - void - Inst_VOP1__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::floor(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_EXP_F32 class methods --- - - Inst_VOP1__V_EXP_F32::Inst_VOP1__V_EXP_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_exp_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_EXP_F32 - - Inst_VOP1__V_EXP_F32::~Inst_VOP1__V_EXP_F32() - { - } // ~Inst_VOP1__V_EXP_F32 - - // --- description from .arch file --- - // D.f = pow(2.0, S0.f). - void - Inst_VOP1__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::pow(2.0, src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_LOG_F32 class methods --- - - Inst_VOP1__V_LOG_F32::Inst_VOP1__V_LOG_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_log_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_LOG_F32 - - Inst_VOP1__V_LOG_F32::~Inst_VOP1__V_LOG_F32() - { - } // ~Inst_VOP1__V_LOG_F32 - - // --- description from .arch file --- - // D.f = log2(S0.f). Base 2 logarithm. - void - Inst_VOP1__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::log2(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_RCP_F32 class methods --- - - Inst_VOP1__V_RCP_F32::Inst_VOP1__V_RCP_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rcp_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_RCP_F32 - - Inst_VOP1__V_RCP_F32::~Inst_VOP1__V_RCP_F32() - { - } // ~Inst_VOP1__V_RCP_F32 - - // --- description from .arch file --- - // D.f = 1.0 / S0.f. Reciprocal with IEEE rules and < 1ulp error. 
- void - Inst_VOP1__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = 1.0 / src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_RCP_IFLAG_F32 class methods --- - - Inst_VOP1__V_RCP_IFLAG_F32::Inst_VOP1__V_RCP_IFLAG_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rcp_iflag_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_RCP_IFLAG_F32 - - Inst_VOP1__V_RCP_IFLAG_F32::~Inst_VOP1__V_RCP_IFLAG_F32() - { - } // ~Inst_VOP1__V_RCP_IFLAG_F32 - - // --- description from .arch file --- - // D.f = 1.0 / S0.f. Reciprocal intended for integer division, can raise - // --- integer DIV_BY_ZERO exception but cannot raise floating-point - // --- exceptions. - void - Inst_VOP1__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = 1.0 / src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_RSQ_F32 class methods --- - - Inst_VOP1__V_RSQ_F32::Inst_VOP1__V_RSQ_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rsq_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_RSQ_F32 - - Inst_VOP1__V_RSQ_F32::~Inst_VOP1__V_RSQ_F32() - { - } // ~Inst_VOP1__V_RSQ_F32 - - // --- description from .arch file --- - // D.f = 1.0 / sqrt(S0.f). Reciprocal square root with IEEE rules. - void - Inst_VOP1__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = 1.0 / std::sqrt(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_RCP_F64 class methods --- - - Inst_VOP1__V_RCP_F64::Inst_VOP1__V_RCP_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rcp_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_RCP_F64 - - Inst_VOP1__V_RCP_F64::~Inst_VOP1__V_RCP_F64() - { - } // ~Inst_VOP1__V_RCP_F64 - - // --- description from .arch file --- - // D.d = 1.0 / S0.d. - void - Inst_VOP1__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::fpclassify(src[lane]) == FP_ZERO) { - vdst[lane] = +INFINITY; - } else if (std::isnan(src[lane])) { - vdst[lane] = NAN; - } else if (std::isinf(src[lane])) { - if (std::signbit(src[lane])) { - vdst[lane] = -0.0; - } else { - vdst[lane] = 0.0; - } - } else { - vdst[lane] = 1.0 / src[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_RSQ_F64 class methods --- - - Inst_VOP1__V_RSQ_F64::Inst_VOP1__V_RSQ_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rsq_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_RSQ_F64 - - Inst_VOP1__V_RSQ_F64::~Inst_VOP1__V_RSQ_F64() - { - } // ~Inst_VOP1__V_RSQ_F64 - - // --- description from .arch file --- - // D.d = 1.0 / sqrt(S0.d). 
See V_RSQ_F32. - void - Inst_VOP1__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::fpclassify(src[lane]) == FP_ZERO) { - vdst[lane] = +INFINITY; - } else if (std::isnan(src[lane])) { - vdst[lane] = NAN; - } else if (std::isinf(src[lane]) - && !std::signbit(src[lane])) { - vdst[lane] = 0.0; - } else if (std::signbit(src[lane])) { - vdst[lane] = NAN; - } else { - vdst[lane] = 1.0 / std::sqrt(src[lane]); - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_SQRT_F32 class methods --- - - Inst_VOP1__V_SQRT_F32::Inst_VOP1__V_SQRT_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_sqrt_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_SQRT_F32 - - Inst_VOP1__V_SQRT_F32::~Inst_VOP1__V_SQRT_F32() - { - } // ~Inst_VOP1__V_SQRT_F32 - - // --- description from .arch file --- - // D.f = sqrt(S0.f). - void - Inst_VOP1__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::sqrt(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_SQRT_F64 class methods --- - - Inst_VOP1__V_SQRT_F64::Inst_VOP1__V_SQRT_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_sqrt_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_SQRT_F64 - - Inst_VOP1__V_SQRT_F64::~Inst_VOP1__V_SQRT_F64() - { - } // ~Inst_VOP1__V_SQRT_F64 - - // --- description from .arch file --- - // D.d = sqrt(S0.d). - void - Inst_VOP1__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::sqrt(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_SIN_F32 class methods --- - - Inst_VOP1__V_SIN_F32::Inst_VOP1__V_SIN_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_sin_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_SIN_F32 - - Inst_VOP1__V_SIN_F32::~Inst_VOP1__V_SIN_F32() - { - } // ~Inst_VOP1__V_SIN_F32 - - // --- description from .arch file --- - // D.f = sin(S0.f * 2 * PI). - // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in - // float 0.0. 
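Note that the operand is measured in revolutions rather than radians: the implementation below multiplies by 2*PI (read from the REG_PI scalar register) before calling std::sin. A quick standalone check of that mapping (the pi constant is written out explicitly here):

    #include <cassert>
    #include <cmath>

    int
    main()
    {
        const float pi = 3.14159265358979f;
        float quarterTurn = 0.25f;
        float result = std::sin(quarterTurn * 2.0f * pi);
        assert(std::fabs(result - 1.0f) < 1e-6f);  // sin(pi/2) == 1
        return 0;
    }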
- void - Inst_VOP1__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - ConstScalarOperandF32 pi(gpuDynInst, REG_PI); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - pi.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (src[lane] < -256.0 || src[lane] > 256.0) { - vdst[lane] = 0.0; - } else { - vdst[lane] = std::sin(src[lane] * 2.0 * pi.rawData()); - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_COS_F32 class methods --- - - Inst_VOP1__V_COS_F32::Inst_VOP1__V_COS_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cos_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_COS_F32 - - Inst_VOP1__V_COS_F32::~Inst_VOP1__V_COS_F32() - { - } // ~Inst_VOP1__V_COS_F32 - - // --- description from .arch file --- - // D.f = cos(S0.f * 2 * PI). - // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in - // float 1.0. - void - Inst_VOP1__V_COS_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - ConstScalarOperandF32 pi(gpuDynInst, REG_PI); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - pi.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (src[lane] < -256.0 || src[lane] > 256.0) { - vdst[lane] = 0.0; - } else { - vdst[lane] = std::cos(src[lane] * 2.0 * pi.rawData()); - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_NOT_B32 class methods --- - - Inst_VOP1__V_NOT_B32::Inst_VOP1__V_NOT_B32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_not_b32") - { - setFlag(ALU); - } // Inst_VOP1__V_NOT_B32 - - Inst_VOP1__V_NOT_B32::~Inst_VOP1__V_NOT_B32() - { - } // ~Inst_VOP1__V_NOT_B32 - - // --- description from .arch file --- - // D.u = ~S0.u. - // Input and output modifiers not supported. - void - Inst_VOP1__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = ~src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_BFREV_B32 class methods --- - - Inst_VOP1__V_BFREV_B32::Inst_VOP1__V_BFREV_B32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_bfrev_b32") - { - setFlag(ALU); - } // Inst_VOP1__V_BFREV_B32 - - Inst_VOP1__V_BFREV_B32::~Inst_VOP1__V_BFREV_B32() - { - } // ~Inst_VOP1__V_BFREV_B32 - - // --- description from .arch file --- - // D.u[31:0] = S0.u[0:31], bitfield reverse. - // Input and output modifiers not supported. 
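reverseBits() swaps bit 0 with bit 31, bit 1 with bit 30, and so on. A minimal standalone equivalent (the name is illustrative):

    #include <cstdint>

    uint32_t
    reverseBits32(uint32_t v)    // hypothetical helper, not part of gem5
    {
        uint32_t r = 0;
        for (int i = 0; i < 32; ++i) {
            r = (r << 1) | (v & 1);  // append the lowest bit of v to r
            v >>= 1;
        }
        return r;
    }
    // e.g. reverseBits32(0x00000001) == 0x80000000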
- void - Inst_VOP1__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = reverseBits(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_FFBH_U32 class methods --- - - Inst_VOP1__V_FFBH_U32::Inst_VOP1__V_FFBH_U32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_ffbh_u32") - { - setFlag(ALU); - } // Inst_VOP1__V_FFBH_U32 - - Inst_VOP1__V_FFBH_U32::~Inst_VOP1__V_FFBH_U32() - { - } // ~Inst_VOP1__V_FFBH_U32 - - // --- description from .arch file --- - // D.u = position of first 1 in S0.u from MSB; - // D.u = 0xffffffff if S0.u == 0. - void - Inst_VOP1__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = findFirstOneMsb(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_FFBL_B32 class methods --- - - Inst_VOP1__V_FFBL_B32::Inst_VOP1__V_FFBL_B32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_ffbl_b32") - { - setFlag(ALU); - } // Inst_VOP1__V_FFBL_B32 - - Inst_VOP1__V_FFBL_B32::~Inst_VOP1__V_FFBL_B32() - { - } // ~Inst_VOP1__V_FFBL_B32 - - // --- description from .arch file --- - // D.u = position of first 1 in S0.u from LSB; - // D.u = 0xffffffff if S0.u == 0. - void - Inst_VOP1__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = findFirstOne(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_FFBH_I32 class methods --- - - Inst_VOP1__V_FFBH_I32::Inst_VOP1__V_FFBH_I32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_ffbh_i32") - { - setFlag(ALU); - } // Inst_VOP1__V_FFBH_I32 - - Inst_VOP1__V_FFBH_I32::~Inst_VOP1__V_FFBH_I32() - { - } // ~Inst_VOP1__V_FFBH_I32 - - // --- description from .arch file --- - // D.u = position of first bit different from sign bit in S0.i from MSB; - // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff. - void - Inst_VOP1__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src(gpuDynInst, instData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = firstOppositeSignBit(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_FREXP_EXP_I32_F64 class methods --- - - Inst_VOP1__V_FREXP_EXP_I32_F64::Inst_VOP1__V_FREXP_EXP_I32_F64( - InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_frexp_exp_i32_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_FREXP_EXP_I32_F64 - - Inst_VOP1__V_FREXP_EXP_I32_F64::~Inst_VOP1__V_FREXP_EXP_I32_F64() - { - } // ~Inst_VOP1__V_FREXP_EXP_I32_F64 - - // --- description from .arch file --- - // See V_FREXP_EXP_I32_F32. 
- void - Inst_VOP1__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isinf(src[lane]) || std::isnan(src[lane])) { - vdst[lane] = 0; - } else { - VecElemI32 exp = 0; - std::frexp(src[lane], &exp); - vdst[lane] = exp; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_FREXP_MANT_F64 class methods --- - - Inst_VOP1__V_FREXP_MANT_F64::Inst_VOP1__V_FREXP_MANT_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_frexp_mant_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_FREXP_MANT_F64 - - Inst_VOP1__V_FREXP_MANT_F64::~Inst_VOP1__V_FREXP_MANT_F64() - { - } // ~Inst_VOP1__V_FREXP_MANT_F64 - - // --- description from .arch file --- - // See V_FREXP_MANT_F32. - void - Inst_VOP1__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isinf(src[lane]) || std::isnan(src[lane])) { - vdst[lane] = src[lane]; - } else { - VecElemI32 exp(0); - vdst[lane] = std::frexp(src[lane], &exp); - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_FRACT_F64 class methods --- - - Inst_VOP1__V_FRACT_F64::Inst_VOP1__V_FRACT_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_fract_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_FRACT_F64 - - Inst_VOP1__V_FRACT_F64::~Inst_VOP1__V_FRACT_F64() - { - } // ~Inst_VOP1__V_FRACT_F64 - - // --- description from .arch file --- - // See V_FRACT_F32. - void - Inst_VOP1__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemF64 int_part(0.0); - vdst[lane] = std::modf(src[lane], &int_part); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_FREXP_EXP_I32_F32 class methods --- - - Inst_VOP1__V_FREXP_EXP_I32_F32::Inst_VOP1__V_FREXP_EXP_I32_F32( - InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_frexp_exp_i32_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_FREXP_EXP_I32_F32 - - Inst_VOP1__V_FREXP_EXP_I32_F32::~Inst_VOP1__V_FREXP_EXP_I32_F32() - { - } // ~Inst_VOP1__V_FREXP_EXP_I32_F32 - - // --- description from .arch file --- - // if(S0.f == INF || S0.f == NAN) then D.i = 0; - // else D.i = TwosComplement(Exponent(S0.f) - 127 + 1). - // Returns exponent of single precision float input, such that S0.f = - // significand * (2 ** exponent). See also FREXP_MANT_F32, which returns - // the significand. 
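A worked example of the frexp convention these FREXP_* opcodes expose: the input is split so that S0.f == significand * 2^exponent with |significand| in [0.5, 1.0); the FREXP_EXP opcodes return the exponent and the FREXP_MANT opcodes the significand.

    #include <cassert>
    #include <cmath>

    int
    main()
    {
        int exp = 0;
        float mant = std::frexp(192.0f, &exp);
        assert(mant == 0.75f && exp == 8);  // 192 == 0.75 * 2^8
        return 0;
    }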
- void - Inst_VOP1__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isinf(src[lane]) || std::isnan(src[lane])) { - vdst[lane] = 0; - } else { - VecElemI32 exp(0); - std::frexp(src[lane], &exp); - vdst[lane] = exp; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_FREXP_MANT_F32 class methods --- - - Inst_VOP1__V_FREXP_MANT_F32::Inst_VOP1__V_FREXP_MANT_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_frexp_mant_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_FREXP_MANT_F32 - - Inst_VOP1__V_FREXP_MANT_F32::~Inst_VOP1__V_FREXP_MANT_F32() - { - } // ~Inst_VOP1__V_FREXP_MANT_F32 - - // --- description from .arch file --- - // if(S0.f == INF || S0.f == NAN) then D.f = S0.f; - // else D.f = Mantissa(S0.f). - // Result range is in (-1.0,-0.5][0.5,1.0) in normal cases. Returns binary - // --- significand of single precision float input, such that S0.f = - // --- significand * (2 ** exponent). See also FREXP_EXP_I32_F32, which - // --- returns integer exponent. - void - Inst_VOP1__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isinf(src[lane]) || std::isnan(src[lane])) { - vdst[lane] = src[lane]; - } else { - VecElemI32 exp(0); - vdst[lane] = std::frexp(src[lane], &exp); - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CLREXCP class methods --- - - Inst_VOP1__V_CLREXCP::Inst_VOP1__V_CLREXCP(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_clrexcp") - { - setFlag(ALU); - } // Inst_VOP1__V_CLREXCP - - Inst_VOP1__V_CLREXCP::~Inst_VOP1__V_CLREXCP() - { - } // ~Inst_VOP1__V_CLREXCP - - // --- description from .arch file --- - // Clear wave's exception state in SIMD (SP). - void - Inst_VOP1__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_CVT_F16_U16 class methods --- - - Inst_VOP1__V_CVT_F16_U16::Inst_VOP1__V_CVT_F16_U16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f16_u16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_CVT_F16_U16 - - Inst_VOP1__V_CVT_F16_U16::~Inst_VOP1__V_CVT_F16_U16() - { - } // ~Inst_VOP1__V_CVT_F16_U16 - - // --- description from .arch file --- - // D.f16 = uint16_to_flt16(S.u16). - // Supports denormals, rounding, exception flags and saturation. - void - Inst_VOP1__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_CVT_F16_I16 class methods --- - - Inst_VOP1__V_CVT_F16_I16::Inst_VOP1__V_CVT_F16_I16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f16_i16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_CVT_F16_I16 - - Inst_VOP1__V_CVT_F16_I16::~Inst_VOP1__V_CVT_F16_I16() - { - } // ~Inst_VOP1__V_CVT_F16_I16 - - // --- description from .arch file --- - // D.f16 = int16_to_flt16(S.i16). - // Supports denormals, rounding, exception flags and saturation. 
- void - Inst_VOP1__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_CVT_U16_F16 class methods --- - - Inst_VOP1__V_CVT_U16_F16::Inst_VOP1__V_CVT_U16_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_u16_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_CVT_U16_F16 - - Inst_VOP1__V_CVT_U16_F16::~Inst_VOP1__V_CVT_U16_F16() - { - } // ~Inst_VOP1__V_CVT_U16_F16 - - // --- description from .arch file --- - // D.u16 = flt16_to_uint16(S.f16). - // Supports rounding, exception flags and saturation. - void - Inst_VOP1__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_CVT_I16_F16 class methods --- - - Inst_VOP1__V_CVT_I16_F16::Inst_VOP1__V_CVT_I16_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_i16_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_CVT_I16_F16 - - Inst_VOP1__V_CVT_I16_F16::~Inst_VOP1__V_CVT_I16_F16() - { - } // ~Inst_VOP1__V_CVT_I16_F16 - - // --- description from .arch file --- - // D.i16 = flt16_to_int16(S.f16). - // Supports rounding, exception flags and saturation. - void - Inst_VOP1__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_RCP_F16 class methods --- - - Inst_VOP1__V_RCP_F16::Inst_VOP1__V_RCP_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rcp_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_RCP_F16 - - Inst_VOP1__V_RCP_F16::~Inst_VOP1__V_RCP_F16() - { - } // ~Inst_VOP1__V_RCP_F16 - - // --- description from .arch file --- - // if(S0.f16 == 1.0f) - // D.f16 = 1.0f; - // else - // D.f16 = ApproximateRecip(S0.f16). - void - Inst_VOP1__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_SQRT_F16 class methods --- - - Inst_VOP1__V_SQRT_F16::Inst_VOP1__V_SQRT_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_sqrt_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_SQRT_F16 - - Inst_VOP1__V_SQRT_F16::~Inst_VOP1__V_SQRT_F16() - { - } // ~Inst_VOP1__V_SQRT_F16 - - // --- description from .arch file --- - // if(S0.f16 == 1.0f) - // D.f16 = 1.0f; - // else - // D.f16 = ApproximateSqrt(S0.f16). - void - Inst_VOP1__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_RSQ_F16 class methods --- - - Inst_VOP1__V_RSQ_F16::Inst_VOP1__V_RSQ_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rsq_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_RSQ_F16 - - Inst_VOP1__V_RSQ_F16::~Inst_VOP1__V_RSQ_F16() - { - } // ~Inst_VOP1__V_RSQ_F16 - - // --- description from .arch file --- - // if(S0.f16 == 1.0f) - // D.f16 = 1.0f; - // else - // D.f16 = ApproximateRecipSqrt(S0.f16). - void - Inst_VOP1__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_LOG_F16 class methods --- - - Inst_VOP1__V_LOG_F16::Inst_VOP1__V_LOG_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_log_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_LOG_F16 - - Inst_VOP1__V_LOG_F16::~Inst_VOP1__V_LOG_F16() - { - } // ~Inst_VOP1__V_LOG_F16 - - // --- description from .arch file --- - // if(S0.f16 == 1.0f) - // D.f16 = 0.0f; - // else - // D.f16 = ApproximateLog2(S0.f16). 
- void - Inst_VOP1__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_EXP_F16 class methods --- - - Inst_VOP1__V_EXP_F16::Inst_VOP1__V_EXP_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_exp_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_EXP_F16 - - Inst_VOP1__V_EXP_F16::~Inst_VOP1__V_EXP_F16() - { - } // ~Inst_VOP1__V_EXP_F16 - - // --- description from .arch file --- - // if(S0.f16 == 0.0f) - // D.f16 = 1.0f; - // else - // D.f16 = Approximate2ToX(S0.f16). - void - Inst_VOP1__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_FREXP_MANT_F16 class methods --- - - Inst_VOP1__V_FREXP_MANT_F16::Inst_VOP1__V_FREXP_MANT_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_frexp_mant_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_FREXP_MANT_F16 - - Inst_VOP1__V_FREXP_MANT_F16::~Inst_VOP1__V_FREXP_MANT_F16() - { - } // ~Inst_VOP1__V_FREXP_MANT_F16 - - // --- description from .arch file --- - // if(S0.f16 == +-INF || S0.f16 == NAN) - // D.f16 = S0.f16; - // else - // D.f16 = mantissa(S0.f16). - // Result range is (-1.0,-0.5][0.5,1.0). - // C math library frexp function. - // Returns binary significand of half precision float input, such that the - // original single float = significand * (2 ** exponent). - void - Inst_VOP1__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_FREXP_EXP_I16_F16 class methods --- - - Inst_VOP1__V_FREXP_EXP_I16_F16::Inst_VOP1__V_FREXP_EXP_I16_F16( - InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_frexp_exp_i16_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_FREXP_EXP_I16_F16 - - Inst_VOP1__V_FREXP_EXP_I16_F16::~Inst_VOP1__V_FREXP_EXP_I16_F16() - { - } // ~Inst_VOP1__V_FREXP_EXP_I16_F16 - - // --- description from .arch file --- - // if(S0.f16 == +-INF || S0.f16 == NAN) - // D.i16 = 0; - // else - // D.i16 = 2s_complement(exponent(S0.f16) - 15 + 1). - // C math library frexp function. - // Returns exponent of half precision float input, such that the - // original single float = significand * (2 ** exponent). - void - Inst_VOP1__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_FLOOR_F16 class methods --- - - Inst_VOP1__V_FLOOR_F16::Inst_VOP1__V_FLOOR_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_floor_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_FLOOR_F16 - - Inst_VOP1__V_FLOOR_F16::~Inst_VOP1__V_FLOOR_F16() - { - } // ~Inst_VOP1__V_FLOOR_F16 - - // --- description from .arch file --- - // D.f16 = trunc(S0.f16); - // if(S0.f16 < 0.0f && S0.f16 != D.f16) then D.f16 -= 1.0f. - void - Inst_VOP1__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_CEIL_F16 class methods --- - - Inst_VOP1__V_CEIL_F16::Inst_VOP1__V_CEIL_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_ceil_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_CEIL_F16 - - Inst_VOP1__V_CEIL_F16::~Inst_VOP1__V_CEIL_F16() - { - } // ~Inst_VOP1__V_CEIL_F16 - - // --- description from .arch file --- - // D.f16 = trunc(S0.f16); - // if(S0.f16 > 0.0f && S0.f16 != D.f16) then D.f16 += 1.0f. 
- void - Inst_VOP1__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_TRUNC_F16 class methods --- - - Inst_VOP1__V_TRUNC_F16::Inst_VOP1__V_TRUNC_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_trunc_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_TRUNC_F16 - - Inst_VOP1__V_TRUNC_F16::~Inst_VOP1__V_TRUNC_F16() - { - } // ~Inst_VOP1__V_TRUNC_F16 - - // --- description from .arch file --- - // D.f16 = trunc(S0.f16). - // Round-to-zero semantics. - void - Inst_VOP1__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_RNDNE_F16 class methods --- - - Inst_VOP1__V_RNDNE_F16::Inst_VOP1__V_RNDNE_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rndne_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_RNDNE_F16 - - Inst_VOP1__V_RNDNE_F16::~Inst_VOP1__V_RNDNE_F16() - { - } // ~Inst_VOP1__V_RNDNE_F16 - - // --- description from .arch file --- - // D.f16 = FLOOR(S0.f16 + 0.5f); - // if(floor(S0.f16) is even && fract(S0.f16) == 0.5f) then D.f16 -= 1.0f. - // Round-to-nearest-even semantics. - void - Inst_VOP1__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_FRACT_F16 class methods --- - - Inst_VOP1__V_FRACT_F16::Inst_VOP1__V_FRACT_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_fract_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_FRACT_F16 - - Inst_VOP1__V_FRACT_F16::~Inst_VOP1__V_FRACT_F16() - { - } // ~Inst_VOP1__V_FRACT_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 + -floor(S0.f16). - void - Inst_VOP1__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_SIN_F16 class methods --- - - Inst_VOP1__V_SIN_F16::Inst_VOP1__V_SIN_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_sin_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_SIN_F16 - - Inst_VOP1__V_SIN_F16::~Inst_VOP1__V_SIN_F16() - { - } // ~Inst_VOP1__V_SIN_F16 - - // --- description from .arch file --- - // D.f16 = sin(S0.f16 * 2 * PI). - void - Inst_VOP1__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_COS_F16 class methods --- - - Inst_VOP1__V_COS_F16::Inst_VOP1__V_COS_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cos_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_COS_F16 - - Inst_VOP1__V_COS_F16::~Inst_VOP1__V_COS_F16() - { - } // ~Inst_VOP1__V_COS_F16 - - // --- description from .arch file --- - // D.f16 = cos(S0.f16 * 2 * PI). - void - Inst_VOP1__V_COS_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP1__V_EXP_LEGACY_F32 class methods --- - - Inst_VOP1__V_EXP_LEGACY_F32::Inst_VOP1__V_EXP_LEGACY_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_exp_legacy_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_EXP_LEGACY_F32 - - Inst_VOP1__V_EXP_LEGACY_F32::~Inst_VOP1__V_EXP_LEGACY_F32() - { - } // ~Inst_VOP1__V_EXP_LEGACY_F32 - - // --- description from .arch file --- - // D.f = pow(2.0, S0.f) with legacy semantics. 
- void - Inst_VOP1__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::pow(2.0, src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_LOG_LEGACY_F32 class methods --- - - Inst_VOP1__V_LOG_LEGACY_F32::Inst_VOP1__V_LOG_LEGACY_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_log_legacy_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_LOG_LEGACY_F32 - - Inst_VOP1__V_LOG_LEGACY_F32::~Inst_VOP1__V_LOG_LEGACY_F32() - { - } // ~Inst_VOP1__V_LOG_LEGACY_F32 - - // --- description from .arch file --- - // D.f = log2(S0.f). Base 2 logarithm with legacy semantics. - void - Inst_VOP1__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::log2(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOPC__V_CMP_CLASS_F32 class methods --- - - Inst_VOPC__V_CMP_CLASS_F32::Inst_VOPC__V_CMP_CLASS_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_class_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_CLASS_F32 - - Inst_VOPC__V_CMP_CLASS_F32::~Inst_VOPC__V_CMP_CLASS_F32() - { - } // ~Inst_VOPC__V_CMP_CLASS_F32 - - // --- description from .arch file --- - // VCC = IEEE numeric class function specified in S1.u, performed on S0.f - // The function reports true if the floating point value is *any* of the - // --- numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. 
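The execute() that follows expands the S1 class mask into a long chain of per-bit tests built on std::fpclassify() and std::signbit(). A condensed sketch of the same mapping; the helper name and the stand-alone driver are illustrative assumptions, not simulator code.

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    // One lane of a v_cmp_class_f32-style test: true if 'val' falls in
    // any IEEE class whose bit is set in 'mask' (bit 0 sNaN, 1 qNaN,
    // 2 -inf, 3 -normal, 4 -denormal, 5 -0, 6 +0, 7 +denormal,
    // 8 +normal, 9 +inf).
    static bool
    laneMatchesClass(float val, uint32_t mask)
    {
        bool neg = std::signbit(val);
        uint32_t bit = 0;

        switch (std::fpclassify(val)) {
          // Like the loop that follows, this does not separate
          // signaling from quiet NaNs; either NaN bit matches.
          case FP_NAN:       bit = 0x3;                   break;
          case FP_INFINITE:  bit = neg ? 1 << 2 : 1 << 9; break;
          case FP_NORMAL:    bit = neg ? 1 << 3 : 1 << 8; break;
          case FP_SUBNORMAL: bit = neg ? 1 << 4 : 1 << 7; break;
          case FP_ZERO:      bit = neg ? 1 << 5 : 1 << 6; break;
        }
        return (mask & bit) != 0;
    }

    int
    main()
    {
        // -0.0 matches the negative-zero bit; 1.0 does not match the
        // negative-normal bit.
        std::printf("%d %d\n", laneMatchesClass(-0.0f, 1u << 5),
                    laneMatchesClass(1.0f, 1u << 3));
        return 0;
    }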
- void - Inst_VOPC__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (bits(src1[lane], 0) || bits(src1[lane], 1)) { - // is NaN - if (std::isnan(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 2)) { - // is -infinity - if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 3)) { - // is -normal - if (std::isnormal(src0[lane]) - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 4)) { - // is -denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 5)) { - // is -zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 6)) { - // is +zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 7)) { - // is +denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 8)) { - // is +normal - if (std::isnormal(src0[lane]) - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 9)) { - // is +infinity - if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_CLASS_F32 class methods --- - - Inst_VOPC__V_CMPX_CLASS_F32::Inst_VOPC__V_CMPX_CLASS_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_class_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_CLASS_F32 - - Inst_VOPC__V_CMPX_CLASS_F32::~Inst_VOPC__V_CMPX_CLASS_F32() - { - } // ~Inst_VOPC__V_CMPX_CLASS_F32 - - // --- description from .arch file --- - // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on - // S0.f The function reports true if the floating point value is *any* of - // the numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. 
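In the execute() bodies shown in this file, a v_cmpx_* compare differs from its v_cmp_* counterpart only in the last step: after VCC is written, the same mask becomes the new EXEC mask (wf->execMask() = vcc.rawData()). A minimal sketch of that update, with plain uint64_t values standing in for the scalar operands.

    #include <cstdint>
    #include <cstdio>

    int
    main()
    {
        uint64_t vcc = 0;        // per-lane compare results, bit i = lane i
        uint64_t exec = ~0ULL;   // all 64 lanes initially active

        // Suppose lanes 0 and 2 passed the comparison.
        vcc |= (1ULL << 0) | (1ULL << 2);

        // v_cmp_* stops after writing VCC; v_cmpx_* additionally makes
        // the result the execution mask, disabling the failing lanes.
        exec = vcc;

        std::printf("exec = 0x%016llx\n",
                    static_cast<unsigned long long>(exec));
        return 0;
    }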
- void - Inst_VOPC__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (bits(src1[lane], 0) || bits(src1[lane], 1)) { - // is NaN - if (std::isnan(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 2)) { - // is -infinity - if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 3)) { - // is -normal - if (std::isnormal(src0[lane]) - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 4)) { - // is -denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 5)) { - // is -zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 6)) { - // is +zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 7)) { - // is +denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 8)) { - // is +normal - if (std::isnormal(src0[lane]) - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 9)) { - // is +infinity - if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMP_CLASS_F64 class methods --- - - Inst_VOPC__V_CMP_CLASS_F64::Inst_VOPC__V_CMP_CLASS_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_class_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_CLASS_F64 - - Inst_VOPC__V_CMP_CLASS_F64::~Inst_VOPC__V_CMP_CLASS_F64() - { - } // ~Inst_VOPC__V_CMP_CLASS_F64 - - // --- description from .arch file --- - // VCC = IEEE numeric class function specified in S1.u, performed on S0.d - // The function reports true if the floating point value is *any* of the - // --- numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. 
- void - Inst_VOPC__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (bits(src1[lane], 0) || bits(src1[lane], 1)) { - // is NaN - if (std::isnan(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 2)) { - // is -infinity - if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 3)) { - // is -normal - if (std::isnormal(src0[lane]) - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 4)) { - // is -denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 5)) { - // is -zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 6)) { - // is +zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 7)) { - // is +denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 8)) { - // is +normal - if (std::isnormal(src0[lane]) - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 9)) { - // is +infinity - if (std::isinf(src0[lane]) - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_CLASS_F64 class methods --- - - Inst_VOPC__V_CMPX_CLASS_F64::Inst_VOPC__V_CMPX_CLASS_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_class_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_CLASS_F64 - - Inst_VOPC__V_CMPX_CLASS_F64::~Inst_VOPC__V_CMPX_CLASS_F64() - { - } // ~Inst_VOPC__V_CMPX_CLASS_F64 - - // --- description from .arch file --- - // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on - // S0.d The function reports true if the floating point value is *any* of - // the numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. 
- void - Inst_VOPC__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (bits(src1[lane], 0) || bits(src1[lane], 1)) { - // is NaN - if (std::isnan(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 2)) { - // is -infinity - if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 3)) { - // is -normal - if (std::isnormal(src0[lane]) - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 4)) { - // is -denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 5)) { - // is -zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 6)) { - // is +zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 7)) { - // is +denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 8)) { - // is +normal - if (std::isnormal(src0[lane]) - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 9)) { - // is +infinity - if (std::isinf(src0[lane]) - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMP_CLASS_F16 class methods --- - - Inst_VOPC__V_CMP_CLASS_F16::Inst_VOPC__V_CMP_CLASS_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_class_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_CLASS_F16 - - Inst_VOPC__V_CMP_CLASS_F16::~Inst_VOPC__V_CMP_CLASS_F16() - { - } // ~Inst_VOPC__V_CMP_CLASS_F16 - - // --- description from .arch file --- - // VCC = IEEE numeric class function specified in S1.u, performed on S0.f16 - // The function reports true if the floating point value is *any* of the - // --- numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. 
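v_cmp_class_f16 and v_cmpx_class_f16 are panicUnimplemented() below. One reason a naive convert-to-float-then-classify port of the f32 code would be wrong: a half-precision denormal becomes a normal float after widening, so the denormal and zero class bits have to be judged on the 16-bit encoding itself. The sketch classifies a raw binary16 pattern directly; the helper is a hypothetical illustration, not the simulator's API.

    #include <cstdint>
    #include <cstdio>

    // Coarse IEEE binary16 classification: exponent field 0x1f is
    // inf/NaN, exponent field 0 is zero or denormal, everything else
    // is a normal value.
    static const char *
    halfClass(uint16_t h)
    {
        uint16_t exp = (h >> 10) & 0x1f;
        uint16_t man = h & 0x3ff;
        bool neg = h & 0x8000;

        if (exp == 0x1f) return man ? "nan" : (neg ? "-inf" : "+inf");
        if (exp == 0)    return man ? (neg ? "-denorm" : "+denorm")
                                    : (neg ? "-0" : "+0");
        return neg ? "-normal" : "+normal";
    }

    int
    main()
    {
        // 0x0001 is the smallest positive half denormal (2^-24); as a
        // float it would classify as FP_NORMAL and hide the case.
        std::printf("%s %s\n", halfClass(0x0001), halfClass(0xfc00));
        return 0;
    }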
- void - Inst_VOPC__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_CLASS_F16 class methods --- - - Inst_VOPC__V_CMPX_CLASS_F16::Inst_VOPC__V_CMPX_CLASS_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_class_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_CLASS_F16 - - Inst_VOPC__V_CMPX_CLASS_F16::~Inst_VOPC__V_CMPX_CLASS_F16() - { - } // ~Inst_VOPC__V_CMPX_CLASS_F16 - - // --- description from .arch file --- - // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on - // --- S0.f16 - // The function reports true if the floating point value is *any* of the - // --- numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. - void - Inst_VOPC__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_F_F16 class methods --- - - Inst_VOPC__V_CMP_F_F16::Inst_VOPC__V_CMP_F_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_F_F16 - - Inst_VOPC__V_CMP_F_F16::~Inst_VOPC__V_CMP_F_F16() - { - } // ~Inst_VOPC__V_CMP_F_F16 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_LT_F16 class methods --- - - Inst_VOPC__V_CMP_LT_F16::Inst_VOPC__V_CMP_LT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_LT_F16 - - Inst_VOPC__V_CMP_LT_F16::~Inst_VOPC__V_CMP_LT_F16() - { - } // ~Inst_VOPC__V_CMP_LT_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_EQ_F16 class methods --- - - Inst_VOPC__V_CMP_EQ_F16::Inst_VOPC__V_CMP_EQ_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_EQ_F16 - - Inst_VOPC__V_CMP_EQ_F16::~Inst_VOPC__V_CMP_EQ_F16() - { - } // ~Inst_VOPC__V_CMP_EQ_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_LE_F16 class methods --- - - Inst_VOPC__V_CMP_LE_F16::Inst_VOPC__V_CMP_LE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_LE_F16 - - Inst_VOPC__V_CMP_LE_F16::~Inst_VOPC__V_CMP_LE_F16() - { - } // ~Inst_VOPC__V_CMP_LE_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
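Every f16 VOPC comparison in this stretch falls through to panicUnimplemented(). If they were filled in, the lane loop would look like the f32 versions further down, with the 16-bit sources widened first. The halfToFloat() conversion below is a hypothetical stand-in written out for illustration (the widening is exact, since every binary16 value is representable in binary32), and plain variables replace the operand classes and EXEC mask.

    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <limits>

    // Hypothetical half -> float widening.
    static float
    halfToFloat(uint16_t h)
    {
        int exp = (h >> 10) & 0x1f;
        int man = h & 0x3ff;
        float sign = (h & 0x8000) ? -1.0f : 1.0f;

        if (exp == 0x1f) {
            return man ? std::numeric_limits<float>::quiet_NaN()
                       : sign * std::numeric_limits<float>::infinity();
        }
        if (exp == 0) {
            return sign * std::ldexp(static_cast<float>(man), -24);
        }
        return sign * std::ldexp(1.0f + man / 1024.0f, exp - 15);
    }

    int
    main()
    {
        // One lane of a would-be v_cmp_le_f16: widen, compare, set the
        // lane's bit in a 64-bit mask the way the f32 compares do.
        uint16_t src0 = 0x3c00;   // 1.0 in binary16
        uint16_t src1 = 0x4000;   // 2.0 in binary16
        uint64_t vcc = 0;
        int lane = 0;

        if (halfToFloat(src0) <= halfToFloat(src1)) {
            vcc |= 1ULL << lane;
        }
        std::printf("vcc = 0x%llx\n",
                    static_cast<unsigned long long>(vcc));
        return 0;
    }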
- void - Inst_VOPC__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_GT_F16 class methods --- - - Inst_VOPC__V_CMP_GT_F16::Inst_VOPC__V_CMP_GT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_GT_F16 - - Inst_VOPC__V_CMP_GT_F16::~Inst_VOPC__V_CMP_GT_F16() - { - } // ~Inst_VOPC__V_CMP_GT_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_LG_F16 class methods --- - - Inst_VOPC__V_CMP_LG_F16::Inst_VOPC__V_CMP_LG_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lg_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_LG_F16 - - Inst_VOPC__V_CMP_LG_F16::~Inst_VOPC__V_CMP_LG_F16() - { - } // ~Inst_VOPC__V_CMP_LG_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_GE_F16 class methods --- - - Inst_VOPC__V_CMP_GE_F16::Inst_VOPC__V_CMP_GE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_GE_F16 - - Inst_VOPC__V_CMP_GE_F16::~Inst_VOPC__V_CMP_GE_F16() - { - } // ~Inst_VOPC__V_CMP_GE_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_O_F16 class methods --- - - Inst_VOPC__V_CMP_O_F16::Inst_VOPC__V_CMP_O_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_o_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_O_F16 - - Inst_VOPC__V_CMP_O_F16::~Inst_VOPC__V_CMP_O_F16() - { - } // ~Inst_VOPC__V_CMP_O_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_U_F16 class methods --- - - Inst_VOPC__V_CMP_U_F16::Inst_VOPC__V_CMP_U_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_u_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_U_F16 - - Inst_VOPC__V_CMP_U_F16::~Inst_VOPC__V_CMP_U_F16() - { - } // ~Inst_VOPC__V_CMP_U_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_NGE_F16 class methods --- - - Inst_VOPC__V_CMP_NGE_F16::Inst_VOPC__V_CMP_NGE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nge_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_NGE_F16 - - Inst_VOPC__V_CMP_NGE_F16::~Inst_VOPC__V_CMP_NGE_F16() - { - } // ~Inst_VOPC__V_CMP_NGE_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_NLG_F16 class methods --- - - Inst_VOPC__V_CMP_NLG_F16::Inst_VOPC__V_CMP_NLG_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nlg_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_NLG_F16 - - Inst_VOPC__V_CMP_NLG_F16::~Inst_VOPC__V_CMP_NLG_F16() - { - } // ~Inst_VOPC__V_CMP_NLG_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_NGT_F16 class methods --- - - Inst_VOPC__V_CMP_NGT_F16::Inst_VOPC__V_CMP_NGT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ngt_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_NGT_F16 - - Inst_VOPC__V_CMP_NGT_F16::~Inst_VOPC__V_CMP_NGT_F16() - { - } // ~Inst_VOPC__V_CMP_NGT_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_NLE_F16 class methods --- - - Inst_VOPC__V_CMP_NLE_F16::Inst_VOPC__V_CMP_NLE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nle_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_NLE_F16 - - Inst_VOPC__V_CMP_NLE_F16::~Inst_VOPC__V_CMP_NLE_F16() - { - } // ~Inst_VOPC__V_CMP_NLE_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_NEQ_F16 class methods --- - - Inst_VOPC__V_CMP_NEQ_F16::Inst_VOPC__V_CMP_NEQ_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_neq_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_NEQ_F16 - - Inst_VOPC__V_CMP_NEQ_F16::~Inst_VOPC__V_CMP_NEQ_F16() - { - } // ~Inst_VOPC__V_CMP_NEQ_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_NLT_F16 class methods --- - - Inst_VOPC__V_CMP_NLT_F16::Inst_VOPC__V_CMP_NLT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nlt_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_NLT_F16 - - Inst_VOPC__V_CMP_NLT_F16::~Inst_VOPC__V_CMP_NLT_F16() - { - } // ~Inst_VOPC__V_CMP_NLT_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_TRU_F16 class methods --- - - Inst_VOPC__V_CMP_TRU_F16::Inst_VOPC__V_CMP_TRU_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_tru_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_TRU_F16 - - Inst_VOPC__V_CMP_TRU_F16::~Inst_VOPC__V_CMP_TRU_F16() - { - } // ~Inst_VOPC__V_CMP_TRU_F16 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_F_F16 class methods --- - - Inst_VOPC__V_CMPX_F_F16::Inst_VOPC__V_CMPX_F_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_F_F16 - - Inst_VOPC__V_CMPX_F_F16::~Inst_VOPC__V_CMPX_F_F16() - { - } // ~Inst_VOPC__V_CMPX_F_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_LT_F16 class methods --- - - Inst_VOPC__V_CMPX_LT_F16::Inst_VOPC__V_CMPX_LT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LT_F16 - - Inst_VOPC__V_CMPX_LT_F16::~Inst_VOPC__V_CMPX_LT_F16() - { - } // ~Inst_VOPC__V_CMPX_LT_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_EQ_F16 class methods --- - - Inst_VOPC__V_CMPX_EQ_F16::Inst_VOPC__V_CMPX_EQ_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_EQ_F16 - - Inst_VOPC__V_CMPX_EQ_F16::~Inst_VOPC__V_CMPX_EQ_F16() - { - } // ~Inst_VOPC__V_CMPX_EQ_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_LE_F16 class methods --- - - Inst_VOPC__V_CMPX_LE_F16::Inst_VOPC__V_CMPX_LE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LE_F16 - - Inst_VOPC__V_CMPX_LE_F16::~Inst_VOPC__V_CMPX_LE_F16() - { - } // ~Inst_VOPC__V_CMPX_LE_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_GT_F16 class methods --- - - Inst_VOPC__V_CMPX_GT_F16::Inst_VOPC__V_CMPX_GT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GT_F16 - - Inst_VOPC__V_CMPX_GT_F16::~Inst_VOPC__V_CMPX_GT_F16() - { - } // ~Inst_VOPC__V_CMPX_GT_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_LG_F16 class methods --- - - Inst_VOPC__V_CMPX_LG_F16::Inst_VOPC__V_CMPX_LG_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lg_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LG_F16 - - Inst_VOPC__V_CMPX_LG_F16::~Inst_VOPC__V_CMPX_LG_F16() - { - } // ~Inst_VOPC__V_CMPX_LG_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_GE_F16 class methods --- - - Inst_VOPC__V_CMPX_GE_F16::Inst_VOPC__V_CMPX_GE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GE_F16 - - Inst_VOPC__V_CMPX_GE_F16::~Inst_VOPC__V_CMPX_GE_F16() - { - } // ~Inst_VOPC__V_CMPX_GE_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_O_F16 class methods --- - - Inst_VOPC__V_CMPX_O_F16::Inst_VOPC__V_CMPX_O_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_o_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_O_F16 - - Inst_VOPC__V_CMPX_O_F16::~Inst_VOPC__V_CMPX_O_F16() - { - } // ~Inst_VOPC__V_CMPX_O_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOPC__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_U_F16 class methods --- - - Inst_VOPC__V_CMPX_U_F16::Inst_VOPC__V_CMPX_U_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_u_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_U_F16 - - Inst_VOPC__V_CMPX_U_F16::~Inst_VOPC__V_CMPX_U_F16() - { - } // ~Inst_VOPC__V_CMPX_U_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOPC__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_NGE_F16 class methods --- - - Inst_VOPC__V_CMPX_NGE_F16::Inst_VOPC__V_CMPX_NGE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nge_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NGE_F16 - - Inst_VOPC__V_CMPX_NGE_F16::~Inst_VOPC__V_CMPX_NGE_F16() - { - } // ~Inst_VOPC__V_CMPX_NGE_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_NLG_F16 class methods --- - - Inst_VOPC__V_CMPX_NLG_F16::Inst_VOPC__V_CMPX_NLG_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nlg_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NLG_F16 - - Inst_VOPC__V_CMPX_NLG_F16::~Inst_VOPC__V_CMPX_NLG_F16() - { - } // ~Inst_VOPC__V_CMPX_NLG_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_NGT_F16 class methods --- - - Inst_VOPC__V_CMPX_NGT_F16::Inst_VOPC__V_CMPX_NGT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ngt_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NGT_F16 - - Inst_VOPC__V_CMPX_NGT_F16::~Inst_VOPC__V_CMPX_NGT_F16() - { - } // ~Inst_VOPC__V_CMPX_NGT_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_NLE_F16 class methods --- - - Inst_VOPC__V_CMPX_NLE_F16::Inst_VOPC__V_CMPX_NLE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nle_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NLE_F16 - - Inst_VOPC__V_CMPX_NLE_F16::~Inst_VOPC__V_CMPX_NLE_F16() - { - } // ~Inst_VOPC__V_CMPX_NLE_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_NEQ_F16 class methods --- - - Inst_VOPC__V_CMPX_NEQ_F16::Inst_VOPC__V_CMPX_NEQ_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_neq_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NEQ_F16 - - Inst_VOPC__V_CMPX_NEQ_F16::~Inst_VOPC__V_CMPX_NEQ_F16() - { - } // ~Inst_VOPC__V_CMPX_NEQ_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_NLT_F16 class methods --- - - Inst_VOPC__V_CMPX_NLT_F16::Inst_VOPC__V_CMPX_NLT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nlt_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NLT_F16 - - Inst_VOPC__V_CMPX_NLT_F16::~Inst_VOPC__V_CMPX_NLT_F16() - { - } // ~Inst_VOPC__V_CMPX_NLT_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_TRU_F16 class methods --- - - Inst_VOPC__V_CMPX_TRU_F16::Inst_VOPC__V_CMPX_TRU_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_tru_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_TRU_F16 - - Inst_VOPC__V_CMPX_TRU_F16::~Inst_VOPC__V_CMPX_TRU_F16() - { - } // ~Inst_VOPC__V_CMPX_TRU_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_F_F32 class methods --- - - Inst_VOPC__V_CMP_F_F32::Inst_VOPC__V_CMP_F_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_F_F32 - - Inst_VOPC__V_CMP_F_F32::~Inst_VOPC__V_CMP_F_F32() - { - } // ~Inst_VOPC__V_CMP_F_F32 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
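The f32 comparisons that follow all share one shape: read two vector sources, evaluate the predicate for each active lane, and pack the lane results into the 64-bit VCC scalar with setBit(). A stripped-down model of that packing for a 64-lane wavefront, with a plain uint64_t in place of ScalarOperandU64 and an explicit mask in place of wf->execMask(); leaving inactive lanes' bits alone is a choice of this sketch, not a statement about the simulator.

    #include <cstdint>
    #include <cstdio>

    int
    main()
    {
        const int numLanes = 64;
        float src0[numLanes], src1[numLanes];
        uint64_t execMask = 0xffffULL;   // only lanes 0..15 active
        uint64_t vcc = 0;

        for (int lane = 0; lane < numLanes; ++lane) {
            src0[lane] = static_cast<float>(lane);
            src1[lane] = 8.0f;
        }

        // v_cmp_lt_f32-style loop: each active lane contributes one bit.
        for (int lane = 0; lane < numLanes; ++lane) {
            if (execMask & (1ULL << lane)) {
                if (src0[lane] < src1[lane]) {
                    vcc |= 1ULL << lane;
                } else {
                    vcc &= ~(1ULL << lane);
                }
            }
        }

        // Lanes 0..7 pass (lane value < 8): vcc = 0x00000000000000ff.
        std::printf("vcc = 0x%016llx\n",
                    static_cast<unsigned long long>(vcc));
        return 0;
    }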
- void - Inst_VOPC__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LT_F32 class methods --- - - Inst_VOPC__V_CMP_LT_F32::Inst_VOPC__V_CMP_LT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_LT_F32 - - Inst_VOPC__V_CMP_LT_F32::~Inst_VOPC__V_CMP_LT_F32() - { - } // ~Inst_VOPC__V_CMP_LT_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_EQ_F32 class methods --- - - Inst_VOPC__V_CMP_EQ_F32::Inst_VOPC__V_CMP_EQ_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_EQ_F32 - - Inst_VOPC__V_CMP_EQ_F32::~Inst_VOPC__V_CMP_EQ_F32() - { - } // ~Inst_VOPC__V_CMP_EQ_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LE_F32 class methods --- - - Inst_VOPC__V_CMP_LE_F32::Inst_VOPC__V_CMP_LE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_LE_F32 - - Inst_VOPC__V_CMP_LE_F32::~Inst_VOPC__V_CMP_LE_F32() - { - } // ~Inst_VOPC__V_CMP_LE_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GT_F32 class methods --- - - Inst_VOPC__V_CMP_GT_F32::Inst_VOPC__V_CMP_GT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_GT_F32 - - Inst_VOPC__V_CMP_GT_F32::~Inst_VOPC__V_CMP_GT_F32() - { - } // ~Inst_VOPC__V_CMP_GT_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LG_F32 class methods --- - - Inst_VOPC__V_CMP_LG_F32::Inst_VOPC__V_CMP_LG_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lg_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_LG_F32 - - Inst_VOPC__V_CMP_LG_F32::~Inst_VOPC__V_CMP_LG_F32() - { - } // ~Inst_VOPC__V_CMP_LG_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GE_F32 class methods --- - - Inst_VOPC__V_CMP_GE_F32::Inst_VOPC__V_CMP_GE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_GE_F32 - - Inst_VOPC__V_CMP_GE_F32::~Inst_VOPC__V_CMP_GE_F32() - { - } // ~Inst_VOPC__V_CMP_GE_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_O_F32 class methods --- - - Inst_VOPC__V_CMP_O_F32::Inst_VOPC__V_CMP_O_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_o_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_O_F32 - - Inst_VOPC__V_CMP_O_F32::~Inst_VOPC__V_CMP_O_F32() - { - } // ~Inst_VOPC__V_CMP_O_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_U_F32 class methods --- - - Inst_VOPC__V_CMP_U_F32::Inst_VOPC__V_CMP_U_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_u_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_U_F32 - - Inst_VOPC__V_CMP_U_F32::~Inst_VOPC__V_CMP_U_F32() - { - } // ~Inst_VOPC__V_CMP_U_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NGE_F32 class methods --- - - Inst_VOPC__V_CMP_NGE_F32::Inst_VOPC__V_CMP_NGE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nge_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_NGE_F32 - - Inst_VOPC__V_CMP_NGE_F32::~Inst_VOPC__V_CMP_NGE_F32() - { - } // ~Inst_VOPC__V_CMP_NGE_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NLG_F32 class methods --- - - Inst_VOPC__V_CMP_NLG_F32::Inst_VOPC__V_CMP_NLG_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nlg_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_NLG_F32 - - Inst_VOPC__V_CMP_NLG_F32::~Inst_VOPC__V_CMP_NLG_F32() - { - } // ~Inst_VOPC__V_CMP_NLG_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NGT_F32 class methods --- - - Inst_VOPC__V_CMP_NGT_F32::Inst_VOPC__V_CMP_NGT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ngt_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_NGT_F32 - - Inst_VOPC__V_CMP_NGT_F32::~Inst_VOPC__V_CMP_NGT_F32() - { - } // ~Inst_VOPC__V_CMP_NGT_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
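One thing to keep in mind when reading the 'n'-prefixed compares: modelling v_cmp_nge_f32 as !(S0 >= S1) is not the same as S0 < S1 once a NaN is involved, and v_cmp_lg_f32 (written as < || >, an ordered not-equal) differs from v_cmp_neq_f32 (!(==)) for the same reason. A small host-side demonstration of the IEEE behaviour these implementations rely on:

    #include <cmath>
    #include <cstdio>

    int
    main()
    {
        float a = std::nanf("");
        float b = 1.0f;

        // Ordered comparisons with a NaN operand are false...
        std::printf("a <  b : %d\n", a < b);              // 0
        std::printf("a >= b : %d\n", a >= b);             // 0
        // ...so the negated forms used by the n* compares are true.
        std::printf("nge    : %d\n", !(a >= b));          // 1
        // lg (ordered) is false on NaN; neq (!(==)) is true on NaN.
        std::printf("lg     : %d\n", (a < b) || (a > b)); // 0
        std::printf("neq    : %d\n", !(a == b));          // 1
        return 0;
    }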
- void - Inst_VOPC__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NLE_F32 class methods --- - - Inst_VOPC__V_CMP_NLE_F32::Inst_VOPC__V_CMP_NLE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nle_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_NLE_F32 - - Inst_VOPC__V_CMP_NLE_F32::~Inst_VOPC__V_CMP_NLE_F32() - { - } // ~Inst_VOPC__V_CMP_NLE_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NEQ_F32 class methods --- - - Inst_VOPC__V_CMP_NEQ_F32::Inst_VOPC__V_CMP_NEQ_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_neq_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_NEQ_F32 - - Inst_VOPC__V_CMP_NEQ_F32::~Inst_VOPC__V_CMP_NEQ_F32() - { - } // ~Inst_VOPC__V_CMP_NEQ_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NLT_F32 class methods --- - - Inst_VOPC__V_CMP_NLT_F32::Inst_VOPC__V_CMP_NLT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nlt_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_NLT_F32 - - Inst_VOPC__V_CMP_NLT_F32::~Inst_VOPC__V_CMP_NLT_F32() - { - } // ~Inst_VOPC__V_CMP_NLT_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_TRU_F32 class methods --- - - Inst_VOPC__V_CMP_TRU_F32::Inst_VOPC__V_CMP_TRU_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_tru_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_TRU_F32 - - Inst_VOPC__V_CMP_TRU_F32::~Inst_VOPC__V_CMP_TRU_F32() - { - } // ~Inst_VOPC__V_CMP_TRU_F32 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_F_F32 class methods --- - - Inst_VOPC__V_CMPX_F_F32::Inst_VOPC__V_CMPX_F_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_F_F32 - - Inst_VOPC__V_CMPX_F_F32::~Inst_VOPC__V_CMPX_F_F32() - { - } // ~Inst_VOPC__V_CMPX_F_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_LT_F32 class methods --- - - Inst_VOPC__V_CMPX_LT_F32::Inst_VOPC__V_CMPX_LT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LT_F32 - - Inst_VOPC__V_CMPX_LT_F32::~Inst_VOPC__V_CMPX_LT_F32() - { - } // ~Inst_VOPC__V_CMPX_LT_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_EQ_F32 class methods --- - - Inst_VOPC__V_CMPX_EQ_F32::Inst_VOPC__V_CMPX_EQ_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_EQ_F32 - - Inst_VOPC__V_CMPX_EQ_F32::~Inst_VOPC__V_CMPX_EQ_F32() - { - } // ~Inst_VOPC__V_CMPX_EQ_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_LE_F32 class methods --- - - Inst_VOPC__V_CMPX_LE_F32::Inst_VOPC__V_CMPX_LE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LE_F32 - - Inst_VOPC__V_CMPX_LE_F32::~Inst_VOPC__V_CMPX_LE_F32() - { - } // ~Inst_VOPC__V_CMPX_LE_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_GT_F32 class methods --- - - Inst_VOPC__V_CMPX_GT_F32::Inst_VOPC__V_CMPX_GT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GT_F32 - - Inst_VOPC__V_CMPX_GT_F32::~Inst_VOPC__V_CMPX_GT_F32() - { - } // ~Inst_VOPC__V_CMPX_GT_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_LG_F32 class methods --- - - Inst_VOPC__V_CMPX_LG_F32::Inst_VOPC__V_CMPX_LG_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lg_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LG_F32 - - Inst_VOPC__V_CMPX_LG_F32::~Inst_VOPC__V_CMPX_LG_F32() - { - } // ~Inst_VOPC__V_CMPX_LG_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_GE_F32 class methods --- - - Inst_VOPC__V_CMPX_GE_F32::Inst_VOPC__V_CMPX_GE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GE_F32 - - Inst_VOPC__V_CMPX_GE_F32::~Inst_VOPC__V_CMPX_GE_F32() - { - } // ~Inst_VOPC__V_CMPX_GE_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_O_F32 class methods --- - - Inst_VOPC__V_CMPX_O_F32::Inst_VOPC__V_CMPX_O_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_o_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_O_F32 - - Inst_VOPC__V_CMPX_O_F32::~Inst_VOPC__V_CMPX_O_F32() - { - } // ~Inst_VOPC__V_CMPX_O_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOPC__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_U_F32 class methods --- - - Inst_VOPC__V_CMPX_U_F32::Inst_VOPC__V_CMPX_U_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_u_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_U_F32 - - Inst_VOPC__V_CMPX_U_F32::~Inst_VOPC__V_CMPX_U_F32() - { - } // ~Inst_VOPC__V_CMPX_U_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOPC__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_NGE_F32 class methods --- - - Inst_VOPC__V_CMPX_NGE_F32::Inst_VOPC__V_CMPX_NGE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nge_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NGE_F32 - - Inst_VOPC__V_CMPX_NGE_F32::~Inst_VOPC__V_CMPX_NGE_F32() - { - } // ~Inst_VOPC__V_CMPX_NGE_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 
1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_NLG_F32 class methods --- - - Inst_VOPC__V_CMPX_NLG_F32::Inst_VOPC__V_CMPX_NLG_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nlg_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NLG_F32 - - Inst_VOPC__V_CMPX_NLG_F32::~Inst_VOPC__V_CMPX_NLG_F32() - { - } // ~Inst_VOPC__V_CMPX_NLG_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_NGT_F32 class methods --- - - Inst_VOPC__V_CMPX_NGT_F32::Inst_VOPC__V_CMPX_NGT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ngt_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NGT_F32 - - Inst_VOPC__V_CMPX_NGT_F32::~Inst_VOPC__V_CMPX_NGT_F32() - { - } // ~Inst_VOPC__V_CMPX_NGT_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_NLE_F32 class methods --- - - Inst_VOPC__V_CMPX_NLE_F32::Inst_VOPC__V_CMPX_NLE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nle_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NLE_F32 - - Inst_VOPC__V_CMPX_NLE_F32::~Inst_VOPC__V_CMPX_NLE_F32() - { - } // ~Inst_VOPC__V_CMPX_NLE_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 
1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_NEQ_F32 class methods --- - - Inst_VOPC__V_CMPX_NEQ_F32::Inst_VOPC__V_CMPX_NEQ_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_neq_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NEQ_F32 - - Inst_VOPC__V_CMPX_NEQ_F32::~Inst_VOPC__V_CMPX_NEQ_F32() - { - } // ~Inst_VOPC__V_CMPX_NEQ_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] == src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_NLT_F32 class methods --- - - Inst_VOPC__V_CMPX_NLT_F32::Inst_VOPC__V_CMPX_NLT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nlt_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NLT_F32 - - Inst_VOPC__V_CMPX_NLT_F32::~Inst_VOPC__V_CMPX_NLT_F32() - { - } // ~Inst_VOPC__V_CMPX_NLT_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_TRU_F32 class methods --- - - Inst_VOPC__V_CMPX_TRU_F32::Inst_VOPC__V_CMPX_TRU_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_tru_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_TRU_F32 - - Inst_VOPC__V_CMPX_TRU_F32::~Inst_VOPC__V_CMPX_TRU_F32() - { - } // ~Inst_VOPC__V_CMPX_TRU_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMP_F_F64 class methods --- - - Inst_VOPC__V_CMP_F_F64::Inst_VOPC__V_CMP_F_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_F_F64 - - Inst_VOPC__V_CMP_F_F64::~Inst_VOPC__V_CMP_F_F64() - { - } // ~Inst_VOPC__V_CMP_F_F64 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LT_F64 class methods --- - - Inst_VOPC__V_CMP_LT_F64::Inst_VOPC__V_CMP_LT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_LT_F64 - - Inst_VOPC__V_CMP_LT_F64::~Inst_VOPC__V_CMP_LT_F64() - { - } // ~Inst_VOPC__V_CMP_LT_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_EQ_F64 class methods --- - - Inst_VOPC__V_CMP_EQ_F64::Inst_VOPC__V_CMP_EQ_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_EQ_F64 - - Inst_VOPC__V_CMP_EQ_F64::~Inst_VOPC__V_CMP_EQ_F64() - { - } // ~Inst_VOPC__V_CMP_EQ_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LE_F64 class methods --- - - Inst_VOPC__V_CMP_LE_F64::Inst_VOPC__V_CMP_LE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_LE_F64 - - Inst_VOPC__V_CMP_LE_F64::~Inst_VOPC__V_CMP_LE_F64() - { - } // ~Inst_VOPC__V_CMP_LE_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GT_F64 class methods --- - - Inst_VOPC__V_CMP_GT_F64::Inst_VOPC__V_CMP_GT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_GT_F64 - - Inst_VOPC__V_CMP_GT_F64::~Inst_VOPC__V_CMP_GT_F64() - { - } // ~Inst_VOPC__V_CMP_GT_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LG_F64 class methods --- - - Inst_VOPC__V_CMP_LG_F64::Inst_VOPC__V_CMP_LG_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lg_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_LG_F64 - - Inst_VOPC__V_CMP_LG_F64::~Inst_VOPC__V_CMP_LG_F64() - { - } // ~Inst_VOPC__V_CMP_LG_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GE_F64 class methods --- - - Inst_VOPC__V_CMP_GE_F64::Inst_VOPC__V_CMP_GE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_GE_F64 - - Inst_VOPC__V_CMP_GE_F64::~Inst_VOPC__V_CMP_GE_F64() - { - } // ~Inst_VOPC__V_CMP_GE_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_O_F64 class methods --- - - Inst_VOPC__V_CMP_O_F64::Inst_VOPC__V_CMP_O_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_o_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_O_F64 - - Inst_VOPC__V_CMP_O_F64::~Inst_VOPC__V_CMP_O_F64() - { - } // ~Inst_VOPC__V_CMP_O_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_U_F64 class methods --- - - Inst_VOPC__V_CMP_U_F64::Inst_VOPC__V_CMP_U_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_u_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_U_F64 - - Inst_VOPC__V_CMP_U_F64::~Inst_VOPC__V_CMP_U_F64() - { - } // ~Inst_VOPC__V_CMP_U_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NGE_F64 class methods --- - - Inst_VOPC__V_CMP_NGE_F64::Inst_VOPC__V_CMP_NGE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nge_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_NGE_F64 - - Inst_VOPC__V_CMP_NGE_F64::~Inst_VOPC__V_CMP_NGE_F64() - { - } // ~Inst_VOPC__V_CMP_NGE_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NLG_F64 class methods --- - - Inst_VOPC__V_CMP_NLG_F64::Inst_VOPC__V_CMP_NLG_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nlg_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_NLG_F64 - - Inst_VOPC__V_CMP_NLG_F64::~Inst_VOPC__V_CMP_NLG_F64() - { - } // ~Inst_VOPC__V_CMP_NLG_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NGT_F64 class methods --- - - Inst_VOPC__V_CMP_NGT_F64::Inst_VOPC__V_CMP_NGT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ngt_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_NGT_F64 - - Inst_VOPC__V_CMP_NGT_F64::~Inst_VOPC__V_CMP_NGT_F64() - { - } // ~Inst_VOPC__V_CMP_NGT_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NLE_F64 class methods --- - - Inst_VOPC__V_CMP_NLE_F64::Inst_VOPC__V_CMP_NLE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nle_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_NLE_F64 - - Inst_VOPC__V_CMP_NLE_F64::~Inst_VOPC__V_CMP_NLE_F64() - { - } // ~Inst_VOPC__V_CMP_NLE_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NEQ_F64 class methods --- - - Inst_VOPC__V_CMP_NEQ_F64::Inst_VOPC__V_CMP_NEQ_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_neq_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_NEQ_F64 - - Inst_VOPC__V_CMP_NEQ_F64::~Inst_VOPC__V_CMP_NEQ_F64() - { - } // ~Inst_VOPC__V_CMP_NEQ_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NLT_F64 class methods --- - - Inst_VOPC__V_CMP_NLT_F64::Inst_VOPC__V_CMP_NLT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nlt_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_NLT_F64 - - Inst_VOPC__V_CMP_NLT_F64::~Inst_VOPC__V_CMP_NLT_F64() - { - } // ~Inst_VOPC__V_CMP_NLT_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_TRU_F64 class methods --- - - Inst_VOPC__V_CMP_TRU_F64::Inst_VOPC__V_CMP_TRU_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_tru_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_TRU_F64 - - Inst_VOPC__V_CMP_TRU_F64::~Inst_VOPC__V_CMP_TRU_F64() - { - } // ~Inst_VOPC__V_CMP_TRU_F64 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_F_F64 class methods --- - - Inst_VOPC__V_CMPX_F_F64::Inst_VOPC__V_CMPX_F_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_F_F64 - - Inst_VOPC__V_CMPX_F_F64::~Inst_VOPC__V_CMPX_F_F64() - { - } // ~Inst_VOPC__V_CMPX_F_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_LT_F64 class methods --- - - Inst_VOPC__V_CMPX_LT_F64::Inst_VOPC__V_CMPX_LT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LT_F64 - - Inst_VOPC__V_CMPX_LT_F64::~Inst_VOPC__V_CMPX_LT_F64() - { - } // ~Inst_VOPC__V_CMPX_LT_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_EQ_F64 class methods --- - - Inst_VOPC__V_CMPX_EQ_F64::Inst_VOPC__V_CMPX_EQ_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_EQ_F64 - - Inst_VOPC__V_CMPX_EQ_F64::~Inst_VOPC__V_CMPX_EQ_F64() - { - } // ~Inst_VOPC__V_CMPX_EQ_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_LE_F64 class methods --- - - Inst_VOPC__V_CMPX_LE_F64::Inst_VOPC__V_CMPX_LE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LE_F64 - - Inst_VOPC__V_CMPX_LE_F64::~Inst_VOPC__V_CMPX_LE_F64() - { - } // ~Inst_VOPC__V_CMPX_LE_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GT_F64 class methods --- - - Inst_VOPC__V_CMPX_GT_F64::Inst_VOPC__V_CMPX_GT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GT_F64 - - Inst_VOPC__V_CMPX_GT_F64::~Inst_VOPC__V_CMPX_GT_F64() - { - } // ~Inst_VOPC__V_CMPX_GT_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LG_F64 class methods --- - - Inst_VOPC__V_CMPX_LG_F64::Inst_VOPC__V_CMPX_LG_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lg_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LG_F64 - - Inst_VOPC__V_CMPX_LG_F64::~Inst_VOPC__V_CMPX_LG_F64() - { - } // ~Inst_VOPC__V_CMPX_LG_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GE_F64 class methods --- - - Inst_VOPC__V_CMPX_GE_F64::Inst_VOPC__V_CMPX_GE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GE_F64 - - Inst_VOPC__V_CMPX_GE_F64::~Inst_VOPC__V_CMPX_GE_F64() - { - } // ~Inst_VOPC__V_CMPX_GE_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_O_F64 class methods --- - - Inst_VOPC__V_CMPX_O_F64::Inst_VOPC__V_CMPX_O_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_o_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_O_F64 - - Inst_VOPC__V_CMPX_O_F64::~Inst_VOPC__V_CMPX_O_F64() - { - } // ~Inst_VOPC__V_CMPX_O_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOPC__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_U_F64 class methods --- - - Inst_VOPC__V_CMPX_U_F64::Inst_VOPC__V_CMPX_U_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_u_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_U_F64 - - Inst_VOPC__V_CMPX_U_F64::~Inst_VOPC__V_CMPX_U_F64() - { - } // ~Inst_VOPC__V_CMPX_U_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOPC__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NGE_F64 class methods --- - - Inst_VOPC__V_CMPX_NGE_F64::Inst_VOPC__V_CMPX_NGE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nge_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NGE_F64 - - Inst_VOPC__V_CMPX_NGE_F64::~Inst_VOPC__V_CMPX_NGE_F64() - { - } // ~Inst_VOPC__V_CMPX_NGE_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NLG_F64 class methods --- - - Inst_VOPC__V_CMPX_NLG_F64::Inst_VOPC__V_CMPX_NLG_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nlg_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NLG_F64 - - Inst_VOPC__V_CMPX_NLG_F64::~Inst_VOPC__V_CMPX_NLG_F64() - { - } // ~Inst_VOPC__V_CMPX_NLG_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NGT_F64 class methods --- - - Inst_VOPC__V_CMPX_NGT_F64::Inst_VOPC__V_CMPX_NGT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ngt_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NGT_F64 - - Inst_VOPC__V_CMPX_NGT_F64::~Inst_VOPC__V_CMPX_NGT_F64() - { - } // ~Inst_VOPC__V_CMPX_NGT_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NLE_F64 class methods --- - - Inst_VOPC__V_CMPX_NLE_F64::Inst_VOPC__V_CMPX_NLE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nle_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NLE_F64 - - Inst_VOPC__V_CMPX_NLE_F64::~Inst_VOPC__V_CMPX_NLE_F64() - { - } // ~Inst_VOPC__V_CMPX_NLE_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NEQ_F64 class methods --- - - Inst_VOPC__V_CMPX_NEQ_F64::Inst_VOPC__V_CMPX_NEQ_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_neq_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NEQ_F64 - - Inst_VOPC__V_CMPX_NEQ_F64::~Inst_VOPC__V_CMPX_NEQ_F64() - { - } // ~Inst_VOPC__V_CMPX_NEQ_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NLT_F64 class methods --- - - Inst_VOPC__V_CMPX_NLT_F64::Inst_VOPC__V_CMPX_NLT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nlt_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NLT_F64 - - Inst_VOPC__V_CMPX_NLT_F64::~Inst_VOPC__V_CMPX_NLT_F64() - { - } // ~Inst_VOPC__V_CMPX_NLT_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_TRU_F64 class methods --- - - Inst_VOPC__V_CMPX_TRU_F64::Inst_VOPC__V_CMPX_TRU_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_tru_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_TRU_F64 - - Inst_VOPC__V_CMPX_TRU_F64::~Inst_VOPC__V_CMPX_TRU_F64() - { - } // ~Inst_VOPC__V_CMPX_TRU_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_F_I16 class methods --- - - Inst_VOPC__V_CMP_F_I16::Inst_VOPC__V_CMP_F_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_F_I16 - - Inst_VOPC__V_CMP_F_I16::~Inst_VOPC__V_CMP_F_I16() - { - } // ~Inst_VOPC__V_CMP_F_I16 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LT_I16 class methods --- - - Inst_VOPC__V_CMP_LT_I16::Inst_VOPC__V_CMP_LT_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LT_I16 - - Inst_VOPC__V_CMP_LT_I16::~Inst_VOPC__V_CMP_LT_I16() - { - } // ~Inst_VOPC__V_CMP_LT_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_EQ_I16 class methods --- - - Inst_VOPC__V_CMP_EQ_I16::Inst_VOPC__V_CMP_EQ_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_EQ_I16 - - Inst_VOPC__V_CMP_EQ_I16::~Inst_VOPC__V_CMP_EQ_I16() - { - } // ~Inst_VOPC__V_CMP_EQ_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LE_I16 class methods --- - - Inst_VOPC__V_CMP_LE_I16::Inst_VOPC__V_CMP_LE_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LE_I16 - - Inst_VOPC__V_CMP_LE_I16::~Inst_VOPC__V_CMP_LE_I16() - { - } // ~Inst_VOPC__V_CMP_LE_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GT_I16 class methods --- - - Inst_VOPC__V_CMP_GT_I16::Inst_VOPC__V_CMP_GT_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GT_I16 - - Inst_VOPC__V_CMP_GT_I16::~Inst_VOPC__V_CMP_GT_I16() - { - } // ~Inst_VOPC__V_CMP_GT_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NE_I16 class methods --- - - Inst_VOPC__V_CMP_NE_I16::Inst_VOPC__V_CMP_NE_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ne_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_NE_I16 - - Inst_VOPC__V_CMP_NE_I16::~Inst_VOPC__V_CMP_NE_I16() - { - } // ~Inst_VOPC__V_CMP_NE_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GE_I16 class methods --- - - Inst_VOPC__V_CMP_GE_I16::Inst_VOPC__V_CMP_GE_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GE_I16 - - Inst_VOPC__V_CMP_GE_I16::~Inst_VOPC__V_CMP_GE_I16() - { - } // ~Inst_VOPC__V_CMP_GE_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_T_I16 class methods --- - - Inst_VOPC__V_CMP_T_I16::Inst_VOPC__V_CMP_T_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_t_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_T_I16 - - Inst_VOPC__V_CMP_T_I16::~Inst_VOPC__V_CMP_T_I16() - { - } // ~Inst_VOPC__V_CMP_T_I16 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_F_U16 class methods --- - - Inst_VOPC__V_CMP_F_U16::Inst_VOPC__V_CMP_F_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_F_U16 - - Inst_VOPC__V_CMP_F_U16::~Inst_VOPC__V_CMP_F_U16() - { - } // ~Inst_VOPC__V_CMP_F_U16 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LT_U16 class methods --- - - Inst_VOPC__V_CMP_LT_U16::Inst_VOPC__V_CMP_LT_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LT_U16 - - Inst_VOPC__V_CMP_LT_U16::~Inst_VOPC__V_CMP_LT_U16() - { - } // ~Inst_VOPC__V_CMP_LT_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_EQ_U16 class methods --- - - Inst_VOPC__V_CMP_EQ_U16::Inst_VOPC__V_CMP_EQ_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_EQ_U16 - - Inst_VOPC__V_CMP_EQ_U16::~Inst_VOPC__V_CMP_EQ_U16() - { - } // ~Inst_VOPC__V_CMP_EQ_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LE_U16 class methods --- - - Inst_VOPC__V_CMP_LE_U16::Inst_VOPC__V_CMP_LE_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LE_U16 - - Inst_VOPC__V_CMP_LE_U16::~Inst_VOPC__V_CMP_LE_U16() - { - } // ~Inst_VOPC__V_CMP_LE_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GT_U16 class methods --- - - Inst_VOPC__V_CMP_GT_U16::Inst_VOPC__V_CMP_GT_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GT_U16 - - Inst_VOPC__V_CMP_GT_U16::~Inst_VOPC__V_CMP_GT_U16() - { - } // ~Inst_VOPC__V_CMP_GT_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NE_U16 class methods --- - - Inst_VOPC__V_CMP_NE_U16::Inst_VOPC__V_CMP_NE_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ne_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_NE_U16 - - Inst_VOPC__V_CMP_NE_U16::~Inst_VOPC__V_CMP_NE_U16() - { - } // ~Inst_VOPC__V_CMP_NE_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GE_U16 class methods --- - - Inst_VOPC__V_CMP_GE_U16::Inst_VOPC__V_CMP_GE_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GE_U16 - - Inst_VOPC__V_CMP_GE_U16::~Inst_VOPC__V_CMP_GE_U16() - { - } // ~Inst_VOPC__V_CMP_GE_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_T_U16 class methods --- - - Inst_VOPC__V_CMP_T_U16::Inst_VOPC__V_CMP_T_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_t_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_T_U16 - - Inst_VOPC__V_CMP_T_U16::~Inst_VOPC__V_CMP_T_U16() - { - } // ~Inst_VOPC__V_CMP_T_U16 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_F_I16 class methods --- - - Inst_VOPC__V_CMPX_F_I16::Inst_VOPC__V_CMPX_F_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_i16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_F_I16 - - Inst_VOPC__V_CMPX_F_I16::~Inst_VOPC__V_CMPX_F_I16() - { - } // ~Inst_VOPC__V_CMPX_F_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LT_I16 class methods --- - - Inst_VOPC__V_CMPX_LT_I16::Inst_VOPC__V_CMPX_LT_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_i16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LT_I16 - - Inst_VOPC__V_CMPX_LT_I16::~Inst_VOPC__V_CMPX_LT_I16() - { - } // ~Inst_VOPC__V_CMPX_LT_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_EQ_I16 class methods --- - - Inst_VOPC__V_CMPX_EQ_I16::Inst_VOPC__V_CMPX_EQ_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_i16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_EQ_I16 - - Inst_VOPC__V_CMPX_EQ_I16::~Inst_VOPC__V_CMPX_EQ_I16() - { - } // ~Inst_VOPC__V_CMPX_EQ_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LE_I16 class methods --- - - Inst_VOPC__V_CMPX_LE_I16::Inst_VOPC__V_CMPX_LE_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_i16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LE_I16 - - Inst_VOPC__V_CMPX_LE_I16::~Inst_VOPC__V_CMPX_LE_I16() - { - } // ~Inst_VOPC__V_CMPX_LE_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GT_I16 class methods --- - - Inst_VOPC__V_CMPX_GT_I16::Inst_VOPC__V_CMPX_GT_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_i16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GT_I16 - - Inst_VOPC__V_CMPX_GT_I16::~Inst_VOPC__V_CMPX_GT_I16() - { - } // ~Inst_VOPC__V_CMPX_GT_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NE_I16 class methods --- - - Inst_VOPC__V_CMPX_NE_I16::Inst_VOPC__V_CMPX_NE_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ne_i16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NE_I16 - - Inst_VOPC__V_CMPX_NE_I16::~Inst_VOPC__V_CMPX_NE_I16() - { - } // ~Inst_VOPC__V_CMPX_NE_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GE_I16 class methods --- - - Inst_VOPC__V_CMPX_GE_I16::Inst_VOPC__V_CMPX_GE_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_i16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GE_I16 - - Inst_VOPC__V_CMPX_GE_I16::~Inst_VOPC__V_CMPX_GE_I16() - { - } // ~Inst_VOPC__V_CMPX_GE_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_T_I16 class methods --- - - Inst_VOPC__V_CMPX_T_I16::Inst_VOPC__V_CMPX_T_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_t_i16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_T_I16 - - Inst_VOPC__V_CMPX_T_I16::~Inst_VOPC__V_CMPX_T_I16() - { - } // ~Inst_VOPC__V_CMPX_T_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_F_U16 class methods --- - - Inst_VOPC__V_CMPX_F_U16::Inst_VOPC__V_CMPX_F_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_u16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_F_U16 - - Inst_VOPC__V_CMPX_F_U16::~Inst_VOPC__V_CMPX_F_U16() - { - } // ~Inst_VOPC__V_CMPX_F_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LT_U16 class methods --- - - Inst_VOPC__V_CMPX_LT_U16::Inst_VOPC__V_CMPX_LT_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_u16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LT_U16 - - Inst_VOPC__V_CMPX_LT_U16::~Inst_VOPC__V_CMPX_LT_U16() - { - } // ~Inst_VOPC__V_CMPX_LT_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_EQ_U16 class methods --- - - Inst_VOPC__V_CMPX_EQ_U16::Inst_VOPC__V_CMPX_EQ_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_u16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_EQ_U16 - - Inst_VOPC__V_CMPX_EQ_U16::~Inst_VOPC__V_CMPX_EQ_U16() - { - } // ~Inst_VOPC__V_CMPX_EQ_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LE_U16 class methods --- - - Inst_VOPC__V_CMPX_LE_U16::Inst_VOPC__V_CMPX_LE_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_u16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LE_U16 - - Inst_VOPC__V_CMPX_LE_U16::~Inst_VOPC__V_CMPX_LE_U16() - { - } // ~Inst_VOPC__V_CMPX_LE_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GT_U16 class methods --- - - Inst_VOPC__V_CMPX_GT_U16::Inst_VOPC__V_CMPX_GT_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_u16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GT_U16 - - Inst_VOPC__V_CMPX_GT_U16::~Inst_VOPC__V_CMPX_GT_U16() - { - } // ~Inst_VOPC__V_CMPX_GT_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NE_U16 class methods --- - - Inst_VOPC__V_CMPX_NE_U16::Inst_VOPC__V_CMPX_NE_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ne_u16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NE_U16 - - Inst_VOPC__V_CMPX_NE_U16::~Inst_VOPC__V_CMPX_NE_U16() - { - } // ~Inst_VOPC__V_CMPX_NE_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GE_U16 class methods --- - - Inst_VOPC__V_CMPX_GE_U16::Inst_VOPC__V_CMPX_GE_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_u16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GE_U16 - - Inst_VOPC__V_CMPX_GE_U16::~Inst_VOPC__V_CMPX_GE_U16() - { - } // ~Inst_VOPC__V_CMPX_GE_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_T_U16 class methods --- - - Inst_VOPC__V_CMPX_T_U16::Inst_VOPC__V_CMPX_T_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_t_u16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_T_U16 - - Inst_VOPC__V_CMPX_T_U16::~Inst_VOPC__V_CMPX_T_U16() - { - } // ~Inst_VOPC__V_CMPX_T_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_F_I32 class methods --- - - Inst_VOPC__V_CMP_F_I32::Inst_VOPC__V_CMP_F_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_i32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_F_I32 - - Inst_VOPC__V_CMP_F_I32::~Inst_VOPC__V_CMP_F_I32() - { - } // ~Inst_VOPC__V_CMP_F_I32 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LT_I32 class methods --- - - Inst_VOPC__V_CMP_LT_I32::Inst_VOPC__V_CMP_LT_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_i32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LT_I32 - - Inst_VOPC__V_CMP_LT_I32::~Inst_VOPC__V_CMP_LT_I32() - { - } // ~Inst_VOPC__V_CMP_LT_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_EQ_I32 class methods --- - - Inst_VOPC__V_CMP_EQ_I32::Inst_VOPC__V_CMP_EQ_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_i32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_EQ_I32 - - Inst_VOPC__V_CMP_EQ_I32::~Inst_VOPC__V_CMP_EQ_I32() - { - } // ~Inst_VOPC__V_CMP_EQ_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LE_I32 class methods --- - - Inst_VOPC__V_CMP_LE_I32::Inst_VOPC__V_CMP_LE_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_i32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LE_I32 - - Inst_VOPC__V_CMP_LE_I32::~Inst_VOPC__V_CMP_LE_I32() - { - } // ~Inst_VOPC__V_CMP_LE_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GT_I32 class methods --- - - Inst_VOPC__V_CMP_GT_I32::Inst_VOPC__V_CMP_GT_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_i32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GT_I32 - - Inst_VOPC__V_CMP_GT_I32::~Inst_VOPC__V_CMP_GT_I32() - { - } // ~Inst_VOPC__V_CMP_GT_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NE_I32 class methods --- - - Inst_VOPC__V_CMP_NE_I32::Inst_VOPC__V_CMP_NE_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ne_i32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_NE_I32 - - Inst_VOPC__V_CMP_NE_I32::~Inst_VOPC__V_CMP_NE_I32() - { - } // ~Inst_VOPC__V_CMP_NE_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GE_I32 class methods --- - - Inst_VOPC__V_CMP_GE_I32::Inst_VOPC__V_CMP_GE_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_i32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GE_I32 - - Inst_VOPC__V_CMP_GE_I32::~Inst_VOPC__V_CMP_GE_I32() - { - } // ~Inst_VOPC__V_CMP_GE_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
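Every VOPC compare above follows the same per-lane pattern: evaluate the predicate for each lane that is active in EXEC, record the outcome as one bit of the 64-bit VCC mask, and, for the v_cmpx_* variants only, copy the finished mask back into EXEC as well. A minimal standalone sketch of that pattern in plain C++ (the names vopcCompare, execMask and cmpx are illustrative only, not gem5 API):

    #include <cstdint>
    #include <functional>

    // Illustrative per-lane VOPC compare over a 64-lane wavefront.
    // Only lanes set in execMask are evaluated; each result becomes one
    // bit of the returned VCC mask. CMPX-style compares also overwrite
    // execMask with that mask.
    template <typename T>
    uint64_t
    vopcCompare(const T (&src0)[64], const T (&src1)[64],
                const std::function<bool(T, T)> &pred,
                uint64_t &execMask, bool cmpx)
    {
        uint64_t vcc = 0;
        for (int lane = 0; lane < 64; ++lane) {
            if ((execMask & (1ULL << lane)) && pred(src0[lane], src1[lane])) {
                vcc |= 1ULL << lane;
            }
        }
        if (cmpx) {
            execMask = vcc;  // v_cmpx_* additionally updates EXEC
        }
        return vcc;
    }

Note that this sketch starts from a zeroed mask, whereas the gem5 code above only calls setBit() for active lanes, so bits belonging to inactive lanes keep whatever VCC previously held.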
- void - Inst_VOPC__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_T_I32 class methods --- - - Inst_VOPC__V_CMP_T_I32::Inst_VOPC__V_CMP_T_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_t_i32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_T_I32 - - Inst_VOPC__V_CMP_T_I32::~Inst_VOPC__V_CMP_T_I32() - { - } // ~Inst_VOPC__V_CMP_T_I32 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_F_U32 class methods --- - - Inst_VOPC__V_CMP_F_U32::Inst_VOPC__V_CMP_F_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_u32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_F_U32 - - Inst_VOPC__V_CMP_F_U32::~Inst_VOPC__V_CMP_F_U32() - { - } // ~Inst_VOPC__V_CMP_F_U32 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LT_U32 class methods --- - - Inst_VOPC__V_CMP_LT_U32::Inst_VOPC__V_CMP_LT_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_u32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LT_U32 - - Inst_VOPC__V_CMP_LT_U32::~Inst_VOPC__V_CMP_LT_U32() - { - } // ~Inst_VOPC__V_CMP_LT_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_EQ_U32 class methods --- - - Inst_VOPC__V_CMP_EQ_U32::Inst_VOPC__V_CMP_EQ_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_u32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_EQ_U32 - - Inst_VOPC__V_CMP_EQ_U32::~Inst_VOPC__V_CMP_EQ_U32() - { - } // ~Inst_VOPC__V_CMP_EQ_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LE_U32 class methods --- - - Inst_VOPC__V_CMP_LE_U32::Inst_VOPC__V_CMP_LE_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_u32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LE_U32 - - Inst_VOPC__V_CMP_LE_U32::~Inst_VOPC__V_CMP_LE_U32() - { - } // ~Inst_VOPC__V_CMP_LE_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GT_U32 class methods --- - - Inst_VOPC__V_CMP_GT_U32::Inst_VOPC__V_CMP_GT_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_u32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GT_U32 - - Inst_VOPC__V_CMP_GT_U32::~Inst_VOPC__V_CMP_GT_U32() - { - } // ~Inst_VOPC__V_CMP_GT_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NE_U32 class methods --- - - Inst_VOPC__V_CMP_NE_U32::Inst_VOPC__V_CMP_NE_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ne_u32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_NE_U32 - - Inst_VOPC__V_CMP_NE_U32::~Inst_VOPC__V_CMP_NE_U32() - { - } // ~Inst_VOPC__V_CMP_NE_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GE_U32 class methods --- - - Inst_VOPC__V_CMP_GE_U32::Inst_VOPC__V_CMP_GE_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_u32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GE_U32 - - Inst_VOPC__V_CMP_GE_U32::~Inst_VOPC__V_CMP_GE_U32() - { - } // ~Inst_VOPC__V_CMP_GE_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_T_U32 class methods --- - - Inst_VOPC__V_CMP_T_U32::Inst_VOPC__V_CMP_T_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_t_u32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_T_U32 - - Inst_VOPC__V_CMP_T_U32::~Inst_VOPC__V_CMP_T_U32() - { - } // ~Inst_VOPC__V_CMP_T_U32 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_F_I32 class methods --- - - Inst_VOPC__V_CMPX_F_I32::Inst_VOPC__V_CMPX_F_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_i32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_F_I32 - - Inst_VOPC__V_CMPX_F_I32::~Inst_VOPC__V_CMPX_F_I32() - { - } // ~Inst_VOPC__V_CMPX_F_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LT_I32 class methods --- - - Inst_VOPC__V_CMPX_LT_I32::Inst_VOPC__V_CMPX_LT_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_i32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LT_I32 - - Inst_VOPC__V_CMPX_LT_I32::~Inst_VOPC__V_CMPX_LT_I32() - { - } // ~Inst_VOPC__V_CMPX_LT_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_EQ_I32 class methods --- - - Inst_VOPC__V_CMPX_EQ_I32::Inst_VOPC__V_CMPX_EQ_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_i32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_EQ_I32 - - Inst_VOPC__V_CMPX_EQ_I32::~Inst_VOPC__V_CMPX_EQ_I32() - { - } // ~Inst_VOPC__V_CMPX_EQ_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LE_I32 class methods --- - - Inst_VOPC__V_CMPX_LE_I32::Inst_VOPC__V_CMPX_LE_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_i32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LE_I32 - - Inst_VOPC__V_CMPX_LE_I32::~Inst_VOPC__V_CMPX_LE_I32() - { - } // ~Inst_VOPC__V_CMPX_LE_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GT_I32 class methods --- - - Inst_VOPC__V_CMPX_GT_I32::Inst_VOPC__V_CMPX_GT_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_i32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GT_I32 - - Inst_VOPC__V_CMPX_GT_I32::~Inst_VOPC__V_CMPX_GT_I32() - { - } // ~Inst_VOPC__V_CMPX_GT_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NE_I32 class methods --- - - Inst_VOPC__V_CMPX_NE_I32::Inst_VOPC__V_CMPX_NE_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ne_i32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NE_I32 - - Inst_VOPC__V_CMPX_NE_I32::~Inst_VOPC__V_CMPX_NE_I32() - { - } // ~Inst_VOPC__V_CMPX_NE_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GE_I32 class methods --- - - Inst_VOPC__V_CMPX_GE_I32::Inst_VOPC__V_CMPX_GE_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_i32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GE_I32 - - Inst_VOPC__V_CMPX_GE_I32::~Inst_VOPC__V_CMPX_GE_I32() - { - } // ~Inst_VOPC__V_CMPX_GE_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_T_I32 class methods --- - - Inst_VOPC__V_CMPX_T_I32::Inst_VOPC__V_CMPX_T_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_t_i32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_T_I32 - - Inst_VOPC__V_CMPX_T_I32::~Inst_VOPC__V_CMPX_T_I32() - { - } // ~Inst_VOPC__V_CMPX_T_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_F_U32 class methods --- - - Inst_VOPC__V_CMPX_F_U32::Inst_VOPC__V_CMPX_F_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_u32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_F_U32 - - Inst_VOPC__V_CMPX_F_U32::~Inst_VOPC__V_CMPX_F_U32() - { - } // ~Inst_VOPC__V_CMPX_F_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LT_U32 class methods --- - - Inst_VOPC__V_CMPX_LT_U32::Inst_VOPC__V_CMPX_LT_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_u32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LT_U32 - - Inst_VOPC__V_CMPX_LT_U32::~Inst_VOPC__V_CMPX_LT_U32() - { - } // ~Inst_VOPC__V_CMPX_LT_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_EQ_U32 class methods --- - - Inst_VOPC__V_CMPX_EQ_U32::Inst_VOPC__V_CMPX_EQ_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_u32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_EQ_U32 - - Inst_VOPC__V_CMPX_EQ_U32::~Inst_VOPC__V_CMPX_EQ_U32() - { - } // ~Inst_VOPC__V_CMPX_EQ_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LE_U32 class methods --- - - Inst_VOPC__V_CMPX_LE_U32::Inst_VOPC__V_CMPX_LE_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_u32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LE_U32 - - Inst_VOPC__V_CMPX_LE_U32::~Inst_VOPC__V_CMPX_LE_U32() - { - } // ~Inst_VOPC__V_CMPX_LE_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GT_U32 class methods --- - - Inst_VOPC__V_CMPX_GT_U32::Inst_VOPC__V_CMPX_GT_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_u32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GT_U32 - - Inst_VOPC__V_CMPX_GT_U32::~Inst_VOPC__V_CMPX_GT_U32() - { - } // ~Inst_VOPC__V_CMPX_GT_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NE_U32 class methods --- - - Inst_VOPC__V_CMPX_NE_U32::Inst_VOPC__V_CMPX_NE_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ne_u32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NE_U32 - - Inst_VOPC__V_CMPX_NE_U32::~Inst_VOPC__V_CMPX_NE_U32() - { - } // ~Inst_VOPC__V_CMPX_NE_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GE_U32 class methods --- - - Inst_VOPC__V_CMPX_GE_U32::Inst_VOPC__V_CMPX_GE_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_u32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GE_U32 - - Inst_VOPC__V_CMPX_GE_U32::~Inst_VOPC__V_CMPX_GE_U32() - { - } // ~Inst_VOPC__V_CMPX_GE_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_T_U32 class methods --- - - Inst_VOPC__V_CMPX_T_U32::Inst_VOPC__V_CMPX_T_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_t_u32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_T_U32 - - Inst_VOPC__V_CMPX_T_U32::~Inst_VOPC__V_CMPX_T_U32() - { - } // ~Inst_VOPC__V_CMPX_T_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_F_I64 class methods --- - - Inst_VOPC__V_CMP_F_I64::Inst_VOPC__V_CMP_F_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_F_I64 - - Inst_VOPC__V_CMP_F_I64::~Inst_VOPC__V_CMP_F_I64() - { - } // ~Inst_VOPC__V_CMP_F_I64 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LT_I64 class methods --- - - Inst_VOPC__V_CMP_LT_I64::Inst_VOPC__V_CMP_LT_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LT_I64 - - Inst_VOPC__V_CMP_LT_I64::~Inst_VOPC__V_CMP_LT_I64() - { - } // ~Inst_VOPC__V_CMP_LT_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_EQ_I64 class methods --- - - Inst_VOPC__V_CMP_EQ_I64::Inst_VOPC__V_CMP_EQ_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_EQ_I64 - - Inst_VOPC__V_CMP_EQ_I64::~Inst_VOPC__V_CMP_EQ_I64() - { - } // ~Inst_VOPC__V_CMP_EQ_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LE_I64 class methods --- - - Inst_VOPC__V_CMP_LE_I64::Inst_VOPC__V_CMP_LE_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LE_I64 - - Inst_VOPC__V_CMP_LE_I64::~Inst_VOPC__V_CMP_LE_I64() - { - } // ~Inst_VOPC__V_CMP_LE_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GT_I64 class methods --- - - Inst_VOPC__V_CMP_GT_I64::Inst_VOPC__V_CMP_GT_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GT_I64 - - Inst_VOPC__V_CMP_GT_I64::~Inst_VOPC__V_CMP_GT_I64() - { - } // ~Inst_VOPC__V_CMP_GT_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NE_I64 class methods --- - - Inst_VOPC__V_CMP_NE_I64::Inst_VOPC__V_CMP_NE_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ne_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_NE_I64 - - Inst_VOPC__V_CMP_NE_I64::~Inst_VOPC__V_CMP_NE_I64() - { - } // ~Inst_VOPC__V_CMP_NE_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GE_I64 class methods --- - - Inst_VOPC__V_CMP_GE_I64::Inst_VOPC__V_CMP_GE_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GE_I64 - - Inst_VOPC__V_CMP_GE_I64::~Inst_VOPC__V_CMP_GE_I64() - { - } // ~Inst_VOPC__V_CMP_GE_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_T_I64 class methods --- - - Inst_VOPC__V_CMP_T_I64::Inst_VOPC__V_CMP_T_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_t_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_T_I64 - - Inst_VOPC__V_CMP_T_I64::~Inst_VOPC__V_CMP_T_I64() - { - } // ~Inst_VOPC__V_CMP_T_I64 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_F_U64 class methods --- - - Inst_VOPC__V_CMP_F_U64::Inst_VOPC__V_CMP_F_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_F_U64 - - Inst_VOPC__V_CMP_F_U64::~Inst_VOPC__V_CMP_F_U64() - { - } // ~Inst_VOPC__V_CMP_F_U64 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LT_U64 class methods --- - - Inst_VOPC__V_CMP_LT_U64::Inst_VOPC__V_CMP_LT_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LT_U64 - - Inst_VOPC__V_CMP_LT_U64::~Inst_VOPC__V_CMP_LT_U64() - { - } // ~Inst_VOPC__V_CMP_LT_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_EQ_U64 class methods --- - - Inst_VOPC__V_CMP_EQ_U64::Inst_VOPC__V_CMP_EQ_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_EQ_U64 - - Inst_VOPC__V_CMP_EQ_U64::~Inst_VOPC__V_CMP_EQ_U64() - { - } // ~Inst_VOPC__V_CMP_EQ_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LE_U64 class methods --- - - Inst_VOPC__V_CMP_LE_U64::Inst_VOPC__V_CMP_LE_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LE_U64 - - Inst_VOPC__V_CMP_LE_U64::~Inst_VOPC__V_CMP_LE_U64() - { - } // ~Inst_VOPC__V_CMP_LE_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GT_U64 class methods --- - - Inst_VOPC__V_CMP_GT_U64::Inst_VOPC__V_CMP_GT_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GT_U64 - - Inst_VOPC__V_CMP_GT_U64::~Inst_VOPC__V_CMP_GT_U64() - { - } // ~Inst_VOPC__V_CMP_GT_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NE_U64 class methods --- - - Inst_VOPC__V_CMP_NE_U64::Inst_VOPC__V_CMP_NE_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ne_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_NE_U64 - - Inst_VOPC__V_CMP_NE_U64::~Inst_VOPC__V_CMP_NE_U64() - { - } // ~Inst_VOPC__V_CMP_NE_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GE_U64 class methods --- - - Inst_VOPC__V_CMP_GE_U64::Inst_VOPC__V_CMP_GE_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GE_U64 - - Inst_VOPC__V_CMP_GE_U64::~Inst_VOPC__V_CMP_GE_U64() - { - } // ~Inst_VOPC__V_CMP_GE_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_T_U64 class methods --- - - Inst_VOPC__V_CMP_T_U64::Inst_VOPC__V_CMP_T_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_t_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_T_U64 - - Inst_VOPC__V_CMP_T_U64::~Inst_VOPC__V_CMP_T_U64() - { - } // ~Inst_VOPC__V_CMP_T_U64 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_F_I64 class methods --- - - Inst_VOPC__V_CMPX_F_I64::Inst_VOPC__V_CMPX_F_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_i64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_F_I64 - - Inst_VOPC__V_CMPX_F_I64::~Inst_VOPC__V_CMPX_F_I64() - { - } // ~Inst_VOPC__V_CMPX_F_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LT_I64 class methods --- - - Inst_VOPC__V_CMPX_LT_I64::Inst_VOPC__V_CMPX_LT_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_i64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LT_I64 - - Inst_VOPC__V_CMPX_LT_I64::~Inst_VOPC__V_CMPX_LT_I64() - { - } // ~Inst_VOPC__V_CMPX_LT_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_EQ_I64 class methods --- - - Inst_VOPC__V_CMPX_EQ_I64::Inst_VOPC__V_CMPX_EQ_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_i64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_EQ_I64 - - Inst_VOPC__V_CMPX_EQ_I64::~Inst_VOPC__V_CMPX_EQ_I64() - { - } // ~Inst_VOPC__V_CMPX_EQ_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LE_I64 class methods --- - - Inst_VOPC__V_CMPX_LE_I64::Inst_VOPC__V_CMPX_LE_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_i64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LE_I64 - - Inst_VOPC__V_CMPX_LE_I64::~Inst_VOPC__V_CMPX_LE_I64() - { - } // ~Inst_VOPC__V_CMPX_LE_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GT_I64 class methods --- - - Inst_VOPC__V_CMPX_GT_I64::Inst_VOPC__V_CMPX_GT_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_i64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GT_I64 - - Inst_VOPC__V_CMPX_GT_I64::~Inst_VOPC__V_CMPX_GT_I64() - { - } // ~Inst_VOPC__V_CMPX_GT_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NE_I64 class methods --- - - Inst_VOPC__V_CMPX_NE_I64::Inst_VOPC__V_CMPX_NE_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ne_i64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NE_I64 - - Inst_VOPC__V_CMPX_NE_I64::~Inst_VOPC__V_CMPX_NE_I64() - { - } // ~Inst_VOPC__V_CMPX_NE_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GE_I64 class methods --- - - Inst_VOPC__V_CMPX_GE_I64::Inst_VOPC__V_CMPX_GE_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_i64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GE_I64 - - Inst_VOPC__V_CMPX_GE_I64::~Inst_VOPC__V_CMPX_GE_I64() - { - } // ~Inst_VOPC__V_CMPX_GE_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_T_I64 class methods --- - - Inst_VOPC__V_CMPX_T_I64::Inst_VOPC__V_CMPX_T_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_t_i64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_T_I64 - - Inst_VOPC__V_CMPX_T_I64::~Inst_VOPC__V_CMPX_T_I64() - { - } // ~Inst_VOPC__V_CMPX_T_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_F_U64 class methods --- - - Inst_VOPC__V_CMPX_F_U64::Inst_VOPC__V_CMPX_F_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_u64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_F_U64 - - Inst_VOPC__V_CMPX_F_U64::~Inst_VOPC__V_CMPX_F_U64() - { - } // ~Inst_VOPC__V_CMPX_F_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LT_U64 class methods --- - - Inst_VOPC__V_CMPX_LT_U64::Inst_VOPC__V_CMPX_LT_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_u64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LT_U64 - - Inst_VOPC__V_CMPX_LT_U64::~Inst_VOPC__V_CMPX_LT_U64() - { - } // ~Inst_VOPC__V_CMPX_LT_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_EQ_U64 class methods --- - - Inst_VOPC__V_CMPX_EQ_U64::Inst_VOPC__V_CMPX_EQ_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_u64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_EQ_U64 - - Inst_VOPC__V_CMPX_EQ_U64::~Inst_VOPC__V_CMPX_EQ_U64() - { - } // ~Inst_VOPC__V_CMPX_EQ_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LE_U64 class methods --- - - Inst_VOPC__V_CMPX_LE_U64::Inst_VOPC__V_CMPX_LE_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_u64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LE_U64 - - Inst_VOPC__V_CMPX_LE_U64::~Inst_VOPC__V_CMPX_LE_U64() - { - } // ~Inst_VOPC__V_CMPX_LE_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GT_U64 class methods --- - - Inst_VOPC__V_CMPX_GT_U64::Inst_VOPC__V_CMPX_GT_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_u64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GT_U64 - - Inst_VOPC__V_CMPX_GT_U64::~Inst_VOPC__V_CMPX_GT_U64() - { - } // ~Inst_VOPC__V_CMPX_GT_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NE_U64 class methods --- - - Inst_VOPC__V_CMPX_NE_U64::Inst_VOPC__V_CMPX_NE_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ne_u64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NE_U64 - - Inst_VOPC__V_CMPX_NE_U64::~Inst_VOPC__V_CMPX_NE_U64() - { - } // ~Inst_VOPC__V_CMPX_NE_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GE_U64 class methods --- - - Inst_VOPC__V_CMPX_GE_U64::Inst_VOPC__V_CMPX_GE_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_u64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GE_U64 - - Inst_VOPC__V_CMPX_GE_U64::~Inst_VOPC__V_CMPX_GE_U64() - { - } // ~Inst_VOPC__V_CMPX_GE_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_T_U64 class methods --- - - Inst_VOPC__V_CMPX_T_U64::Inst_VOPC__V_CMPX_T_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_t_u64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_T_U64 - - Inst_VOPC__V_CMPX_T_U64::~Inst_VOPC__V_CMPX_T_U64() - { - } // ~Inst_VOPC__V_CMPX_T_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VINTRP__V_INTERP_P1_F32 class methods --- - - Inst_VINTRP__V_INTERP_P1_F32::Inst_VINTRP__V_INTERP_P1_F32( - InFmt_VINTRP *iFmt) - : Inst_VINTRP(iFmt, "v_interp_p1_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VINTRP__V_INTERP_P1_F32 - - Inst_VINTRP__V_INTERP_P1_F32::~Inst_VINTRP__V_INTERP_P1_F32() - { - } // ~Inst_VINTRP__V_INTERP_P1_F32 - - // --- description from .arch file --- - // D.f = P10 * S.f + P0; parameter interpolation (SQ translates to - // V_MAD_F32 for SP). - // CAUTION: when in HALF_LDS mode, D must not be the same GPR as S; - // if D == S then data corruption will occur. - // NOTE: In textual representations the I/J VGPR is the first source and - // the attribute is the second source; however in the VOP3 encoding the - // attribute is stored in the src0 field and the VGPR is stored in the - // src1 field. - void - Inst_VINTRP__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VINTRP__V_INTERP_P2_F32 class methods --- - - Inst_VINTRP__V_INTERP_P2_F32::Inst_VINTRP__V_INTERP_P2_F32( - InFmt_VINTRP *iFmt) - : Inst_VINTRP(iFmt, "v_interp_p2_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VINTRP__V_INTERP_P2_F32 - - Inst_VINTRP__V_INTERP_P2_F32::~Inst_VINTRP__V_INTERP_P2_F32() - { - } // ~Inst_VINTRP__V_INTERP_P2_F32 - - // --- description from .arch file --- - // D.f = P20 * S.f + D.f; parameter interpolation (SQ translates to - // V_MAD_F32 for SP). - // NOTE: In textual representations the I/J VGPR is the first source and - // the attribute is the second source; however in the VOP3 encoding the - // attribute is stored in the src0 field and the VGPR is stored in the - // src1 field. - void - Inst_VINTRP__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VINTRP__V_INTERP_MOV_F32 class methods --- - - Inst_VINTRP__V_INTERP_MOV_F32::Inst_VINTRP__V_INTERP_MOV_F32( - InFmt_VINTRP *iFmt) - : Inst_VINTRP(iFmt, "v_interp_mov_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VINTRP__V_INTERP_MOV_F32 - - Inst_VINTRP__V_INTERP_MOV_F32::~Inst_VINTRP__V_INTERP_MOV_F32() - { - } // ~Inst_VINTRP__V_INTERP_MOV_F32 - - // --- description from .arch file --- - // D.f = {P10,P20,P0}[S.u]; parameter load. 
- void - Inst_VINTRP__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_CLASS_F32 class methods --- - - Inst_VOP3__V_CMP_CLASS_F32::Inst_VOP3__V_CMP_CLASS_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_class_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_CLASS_F32 - - Inst_VOP3__V_CMP_CLASS_F32::~Inst_VOP3__V_CMP_CLASS_F32() - { - } // ~Inst_VOP3__V_CMP_CLASS_F32 - - // --- description from .arch file --- - // VCC = IEEE numeric class function specified in S1.u, performed on S0.f - // The function reports true if the floating point value is *any* of the - // --- numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. - void - Inst_VOP3__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (bits(src1[lane], 0) || bits(src1[lane], 1)) { - // is NaN - if (std::isnan(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 2)) { - // is -infinity - if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 3)) { - // is -normal - if (std::isnormal(src0[lane]) - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 4)) { - // is -denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 5)) { - // is -zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 6)) { - // is +zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 7)) { - // is +denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 8)) { - // is +normal - if (std::isnormal(src0[lane]) - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 9)) { - // is +infinity - if (std::isinf(src0[lane]) - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_CLASS_F32 class methods --- - - Inst_VOP3__V_CMPX_CLASS_F32::Inst_VOP3__V_CMPX_CLASS_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_class_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_CLASS_F32 - - Inst_VOP3__V_CMPX_CLASS_F32::~Inst_VOP3__V_CMPX_CLASS_F32() - { - } // ~Inst_VOP3__V_CMPX_CLASS_F32 - - // --- description from .arch file --- - // EXEC, VCC = IEEE numeric class function specified in 
S1.u, performed on - // S0.f - // The function reports true if the floating point value is *any* of the - // numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. - void - Inst_VOP3__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (bits(src1[lane], 0) || bits(src1[lane], 1)) { - // is NaN - if (std::isnan(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 2)) { - // is -infinity - if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 3)) { - // is -normal - if (std::isnormal(src0[lane]) - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 4)) { - // is -denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 5)) { - // is -zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 6)) { - // is +zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 7)) { - // is +denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 8)) { - // is +normal - if (std::isnormal(src0[lane]) - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 9)) { - // is +infinity - if (std::isinf(src0[lane]) - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_CLASS_F64 class methods --- - - Inst_VOP3__V_CMP_CLASS_F64::Inst_VOP3__V_CMP_CLASS_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_class_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_CLASS_F64 - - Inst_VOP3__V_CMP_CLASS_F64::~Inst_VOP3__V_CMP_CLASS_F64() - { - } // ~Inst_VOP3__V_CMP_CLASS_F64 - - // --- description from .arch file --- - // VCC = IEEE numeric class function specified in S1.u, performed on S0.d - // The function reports true if the floating point value is *any* of the - // --- numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. 
- // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. - void - Inst_VOP3__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (bits(src1[lane], 0) || bits(src1[lane], 1)) { - // is NaN - if (std::isnan(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 2)) { - // is -infinity - if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 3)) { - // is -normal - if (std::isnormal(src0[lane]) - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 4)) { - // is -denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 5)) { - // is -zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 6)) { - // is +zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 7)) { - // is +denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 8)) { - // is +normal - if (std::isnormal(src0[lane]) - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 9)) { - // is +infinity - if (std::isinf(src0[lane]) - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_CLASS_F64 class methods --- - - Inst_VOP3__V_CMPX_CLASS_F64::Inst_VOP3__V_CMPX_CLASS_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_class_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_CLASS_F64 - - Inst_VOP3__V_CMPX_CLASS_F64::~Inst_VOP3__V_CMPX_CLASS_F64() - { - } // ~Inst_VOP3__V_CMPX_CLASS_F64 - - // --- description from .arch file --- - // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on - // S0.d - // The function reports true if the floating point value is *any* of the - // numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. 
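The V_CMP_CLASS / V_CMPX_CLASS lane loops in this hunk all test the same ten S1 bit positions. As a compact cross-check, the mapping can be written with the <cmath> classification primitives; this is an illustrative sketch only (classMatch is a made-up helper, not gem5 code), included because the expanded per-bit tests are easy to misread.

    #include <cmath>
    #include <cstdint>

    // True if s0 belongs to any numeric class selected by the S1 bits:
    // 0 sNaN, 1 qNaN, 2 -inf, 3 -normal, 4 -denormal, 5 -0,
    // 6 +0, 7 +denormal, 8 +normal, 9 +inf.
    inline bool
    classMatch(double s0, uint32_t s1)
    {
        const bool neg = std::signbit(s0);
        switch (std::fpclassify(s0)) {
          case FP_NAN:
            // The implementations in this file accept either NaN bit and
            // do not distinguish signaling from quiet NaNs.
            return (s1 & 0x3) != 0;
          case FP_INFINITE:
            return (s1 & (neg ? 1u << 2 : 1u << 9)) != 0;
          case FP_NORMAL:
            return (s1 & (neg ? 1u << 3 : 1u << 8)) != 0;
          case FP_SUBNORMAL:
            return (s1 & (neg ? 1u << 4 : 1u << 7)) != 0;
          case FP_ZERO:
            return (s1 & (neg ? 1u << 5 : 1u << 6)) != 0;
        }
        return false;
    }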
- void - Inst_VOP3__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (bits(src1[lane], 0) || bits(src1[lane], 1)) { - // is NaN - if (std::isnan(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 2)) { - // is -infinity - if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 3)) { - // is -normal - if (std::isnormal(src0[lane]) - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 4)) { - // is -denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 5)) { - // is -zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 6)) { - // is +zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 7)) { - // is +denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 8)) { - // is +normal - if (std::isnormal(src0[lane]) - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 9)) { - // is +infinity - if (std::isinf(src0[lane]) - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_CLASS_F16 class methods --- - - Inst_VOP3__V_CMP_CLASS_F16::Inst_VOP3__V_CMP_CLASS_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_class_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_CLASS_F16 - - Inst_VOP3__V_CMP_CLASS_F16::~Inst_VOP3__V_CMP_CLASS_F16() - { - } // ~Inst_VOP3__V_CMP_CLASS_F16 - - // --- description from .arch file --- - // VCC = IEEE numeric class function specified in S1.u, performed on S0.f16 - // The function reports true if the floating point value is *any* of the - // --- numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. 
- void - Inst_VOP3__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_CLASS_F16 class methods --- - - Inst_VOP3__V_CMPX_CLASS_F16::Inst_VOP3__V_CMPX_CLASS_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_class_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_CLASS_F16 - - Inst_VOP3__V_CMPX_CLASS_F16::~Inst_VOP3__V_CMPX_CLASS_F16() - { - } // ~Inst_VOP3__V_CMPX_CLASS_F16 - - // --- description from .arch file --- - // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on - // --- S0.f16 - // The function reports true if the floating point value is *any* of the - // --- numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. - void - Inst_VOP3__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_F_F16 class methods --- - - Inst_VOP3__V_CMP_F_F16::Inst_VOP3__V_CMP_F_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_f_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_F_F16 - - Inst_VOP3__V_CMP_F_F16::~Inst_VOP3__V_CMP_F_F16() - { - } // ~Inst_VOP3__V_CMP_F_F16 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_LT_F16 class methods --- - - Inst_VOP3__V_CMP_LT_F16::Inst_VOP3__V_CMP_LT_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lt_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_LT_F16 - - Inst_VOP3__V_CMP_LT_F16::~Inst_VOP3__V_CMP_LT_F16() - { - } // ~Inst_VOP3__V_CMP_LT_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_EQ_F16 class methods --- - - Inst_VOP3__V_CMP_EQ_F16::Inst_VOP3__V_CMP_EQ_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_eq_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_EQ_F16 - - Inst_VOP3__V_CMP_EQ_F16::~Inst_VOP3__V_CMP_EQ_F16() - { - } // ~Inst_VOP3__V_CMP_EQ_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_LE_F16 class methods --- - - Inst_VOP3__V_CMP_LE_F16::Inst_VOP3__V_CMP_LE_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_le_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_LE_F16 - - Inst_VOP3__V_CMP_LE_F16::~Inst_VOP3__V_CMP_LE_F16() - { - } // ~Inst_VOP3__V_CMP_LE_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_GT_F16 class methods --- - - Inst_VOP3__V_CMP_GT_F16::Inst_VOP3__V_CMP_GT_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_gt_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_GT_F16 - - Inst_VOP3__V_CMP_GT_F16::~Inst_VOP3__V_CMP_GT_F16() - { - } // ~Inst_VOP3__V_CMP_GT_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_LG_F16 class methods --- - - Inst_VOP3__V_CMP_LG_F16::Inst_VOP3__V_CMP_LG_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lg_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_LG_F16 - - Inst_VOP3__V_CMP_LG_F16::~Inst_VOP3__V_CMP_LG_F16() - { - } // ~Inst_VOP3__V_CMP_LG_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_GE_F16 class methods --- - - Inst_VOP3__V_CMP_GE_F16::Inst_VOP3__V_CMP_GE_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ge_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_GE_F16 - - Inst_VOP3__V_CMP_GE_F16::~Inst_VOP3__V_CMP_GE_F16() - { - } // ~Inst_VOP3__V_CMP_GE_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_O_F16 class methods --- - - Inst_VOP3__V_CMP_O_F16::Inst_VOP3__V_CMP_O_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_o_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_O_F16 - - Inst_VOP3__V_CMP_O_F16::~Inst_VOP3__V_CMP_O_F16() - { - } // ~Inst_VOP3__V_CMP_O_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_U_F16 class methods --- - - Inst_VOP3__V_CMP_U_F16::Inst_VOP3__V_CMP_U_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_u_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_U_F16 - - Inst_VOP3__V_CMP_U_F16::~Inst_VOP3__V_CMP_U_F16() - { - } // ~Inst_VOP3__V_CMP_U_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_NGE_F16 class methods --- - - Inst_VOP3__V_CMP_NGE_F16::Inst_VOP3__V_CMP_NGE_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nge_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_NGE_F16 - - Inst_VOP3__V_CMP_NGE_F16::~Inst_VOP3__V_CMP_NGE_F16() - { - } // ~Inst_VOP3__V_CMP_NGE_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_NLG_F16 class methods --- - - Inst_VOP3__V_CMP_NLG_F16::Inst_VOP3__V_CMP_NLG_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nlg_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_NLG_F16 - - Inst_VOP3__V_CMP_NLG_F16::~Inst_VOP3__V_CMP_NLG_F16() - { - } // ~Inst_VOP3__V_CMP_NLG_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_NGT_F16 class methods --- - - Inst_VOP3__V_CMP_NGT_F16::Inst_VOP3__V_CMP_NGT_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ngt_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_NGT_F16 - - Inst_VOP3__V_CMP_NGT_F16::~Inst_VOP3__V_CMP_NGT_F16() - { - } // ~Inst_VOP3__V_CMP_NGT_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_NLE_F16 class methods --- - - Inst_VOP3__V_CMP_NLE_F16::Inst_VOP3__V_CMP_NLE_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nle_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_NLE_F16 - - Inst_VOP3__V_CMP_NLE_F16::~Inst_VOP3__V_CMP_NLE_F16() - { - } // ~Inst_VOP3__V_CMP_NLE_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_NEQ_F16 class methods --- - - Inst_VOP3__V_CMP_NEQ_F16::Inst_VOP3__V_CMP_NEQ_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_neq_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_NEQ_F16 - - Inst_VOP3__V_CMP_NEQ_F16::~Inst_VOP3__V_CMP_NEQ_F16() - { - } // ~Inst_VOP3__V_CMP_NEQ_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_NLT_F16 class methods --- - - Inst_VOP3__V_CMP_NLT_F16::Inst_VOP3__V_CMP_NLT_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nlt_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_NLT_F16 - - Inst_VOP3__V_CMP_NLT_F16::~Inst_VOP3__V_CMP_NLT_F16() - { - } // ~Inst_VOP3__V_CMP_NLT_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_TRU_F16 class methods --- - - Inst_VOP3__V_CMP_TRU_F16::Inst_VOP3__V_CMP_TRU_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_tru_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_TRU_F16 - - Inst_VOP3__V_CMP_TRU_F16::~Inst_VOP3__V_CMP_TRU_F16() - { - } // ~Inst_VOP3__V_CMP_TRU_F16 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_F_F16 class methods --- - - Inst_VOP3__V_CMPX_F_F16::Inst_VOP3__V_CMPX_F_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_f_f16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_F_F16 - - Inst_VOP3__V_CMPX_F_F16::~Inst_VOP3__V_CMPX_F_F16() - { - } // ~Inst_VOP3__V_CMPX_F_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LT_F16 class methods --- - - Inst_VOP3__V_CMPX_LT_F16::Inst_VOP3__V_CMPX_LT_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lt_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LT_F16 - - Inst_VOP3__V_CMPX_LT_F16::~Inst_VOP3__V_CMPX_LT_F16() - { - } // ~Inst_VOP3__V_CMPX_LT_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_EQ_F16 class methods --- - - Inst_VOP3__V_CMPX_EQ_F16::Inst_VOP3__V_CMPX_EQ_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_eq_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_EQ_F16 - - Inst_VOP3__V_CMPX_EQ_F16::~Inst_VOP3__V_CMPX_EQ_F16() - { - } // ~Inst_VOP3__V_CMPX_EQ_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_LE_F16 class methods --- - - Inst_VOP3__V_CMPX_LE_F16::Inst_VOP3__V_CMPX_LE_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_le_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LE_F16 - - Inst_VOP3__V_CMPX_LE_F16::~Inst_VOP3__V_CMPX_LE_F16() - { - } // ~Inst_VOP3__V_CMPX_LE_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_GT_F16 class methods --- - - Inst_VOP3__V_CMPX_GT_F16::Inst_VOP3__V_CMPX_GT_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_gt_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GT_F16 - - Inst_VOP3__V_CMPX_GT_F16::~Inst_VOP3__V_CMPX_GT_F16() - { - } // ~Inst_VOP3__V_CMPX_GT_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_LG_F16 class methods --- - - Inst_VOP3__V_CMPX_LG_F16::Inst_VOP3__V_CMPX_LG_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lg_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LG_F16 - - Inst_VOP3__V_CMPX_LG_F16::~Inst_VOP3__V_CMPX_LG_F16() - { - } // ~Inst_VOP3__V_CMPX_LG_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_GE_F16 class methods --- - - Inst_VOP3__V_CMPX_GE_F16::Inst_VOP3__V_CMPX_GE_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ge_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GE_F16 - - Inst_VOP3__V_CMPX_GE_F16::~Inst_VOP3__V_CMPX_GE_F16() - { - } // ~Inst_VOP3__V_CMPX_GE_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_O_F16 class methods --- - - Inst_VOP3__V_CMPX_O_F16::Inst_VOP3__V_CMPX_O_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_o_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_O_F16 - - Inst_VOP3__V_CMPX_O_F16::~Inst_VOP3__V_CMPX_O_F16() - { - } // ~Inst_VOP3__V_CMPX_O_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOP3__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_U_F16 class methods --- - - Inst_VOP3__V_CMPX_U_F16::Inst_VOP3__V_CMPX_U_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_u_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_U_F16 - - Inst_VOP3__V_CMPX_U_F16::~Inst_VOP3__V_CMPX_U_F16() - { - } // ~Inst_VOP3__V_CMPX_U_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOP3__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_NGE_F16 class methods --- - - Inst_VOP3__V_CMPX_NGE_F16::Inst_VOP3__V_CMPX_NGE_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nge_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NGE_F16 - - Inst_VOP3__V_CMPX_NGE_F16::~Inst_VOP3__V_CMPX_NGE_F16() - { - } // ~Inst_VOP3__V_CMPX_NGE_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_NLG_F16 class methods --- - - Inst_VOP3__V_CMPX_NLG_F16::Inst_VOP3__V_CMPX_NLG_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nlg_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NLG_F16 - - Inst_VOP3__V_CMPX_NLG_F16::~Inst_VOP3__V_CMPX_NLG_F16() - { - } // ~Inst_VOP3__V_CMPX_NLG_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_NGT_F16 class methods --- - - Inst_VOP3__V_CMPX_NGT_F16::Inst_VOP3__V_CMPX_NGT_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ngt_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NGT_F16 - - Inst_VOP3__V_CMPX_NGT_F16::~Inst_VOP3__V_CMPX_NGT_F16() - { - } // ~Inst_VOP3__V_CMPX_NGT_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_NLE_F16 class methods --- - - Inst_VOP3__V_CMPX_NLE_F16::Inst_VOP3__V_CMPX_NLE_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nle_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NLE_F16 - - Inst_VOP3__V_CMPX_NLE_F16::~Inst_VOP3__V_CMPX_NLE_F16() - { - } // ~Inst_VOP3__V_CMPX_NLE_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_NEQ_F16 class methods --- - - Inst_VOP3__V_CMPX_NEQ_F16::Inst_VOP3__V_CMPX_NEQ_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_neq_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NEQ_F16 - - Inst_VOP3__V_CMPX_NEQ_F16::~Inst_VOP3__V_CMPX_NEQ_F16() - { - } // ~Inst_VOP3__V_CMPX_NEQ_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_NLT_F16 class methods --- - - Inst_VOP3__V_CMPX_NLT_F16::Inst_VOP3__V_CMPX_NLT_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nlt_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NLT_F16 - - Inst_VOP3__V_CMPX_NLT_F16::~Inst_VOP3__V_CMPX_NLT_F16() - { - } // ~Inst_VOP3__V_CMPX_NLT_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_TRU_F16 class methods --- - - Inst_VOP3__V_CMPX_TRU_F16::Inst_VOP3__V_CMPX_TRU_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_tru_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_TRU_F16 - - Inst_VOP3__V_CMPX_TRU_F16::~Inst_VOP3__V_CMPX_TRU_F16() - { - } // ~Inst_VOP3__V_CMPX_TRU_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_F_F32 class methods --- - - Inst_VOP3__V_CMP_F_F32::Inst_VOP3__V_CMP_F_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_f_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_F_F32 - - Inst_VOP3__V_CMP_F_F32::~Inst_VOP3__V_CMP_F_F32() - { - } // ~Inst_VOP3__V_CMP_F_F32 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LT_F32 class methods --- - - Inst_VOP3__V_CMP_LT_F32::Inst_VOP3__V_CMP_LT_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lt_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_LT_F32 - - Inst_VOP3__V_CMP_LT_F32::~Inst_VOP3__V_CMP_LT_F32() - { - } // ~Inst_VOP3__V_CMP_LT_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_EQ_F32 class methods --- - - Inst_VOP3__V_CMP_EQ_F32::Inst_VOP3__V_CMP_EQ_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_eq_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_EQ_F32 - - Inst_VOP3__V_CMP_EQ_F32::~Inst_VOP3__V_CMP_EQ_F32() - { - } // ~Inst_VOP3__V_CMP_EQ_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LE_F32 class methods --- - - Inst_VOP3__V_CMP_LE_F32::Inst_VOP3__V_CMP_LE_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_le_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_LE_F32 - - Inst_VOP3__V_CMP_LE_F32::~Inst_VOP3__V_CMP_LE_F32() - { - } // ~Inst_VOP3__V_CMP_LE_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GT_F32 class methods --- - - Inst_VOP3__V_CMP_GT_F32::Inst_VOP3__V_CMP_GT_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_gt_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_GT_F32 - - Inst_VOP3__V_CMP_GT_F32::~Inst_VOP3__V_CMP_GT_F32() - { - } // ~Inst_VOP3__V_CMP_GT_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LG_F32 class methods --- - - Inst_VOP3__V_CMP_LG_F32::Inst_VOP3__V_CMP_LG_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lg_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_LG_F32 - - Inst_VOP3__V_CMP_LG_F32::~Inst_VOP3__V_CMP_LG_F32() - { - } // ~Inst_VOP3__V_CMP_LG_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GE_F32 class methods --- - - Inst_VOP3__V_CMP_GE_F32::Inst_VOP3__V_CMP_GE_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ge_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_GE_F32 - - Inst_VOP3__V_CMP_GE_F32::~Inst_VOP3__V_CMP_GE_F32() - { - } // ~Inst_VOP3__V_CMP_GE_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_O_F32 class methods --- - - Inst_VOP3__V_CMP_O_F32::Inst_VOP3__V_CMP_O_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_o_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_O_F32 - - Inst_VOP3__V_CMP_O_F32::~Inst_VOP3__V_CMP_O_F32() - { - } // ~Inst_VOP3__V_CMP_O_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_U_F32 class methods --- - - Inst_VOP3__V_CMP_U_F32::Inst_VOP3__V_CMP_U_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_u_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_U_F32 - - Inst_VOP3__V_CMP_U_F32::~Inst_VOP3__V_CMP_U_F32() - { - } // ~Inst_VOP3__V_CMP_U_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NGE_F32 class methods --- - - Inst_VOP3__V_CMP_NGE_F32::Inst_VOP3__V_CMP_NGE_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nge_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_NGE_F32 - - Inst_VOP3__V_CMP_NGE_F32::~Inst_VOP3__V_CMP_NGE_F32() - { - } // ~Inst_VOP3__V_CMP_NGE_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NLG_F32 class methods --- - - Inst_VOP3__V_CMP_NLG_F32::Inst_VOP3__V_CMP_NLG_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nlg_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_NLG_F32 - - Inst_VOP3__V_CMP_NLG_F32::~Inst_VOP3__V_CMP_NLG_F32() - { - } // ~Inst_VOP3__V_CMP_NLG_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. 
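A note on the negated ("N*") and ordered/unordered predicates implemented below: with IEEE floats, every ordered comparison involving a NaN is false, so !(S0 >= S1) is not equivalent to (S0 < S1) when either operand is NaN. The small standalone check below (illustrative only, not part of the patch) makes that concrete.

    #include <cassert>
    #include <limits>

    int main()
    {
        const float nan = std::numeric_limits<float>::quiet_NaN();
        // Ordered comparisons against NaN are always false ...
        assert((nan <  1.0f) == false);
        assert((nan >= 1.0f) == false);
        // ... so the negated form used by V_CMP_NLT / V_CMP_NGE reports
        // true for NaN inputs, unlike the corresponding ordered compare.
        assert(!(nan < 1.0f) == true);   // NLT-style result
        return 0;
    }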
- void - Inst_VOP3__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NGT_F32 class methods --- - - Inst_VOP3__V_CMP_NGT_F32::Inst_VOP3__V_CMP_NGT_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ngt_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_NGT_F32 - - Inst_VOP3__V_CMP_NGT_F32::~Inst_VOP3__V_CMP_NGT_F32() - { - } // ~Inst_VOP3__V_CMP_NGT_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NLE_F32 class methods --- - - Inst_VOP3__V_CMP_NLE_F32::Inst_VOP3__V_CMP_NLE_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nle_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_NLE_F32 - - Inst_VOP3__V_CMP_NLE_F32::~Inst_VOP3__V_CMP_NLE_F32() - { - } // ~Inst_VOP3__V_CMP_NLE_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NEQ_F32 class methods --- - - Inst_VOP3__V_CMP_NEQ_F32::Inst_VOP3__V_CMP_NEQ_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_neq_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_NEQ_F32 - - Inst_VOP3__V_CMP_NEQ_F32::~Inst_VOP3__V_CMP_NEQ_F32() - { - } // ~Inst_VOP3__V_CMP_NEQ_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NLT_F32 class methods --- - - Inst_VOP3__V_CMP_NLT_F32::Inst_VOP3__V_CMP_NLT_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nlt_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_NLT_F32 - - Inst_VOP3__V_CMP_NLT_F32::~Inst_VOP3__V_CMP_NLT_F32() - { - } // ~Inst_VOP3__V_CMP_NLT_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_TRU_F32 class methods --- - - Inst_VOP3__V_CMP_TRU_F32::Inst_VOP3__V_CMP_TRU_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_tru_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_TRU_F32 - - Inst_VOP3__V_CMP_TRU_F32::~Inst_VOP3__V_CMP_TRU_F32() - { - } // ~Inst_VOP3__V_CMP_TRU_F32 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_F_F32 class methods --- - - Inst_VOP3__V_CMPX_F_F32::Inst_VOP3__V_CMPX_F_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_f_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_F_F32 - - Inst_VOP3__V_CMPX_F_F32::~Inst_VOP3__V_CMPX_F_F32() - { - } // ~Inst_VOP3__V_CMPX_F_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LT_F32 class methods --- - - Inst_VOP3__V_CMPX_LT_F32::Inst_VOP3__V_CMPX_LT_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lt_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LT_F32 - - Inst_VOP3__V_CMPX_LT_F32::~Inst_VOP3__V_CMPX_LT_F32() - { - } // ~Inst_VOP3__V_CMPX_LT_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_EQ_F32 class methods --- - - Inst_VOP3__V_CMPX_EQ_F32::Inst_VOP3__V_CMPX_EQ_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_eq_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_EQ_F32 - - Inst_VOP3__V_CMPX_EQ_F32::~Inst_VOP3__V_CMPX_EQ_F32() - { - } // ~Inst_VOP3__V_CMPX_EQ_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LE_F32 class methods --- - - Inst_VOP3__V_CMPX_LE_F32::Inst_VOP3__V_CMPX_LE_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_le_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LE_F32 - - Inst_VOP3__V_CMPX_LE_F32::~Inst_VOP3__V_CMPX_LE_F32() - { - } // ~Inst_VOP3__V_CMPX_LE_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GT_F32 class methods --- - - Inst_VOP3__V_CMPX_GT_F32::Inst_VOP3__V_CMPX_GT_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_gt_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GT_F32 - - Inst_VOP3__V_CMPX_GT_F32::~Inst_VOP3__V_CMPX_GT_F32() - { - } // ~Inst_VOP3__V_CMPX_GT_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LG_F32 class methods --- - - Inst_VOP3__V_CMPX_LG_F32::Inst_VOP3__V_CMPX_LG_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lg_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LG_F32 - - Inst_VOP3__V_CMPX_LG_F32::~Inst_VOP3__V_CMPX_LG_F32() - { - } // ~Inst_VOP3__V_CMPX_LG_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GE_F32 class methods --- - - Inst_VOP3__V_CMPX_GE_F32::Inst_VOP3__V_CMPX_GE_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ge_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GE_F32 - - Inst_VOP3__V_CMPX_GE_F32::~Inst_VOP3__V_CMPX_GE_F32() - { - } // ~Inst_VOP3__V_CMPX_GE_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_O_F32 class methods --- - - Inst_VOP3__V_CMPX_O_F32::Inst_VOP3__V_CMPX_O_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_o_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_O_F32 - - Inst_VOP3__V_CMPX_O_F32::~Inst_VOP3__V_CMPX_O_F32() - { - } // ~Inst_VOP3__V_CMPX_O_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOP3__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_U_F32 class methods --- - - Inst_VOP3__V_CMPX_U_F32::Inst_VOP3__V_CMPX_U_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_u_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_U_F32 - - Inst_VOP3__V_CMPX_U_F32::~Inst_VOP3__V_CMPX_U_F32() - { - } // ~Inst_VOP3__V_CMPX_U_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC - // encoding. 
- void - Inst_VOP3__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NGE_F32 class methods --- - - Inst_VOP3__V_CMPX_NGE_F32::Inst_VOP3__V_CMPX_NGE_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nge_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NGE_F32 - - Inst_VOP3__V_CMPX_NGE_F32::~Inst_VOP3__V_CMPX_NGE_F32() - { - } // ~Inst_VOP3__V_CMPX_NGE_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NLG_F32 class methods --- - - Inst_VOP3__V_CMPX_NLG_F32::Inst_VOP3__V_CMPX_NLG_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nlg_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NLG_F32 - - Inst_VOP3__V_CMPX_NLG_F32::~Inst_VOP3__V_CMPX_NLG_F32() - { - } // ~Inst_VOP3__V_CMPX_NLG_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NGT_F32 class methods --- - - Inst_VOP3__V_CMPX_NGT_F32::Inst_VOP3__V_CMPX_NGT_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ngt_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NGT_F32 - - Inst_VOP3__V_CMPX_NGT_F32::~Inst_VOP3__V_CMPX_NGT_F32() - { - } // ~Inst_VOP3__V_CMPX_NGT_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NLE_F32 class methods --- - - Inst_VOP3__V_CMPX_NLE_F32::Inst_VOP3__V_CMPX_NLE_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nle_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NLE_F32 - - Inst_VOP3__V_CMPX_NLE_F32::~Inst_VOP3__V_CMPX_NLE_F32() - { - } // ~Inst_VOP3__V_CMPX_NLE_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NEQ_F32 class methods --- - - Inst_VOP3__V_CMPX_NEQ_F32::Inst_VOP3__V_CMPX_NEQ_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_neq_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NEQ_F32 - - Inst_VOP3__V_CMPX_NEQ_F32::~Inst_VOP3__V_CMPX_NEQ_F32() - { - } // ~Inst_VOP3__V_CMPX_NEQ_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NLT_F32 class methods --- - - Inst_VOP3__V_CMPX_NLT_F32::Inst_VOP3__V_CMPX_NLT_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nlt_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NLT_F32 - - Inst_VOP3__V_CMPX_NLT_F32::~Inst_VOP3__V_CMPX_NLT_F32() - { - } // ~Inst_VOP3__V_CMPX_NLT_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_TRU_F32 class methods --- - - Inst_VOP3__V_CMPX_TRU_F32::Inst_VOP3__V_CMPX_TRU_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_tru_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_TRU_F32 - - Inst_VOP3__V_CMPX_TRU_F32::~Inst_VOP3__V_CMPX_TRU_F32() - { - } // ~Inst_VOP3__V_CMPX_TRU_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_F_F64 class methods --- - - Inst_VOP3__V_CMP_F_F64::Inst_VOP3__V_CMP_F_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_f_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_F_F64 - - Inst_VOP3__V_CMP_F_F64::~Inst_VOP3__V_CMP_F_F64() - { - } // ~Inst_VOP3__V_CMP_F_F64 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LT_F64 class methods --- - - Inst_VOP3__V_CMP_LT_F64::Inst_VOP3__V_CMP_LT_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lt_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_LT_F64 - - Inst_VOP3__V_CMP_LT_F64::~Inst_VOP3__V_CMP_LT_F64() - { - } // ~Inst_VOP3__V_CMP_LT_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_EQ_F64 class methods --- - - Inst_VOP3__V_CMP_EQ_F64::Inst_VOP3__V_CMP_EQ_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_eq_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_EQ_F64 - - Inst_VOP3__V_CMP_EQ_F64::~Inst_VOP3__V_CMP_EQ_F64() - { - } // ~Inst_VOP3__V_CMP_EQ_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LE_F64 class methods --- - - Inst_VOP3__V_CMP_LE_F64::Inst_VOP3__V_CMP_LE_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_le_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_LE_F64 - - Inst_VOP3__V_CMP_LE_F64::~Inst_VOP3__V_CMP_LE_F64() - { - } // ~Inst_VOP3__V_CMP_LE_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GT_F64 class methods --- - - Inst_VOP3__V_CMP_GT_F64::Inst_VOP3__V_CMP_GT_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_gt_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_GT_F64 - - Inst_VOP3__V_CMP_GT_F64::~Inst_VOP3__V_CMP_GT_F64() - { - } // ~Inst_VOP3__V_CMP_GT_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LG_F64 class methods --- - - Inst_VOP3__V_CMP_LG_F64::Inst_VOP3__V_CMP_LG_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lg_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_LG_F64 - - Inst_VOP3__V_CMP_LG_F64::~Inst_VOP3__V_CMP_LG_F64() - { - } // ~Inst_VOP3__V_CMP_LG_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GE_F64 class methods --- - - Inst_VOP3__V_CMP_GE_F64::Inst_VOP3__V_CMP_GE_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ge_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_GE_F64 - - Inst_VOP3__V_CMP_GE_F64::~Inst_VOP3__V_CMP_GE_F64() - { - } // ~Inst_VOP3__V_CMP_GE_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_O_F64 class methods --- - - Inst_VOP3__V_CMP_O_F64::Inst_VOP3__V_CMP_O_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_o_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_O_F64 - - Inst_VOP3__V_CMP_O_F64::~Inst_VOP3__V_CMP_O_F64() - { - } // ~Inst_VOP3__V_CMP_O_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_U_F64 class methods --- - - Inst_VOP3__V_CMP_U_F64::Inst_VOP3__V_CMP_U_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_u_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_U_F64 - - Inst_VOP3__V_CMP_U_F64::~Inst_VOP3__V_CMP_U_F64() - { - } // ~Inst_VOP3__V_CMP_U_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NGE_F64 class methods --- - - Inst_VOP3__V_CMP_NGE_F64::Inst_VOP3__V_CMP_NGE_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nge_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_NGE_F64 - - Inst_VOP3__V_CMP_NGE_F64::~Inst_VOP3__V_CMP_NGE_F64() - { - } // ~Inst_VOP3__V_CMP_NGE_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NLG_F64 class methods --- - - Inst_VOP3__V_CMP_NLG_F64::Inst_VOP3__V_CMP_NLG_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nlg_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_NLG_F64 - - Inst_VOP3__V_CMP_NLG_F64::~Inst_VOP3__V_CMP_NLG_F64() - { - } // ~Inst_VOP3__V_CMP_NLG_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NGT_F64 class methods --- - - Inst_VOP3__V_CMP_NGT_F64::Inst_VOP3__V_CMP_NGT_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ngt_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_NGT_F64 - - Inst_VOP3__V_CMP_NGT_F64::~Inst_VOP3__V_CMP_NGT_F64() - { - } // ~Inst_VOP3__V_CMP_NGT_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NLE_F64 class methods --- - - Inst_VOP3__V_CMP_NLE_F64::Inst_VOP3__V_CMP_NLE_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nle_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_NLE_F64 - - Inst_VOP3__V_CMP_NLE_F64::~Inst_VOP3__V_CMP_NLE_F64() - { - } // ~Inst_VOP3__V_CMP_NLE_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NEQ_F64 class methods --- - - Inst_VOP3__V_CMP_NEQ_F64::Inst_VOP3__V_CMP_NEQ_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_neq_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_NEQ_F64 - - Inst_VOP3__V_CMP_NEQ_F64::~Inst_VOP3__V_CMP_NEQ_F64() - { - } // ~Inst_VOP3__V_CMP_NEQ_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NLT_F64 class methods --- - - Inst_VOP3__V_CMP_NLT_F64::Inst_VOP3__V_CMP_NLT_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nlt_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_NLT_F64 - - Inst_VOP3__V_CMP_NLT_F64::~Inst_VOP3__V_CMP_NLT_F64() - { - } // ~Inst_VOP3__V_CMP_NLT_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_TRU_F64 class methods --- - - Inst_VOP3__V_CMP_TRU_F64::Inst_VOP3__V_CMP_TRU_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_tru_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_TRU_F64 - - Inst_VOP3__V_CMP_TRU_F64::~Inst_VOP3__V_CMP_TRU_F64() - { - } // ~Inst_VOP3__V_CMP_TRU_F64 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_F_F64 class methods --- - - Inst_VOP3__V_CMPX_F_F64::Inst_VOP3__V_CMPX_F_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_f_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_F_F64 - - Inst_VOP3__V_CMPX_F_F64::~Inst_VOP3__V_CMPX_F_F64() - { - } // ~Inst_VOP3__V_CMPX_F_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LT_F64 class methods --- - - Inst_VOP3__V_CMPX_LT_F64::Inst_VOP3__V_CMPX_LT_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lt_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LT_F64 - - Inst_VOP3__V_CMPX_LT_F64::~Inst_VOP3__V_CMPX_LT_F64() - { - } // ~Inst_VOP3__V_CMPX_LT_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_EQ_F64 class methods --- - - Inst_VOP3__V_CMPX_EQ_F64::Inst_VOP3__V_CMPX_EQ_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_eq_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_EQ_F64 - - Inst_VOP3__V_CMPX_EQ_F64::~Inst_VOP3__V_CMPX_EQ_F64() - { - } // ~Inst_VOP3__V_CMPX_EQ_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LE_F64 class methods --- - - Inst_VOP3__V_CMPX_LE_F64::Inst_VOP3__V_CMPX_LE_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_le_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LE_F64 - - Inst_VOP3__V_CMPX_LE_F64::~Inst_VOP3__V_CMPX_LE_F64() - { - } // ~Inst_VOP3__V_CMPX_LE_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GT_F64 class methods --- - - Inst_VOP3__V_CMPX_GT_F64::Inst_VOP3__V_CMPX_GT_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_gt_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GT_F64 - - Inst_VOP3__V_CMPX_GT_F64::~Inst_VOP3__V_CMPX_GT_F64() - { - } // ~Inst_VOP3__V_CMPX_GT_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LG_F64 class methods --- - - Inst_VOP3__V_CMPX_LG_F64::Inst_VOP3__V_CMPX_LG_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lg_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LG_F64 - - Inst_VOP3__V_CMPX_LG_F64::~Inst_VOP3__V_CMPX_LG_F64() - { - } // ~Inst_VOP3__V_CMPX_LG_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GE_F64 class methods --- - - Inst_VOP3__V_CMPX_GE_F64::Inst_VOP3__V_CMPX_GE_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ge_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GE_F64 - - Inst_VOP3__V_CMPX_GE_F64::~Inst_VOP3__V_CMPX_GE_F64() - { - } // ~Inst_VOP3__V_CMPX_GE_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_O_F64 class methods --- - - Inst_VOP3__V_CMPX_O_F64::Inst_VOP3__V_CMPX_O_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_o_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_O_F64 - - Inst_VOP3__V_CMPX_O_F64::~Inst_VOP3__V_CMPX_O_F64() - { - } // ~Inst_VOP3__V_CMPX_O_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC - // encoding. 
- void - Inst_VOP3__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_U_F64 class methods --- - - Inst_VOP3__V_CMPX_U_F64::Inst_VOP3__V_CMPX_U_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_u_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_U_F64 - - Inst_VOP3__V_CMPX_U_F64::~Inst_VOP3__V_CMPX_U_F64() - { - } // ~Inst_VOP3__V_CMPX_U_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOP3__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NGE_F64 class methods --- - - Inst_VOP3__V_CMPX_NGE_F64::Inst_VOP3__V_CMPX_NGE_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nge_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NGE_F64 - - Inst_VOP3__V_CMPX_NGE_F64::~Inst_VOP3__V_CMPX_NGE_F64() - { - } // ~Inst_VOP3__V_CMPX_NGE_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NLG_F64 class methods --- - - Inst_VOP3__V_CMPX_NLG_F64::Inst_VOP3__V_CMPX_NLG_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nlg_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NLG_F64 - - Inst_VOP3__V_CMPX_NLG_F64::~Inst_VOP3__V_CMPX_NLG_F64() - { - } // ~Inst_VOP3__V_CMPX_NLG_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NGT_F64 class methods --- - - Inst_VOP3__V_CMPX_NGT_F64::Inst_VOP3__V_CMPX_NGT_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ngt_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NGT_F64 - - Inst_VOP3__V_CMPX_NGT_F64::~Inst_VOP3__V_CMPX_NGT_F64() - { - } // ~Inst_VOP3__V_CMPX_NGT_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NLE_F64 class methods --- - - Inst_VOP3__V_CMPX_NLE_F64::Inst_VOP3__V_CMPX_NLE_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nle_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NLE_F64 - - Inst_VOP3__V_CMPX_NLE_F64::~Inst_VOP3__V_CMPX_NLE_F64() - { - } // ~Inst_VOP3__V_CMPX_NLE_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NEQ_F64 class methods --- - - Inst_VOP3__V_CMPX_NEQ_F64::Inst_VOP3__V_CMPX_NEQ_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_neq_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NEQ_F64 - - Inst_VOP3__V_CMPX_NEQ_F64::~Inst_VOP3__V_CMPX_NEQ_F64() - { - } // ~Inst_VOP3__V_CMPX_NEQ_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NLT_F64 class methods --- - - Inst_VOP3__V_CMPX_NLT_F64::Inst_VOP3__V_CMPX_NLT_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nlt_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NLT_F64 - - Inst_VOP3__V_CMPX_NLT_F64::~Inst_VOP3__V_CMPX_NLT_F64() - { - } // ~Inst_VOP3__V_CMPX_NLT_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_TRU_F64 class methods --- - - Inst_VOP3__V_CMPX_TRU_F64::Inst_VOP3__V_CMPX_TRU_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_tru_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_TRU_F64 - - Inst_VOP3__V_CMPX_TRU_F64::~Inst_VOP3__V_CMPX_TRU_F64() - { - } // ~Inst_VOP3__V_CMPX_TRU_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_F_I16 class methods --- - - Inst_VOP3__V_CMP_F_I16::Inst_VOP3__V_CMP_F_I16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_f_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_F_I16 - - Inst_VOP3__V_CMP_F_I16::~Inst_VOP3__V_CMP_F_I16() - { - } // ~Inst_VOP3__V_CMP_F_I16 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LT_I16 class methods --- - - Inst_VOP3__V_CMP_LT_I16::Inst_VOP3__V_CMP_LT_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lt_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LT_I16 - - Inst_VOP3__V_CMP_LT_I16::~Inst_VOP3__V_CMP_LT_I16() - { - } // ~Inst_VOP3__V_CMP_LT_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_EQ_I16 class methods --- - - Inst_VOP3__V_CMP_EQ_I16::Inst_VOP3__V_CMP_EQ_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_eq_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_EQ_I16 - - Inst_VOP3__V_CMP_EQ_I16::~Inst_VOP3__V_CMP_EQ_I16() - { - } // ~Inst_VOP3__V_CMP_EQ_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LE_I16 class methods --- - - Inst_VOP3__V_CMP_LE_I16::Inst_VOP3__V_CMP_LE_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_le_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LE_I16 - - Inst_VOP3__V_CMP_LE_I16::~Inst_VOP3__V_CMP_LE_I16() - { - } // ~Inst_VOP3__V_CMP_LE_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GT_I16 class methods --- - - Inst_VOP3__V_CMP_GT_I16::Inst_VOP3__V_CMP_GT_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_gt_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GT_I16 - - Inst_VOP3__V_CMP_GT_I16::~Inst_VOP3__V_CMP_GT_I16() - { - } // ~Inst_VOP3__V_CMP_GT_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NE_I16 class methods --- - - Inst_VOP3__V_CMP_NE_I16::Inst_VOP3__V_CMP_NE_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ne_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_NE_I16 - - Inst_VOP3__V_CMP_NE_I16::~Inst_VOP3__V_CMP_NE_I16() - { - } // ~Inst_VOP3__V_CMP_NE_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GE_I16 class methods --- - - Inst_VOP3__V_CMP_GE_I16::Inst_VOP3__V_CMP_GE_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ge_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GE_I16 - - Inst_VOP3__V_CMP_GE_I16::~Inst_VOP3__V_CMP_GE_I16() - { - } // ~Inst_VOP3__V_CMP_GE_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_T_I16 class methods --- - - Inst_VOP3__V_CMP_T_I16::Inst_VOP3__V_CMP_T_I16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_t_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_T_I16 - - Inst_VOP3__V_CMP_T_I16::~Inst_VOP3__V_CMP_T_I16() - { - } // ~Inst_VOP3__V_CMP_T_I16 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_F_U16 class methods --- - - Inst_VOP3__V_CMP_F_U16::Inst_VOP3__V_CMP_F_U16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_f_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_F_U16 - - Inst_VOP3__V_CMP_F_U16::~Inst_VOP3__V_CMP_F_U16() - { - } // ~Inst_VOP3__V_CMP_F_U16 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LT_U16 class methods --- - - Inst_VOP3__V_CMP_LT_U16::Inst_VOP3__V_CMP_LT_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lt_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LT_U16 - - Inst_VOP3__V_CMP_LT_U16::~Inst_VOP3__V_CMP_LT_U16() - { - } // ~Inst_VOP3__V_CMP_LT_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_EQ_U16 class methods --- - - Inst_VOP3__V_CMP_EQ_U16::Inst_VOP3__V_CMP_EQ_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_eq_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_EQ_U16 - - Inst_VOP3__V_CMP_EQ_U16::~Inst_VOP3__V_CMP_EQ_U16() - { - } // ~Inst_VOP3__V_CMP_EQ_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LE_U16 class methods --- - - Inst_VOP3__V_CMP_LE_U16::Inst_VOP3__V_CMP_LE_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_le_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LE_U16 - - Inst_VOP3__V_CMP_LE_U16::~Inst_VOP3__V_CMP_LE_U16() - { - } // ~Inst_VOP3__V_CMP_LE_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GT_U16 class methods --- - - Inst_VOP3__V_CMP_GT_U16::Inst_VOP3__V_CMP_GT_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_gt_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GT_U16 - - Inst_VOP3__V_CMP_GT_U16::~Inst_VOP3__V_CMP_GT_U16() - { - } // ~Inst_VOP3__V_CMP_GT_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NE_U16 class methods --- - - Inst_VOP3__V_CMP_NE_U16::Inst_VOP3__V_CMP_NE_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ne_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_NE_U16 - - Inst_VOP3__V_CMP_NE_U16::~Inst_VOP3__V_CMP_NE_U16() - { - } // ~Inst_VOP3__V_CMP_NE_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GE_U16 class methods --- - - Inst_VOP3__V_CMP_GE_U16::Inst_VOP3__V_CMP_GE_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ge_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GE_U16 - - Inst_VOP3__V_CMP_GE_U16::~Inst_VOP3__V_CMP_GE_U16() - { - } // ~Inst_VOP3__V_CMP_GE_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_T_U16 class methods --- - - Inst_VOP3__V_CMP_T_U16::Inst_VOP3__V_CMP_T_U16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_t_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_T_U16 - - Inst_VOP3__V_CMP_T_U16::~Inst_VOP3__V_CMP_T_U16() - { - } // ~Inst_VOP3__V_CMP_T_U16 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_F_I16 class methods --- - - Inst_VOP3__V_CMPX_F_I16::Inst_VOP3__V_CMPX_F_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_f_i16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_F_I16 - - Inst_VOP3__V_CMPX_F_I16::~Inst_VOP3__V_CMPX_F_I16() - { - } // ~Inst_VOP3__V_CMPX_F_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LT_I16 class methods --- - - Inst_VOP3__V_CMPX_LT_I16::Inst_VOP3__V_CMPX_LT_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lt_i16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LT_I16 - - Inst_VOP3__V_CMPX_LT_I16::~Inst_VOP3__V_CMPX_LT_I16() - { - } // ~Inst_VOP3__V_CMPX_LT_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_EQ_I16 class methods --- - - Inst_VOP3__V_CMPX_EQ_I16::Inst_VOP3__V_CMPX_EQ_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_eq_i16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_EQ_I16 - - Inst_VOP3__V_CMPX_EQ_I16::~Inst_VOP3__V_CMPX_EQ_I16() - { - } // ~Inst_VOP3__V_CMPX_EQ_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LE_I16 class methods --- - - Inst_VOP3__V_CMPX_LE_I16::Inst_VOP3__V_CMPX_LE_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_le_i16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LE_I16 - - Inst_VOP3__V_CMPX_LE_I16::~Inst_VOP3__V_CMPX_LE_I16() - { - } // ~Inst_VOP3__V_CMPX_LE_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GT_I16 class methods --- - - Inst_VOP3__V_CMPX_GT_I16::Inst_VOP3__V_CMPX_GT_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_gt_i16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GT_I16 - - Inst_VOP3__V_CMPX_GT_I16::~Inst_VOP3__V_CMPX_GT_I16() - { - } // ~Inst_VOP3__V_CMPX_GT_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NE_I16 class methods --- - - Inst_VOP3__V_CMPX_NE_I16::Inst_VOP3__V_CMPX_NE_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ne_i16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NE_I16 - - Inst_VOP3__V_CMPX_NE_I16::~Inst_VOP3__V_CMPX_NE_I16() - { - } // ~Inst_VOP3__V_CMPX_NE_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GE_I16 class methods --- - - Inst_VOP3__V_CMPX_GE_I16::Inst_VOP3__V_CMPX_GE_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ge_i16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GE_I16 - - Inst_VOP3__V_CMPX_GE_I16::~Inst_VOP3__V_CMPX_GE_I16() - { - } // ~Inst_VOP3__V_CMPX_GE_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_T_I16 class methods --- - - Inst_VOP3__V_CMPX_T_I16::Inst_VOP3__V_CMPX_T_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_t_i16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_T_I16 - - Inst_VOP3__V_CMPX_T_I16::~Inst_VOP3__V_CMPX_T_I16() - { - } // ~Inst_VOP3__V_CMPX_T_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_F_U16 class methods --- - - Inst_VOP3__V_CMPX_F_U16::Inst_VOP3__V_CMPX_F_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_f_u16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_F_U16 - - Inst_VOP3__V_CMPX_F_U16::~Inst_VOP3__V_CMPX_F_U16() - { - } // ~Inst_VOP3__V_CMPX_F_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LT_U16 class methods --- - - Inst_VOP3__V_CMPX_LT_U16::Inst_VOP3__V_CMPX_LT_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lt_u16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LT_U16 - - Inst_VOP3__V_CMPX_LT_U16::~Inst_VOP3__V_CMPX_LT_U16() - { - } // ~Inst_VOP3__V_CMPX_LT_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_EQ_U16 class methods --- - - Inst_VOP3__V_CMPX_EQ_U16::Inst_VOP3__V_CMPX_EQ_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_eq_u16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_EQ_U16 - - Inst_VOP3__V_CMPX_EQ_U16::~Inst_VOP3__V_CMPX_EQ_U16() - { - } // ~Inst_VOP3__V_CMPX_EQ_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LE_U16 class methods --- - - Inst_VOP3__V_CMPX_LE_U16::Inst_VOP3__V_CMPX_LE_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_le_u16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LE_U16 - - Inst_VOP3__V_CMPX_LE_U16::~Inst_VOP3__V_CMPX_LE_U16() - { - } // ~Inst_VOP3__V_CMPX_LE_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GT_U16 class methods --- - - Inst_VOP3__V_CMPX_GT_U16::Inst_VOP3__V_CMPX_GT_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_gt_u16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GT_U16 - - Inst_VOP3__V_CMPX_GT_U16::~Inst_VOP3__V_CMPX_GT_U16() - { - } // ~Inst_VOP3__V_CMPX_GT_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NE_U16 class methods --- - - Inst_VOP3__V_CMPX_NE_U16::Inst_VOP3__V_CMPX_NE_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ne_u16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NE_U16 - - Inst_VOP3__V_CMPX_NE_U16::~Inst_VOP3__V_CMPX_NE_U16() - { - } // ~Inst_VOP3__V_CMPX_NE_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GE_U16 class methods --- - - Inst_VOP3__V_CMPX_GE_U16::Inst_VOP3__V_CMPX_GE_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ge_u16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GE_U16 - - Inst_VOP3__V_CMPX_GE_U16::~Inst_VOP3__V_CMPX_GE_U16() - { - } // ~Inst_VOP3__V_CMPX_GE_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_T_U16 class methods --- - - Inst_VOP3__V_CMPX_T_U16::Inst_VOP3__V_CMPX_T_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_t_u16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_T_U16 - - Inst_VOP3__V_CMPX_T_U16::~Inst_VOP3__V_CMPX_T_U16() - { - } // ~Inst_VOP3__V_CMPX_T_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_F_I32 class methods --- - - Inst_VOP3__V_CMP_F_I32::Inst_VOP3__V_CMP_F_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_f_i32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_F_I32 - - Inst_VOP3__V_CMP_F_I32::~Inst_VOP3__V_CMP_F_I32() - { - } // ~Inst_VOP3__V_CMP_F_I32 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LT_I32 class methods --- - - Inst_VOP3__V_CMP_LT_I32::Inst_VOP3__V_CMP_LT_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lt_i32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LT_I32 - - Inst_VOP3__V_CMP_LT_I32::~Inst_VOP3__V_CMP_LT_I32() - { - } // ~Inst_VOP3__V_CMP_LT_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_EQ_I32 class methods --- - - Inst_VOP3__V_CMP_EQ_I32::Inst_VOP3__V_CMP_EQ_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_eq_i32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_EQ_I32 - - Inst_VOP3__V_CMP_EQ_I32::~Inst_VOP3__V_CMP_EQ_I32() - { - } // ~Inst_VOP3__V_CMP_EQ_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LE_I32 class methods --- - - Inst_VOP3__V_CMP_LE_I32::Inst_VOP3__V_CMP_LE_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_le_i32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LE_I32 - - Inst_VOP3__V_CMP_LE_I32::~Inst_VOP3__V_CMP_LE_I32() - { - } // ~Inst_VOP3__V_CMP_LE_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GT_I32 class methods --- - - Inst_VOP3__V_CMP_GT_I32::Inst_VOP3__V_CMP_GT_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_gt_i32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GT_I32 - - Inst_VOP3__V_CMP_GT_I32::~Inst_VOP3__V_CMP_GT_I32() - { - } // ~Inst_VOP3__V_CMP_GT_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NE_I32 class methods --- - - Inst_VOP3__V_CMP_NE_I32::Inst_VOP3__V_CMP_NE_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ne_i32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_NE_I32 - - Inst_VOP3__V_CMP_NE_I32::~Inst_VOP3__V_CMP_NE_I32() - { - } // ~Inst_VOP3__V_CMP_NE_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GE_I32 class methods --- - - Inst_VOP3__V_CMP_GE_I32::Inst_VOP3__V_CMP_GE_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ge_i32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GE_I32 - - Inst_VOP3__V_CMP_GE_I32::~Inst_VOP3__V_CMP_GE_I32() - { - } // ~Inst_VOP3__V_CMP_GE_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_T_I32 class methods --- - - Inst_VOP3__V_CMP_T_I32::Inst_VOP3__V_CMP_T_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_t_i32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_T_I32 - - Inst_VOP3__V_CMP_T_I32::~Inst_VOP3__V_CMP_T_I32() - { - } // ~Inst_VOP3__V_CMP_T_I32 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_F_U32 class methods --- - - Inst_VOP3__V_CMP_F_U32::Inst_VOP3__V_CMP_F_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_f_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_F_U32 - - Inst_VOP3__V_CMP_F_U32::~Inst_VOP3__V_CMP_F_U32() - { - } // ~Inst_VOP3__V_CMP_F_U32 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LT_U32 class methods --- - - Inst_VOP3__V_CMP_LT_U32::Inst_VOP3__V_CMP_LT_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lt_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LT_U32 - - Inst_VOP3__V_CMP_LT_U32::~Inst_VOP3__V_CMP_LT_U32() - { - } // ~Inst_VOP3__V_CMP_LT_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_EQ_U32 class methods --- - - Inst_VOP3__V_CMP_EQ_U32::Inst_VOP3__V_CMP_EQ_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_eq_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_EQ_U32 - - Inst_VOP3__V_CMP_EQ_U32::~Inst_VOP3__V_CMP_EQ_U32() - { - } // ~Inst_VOP3__V_CMP_EQ_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LE_U32 class methods --- - - Inst_VOP3__V_CMP_LE_U32::Inst_VOP3__V_CMP_LE_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_le_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LE_U32 - - Inst_VOP3__V_CMP_LE_U32::~Inst_VOP3__V_CMP_LE_U32() - { - } // ~Inst_VOP3__V_CMP_LE_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GT_U32 class methods --- - - Inst_VOP3__V_CMP_GT_U32::Inst_VOP3__V_CMP_GT_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_gt_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GT_U32 - - Inst_VOP3__V_CMP_GT_U32::~Inst_VOP3__V_CMP_GT_U32() - { - } // ~Inst_VOP3__V_CMP_GT_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NE_U32 class methods --- - - Inst_VOP3__V_CMP_NE_U32::Inst_VOP3__V_CMP_NE_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ne_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_NE_U32 - - Inst_VOP3__V_CMP_NE_U32::~Inst_VOP3__V_CMP_NE_U32() - { - } // ~Inst_VOP3__V_CMP_NE_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GE_U32 class methods --- - - Inst_VOP3__V_CMP_GE_U32::Inst_VOP3__V_CMP_GE_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ge_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GE_U32 - - Inst_VOP3__V_CMP_GE_U32::~Inst_VOP3__V_CMP_GE_U32() - { - } // ~Inst_VOP3__V_CMP_GE_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_T_U32 class methods --- - - Inst_VOP3__V_CMP_T_U32::Inst_VOP3__V_CMP_T_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_t_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_T_U32 - - Inst_VOP3__V_CMP_T_U32::~Inst_VOP3__V_CMP_T_U32() - { - } // ~Inst_VOP3__V_CMP_T_U32 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_F_I32 class methods --- - - Inst_VOP3__V_CMPX_F_I32::Inst_VOP3__V_CMPX_F_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_f_i32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_F_I32 - - Inst_VOP3__V_CMPX_F_I32::~Inst_VOP3__V_CMPX_F_I32() - { - } // ~Inst_VOP3__V_CMPX_F_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LT_I32 class methods --- - - Inst_VOP3__V_CMPX_LT_I32::Inst_VOP3__V_CMPX_LT_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lt_i32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LT_I32 - - Inst_VOP3__V_CMPX_LT_I32::~Inst_VOP3__V_CMPX_LT_I32() - { - } // ~Inst_VOP3__V_CMPX_LT_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_EQ_I32 class methods --- - - Inst_VOP3__V_CMPX_EQ_I32::Inst_VOP3__V_CMPX_EQ_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_eq_i32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_EQ_I32 - - Inst_VOP3__V_CMPX_EQ_I32::~Inst_VOP3__V_CMPX_EQ_I32() - { - } // ~Inst_VOP3__V_CMPX_EQ_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LE_I32 class methods --- - - Inst_VOP3__V_CMPX_LE_I32::Inst_VOP3__V_CMPX_LE_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_le_i32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LE_I32 - - Inst_VOP3__V_CMPX_LE_I32::~Inst_VOP3__V_CMPX_LE_I32() - { - } // ~Inst_VOP3__V_CMPX_LE_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GT_I32 class methods --- - - Inst_VOP3__V_CMPX_GT_I32::Inst_VOP3__V_CMPX_GT_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_gt_i32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GT_I32 - - Inst_VOP3__V_CMPX_GT_I32::~Inst_VOP3__V_CMPX_GT_I32() - { - } // ~Inst_VOP3__V_CMPX_GT_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NE_I32 class methods --- - - Inst_VOP3__V_CMPX_NE_I32::Inst_VOP3__V_CMPX_NE_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ne_i32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NE_I32 - - Inst_VOP3__V_CMPX_NE_I32::~Inst_VOP3__V_CMPX_NE_I32() - { - } // ~Inst_VOP3__V_CMPX_NE_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GE_I32 class methods --- - - Inst_VOP3__V_CMPX_GE_I32::Inst_VOP3__V_CMPX_GE_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ge_i32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GE_I32 - - Inst_VOP3__V_CMPX_GE_I32::~Inst_VOP3__V_CMPX_GE_I32() - { - } // ~Inst_VOP3__V_CMPX_GE_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_T_I32 class methods --- - - Inst_VOP3__V_CMPX_T_I32::Inst_VOP3__V_CMPX_T_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_t_i32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_T_I32 - - Inst_VOP3__V_CMPX_T_I32::~Inst_VOP3__V_CMPX_T_I32() - { - } // ~Inst_VOP3__V_CMPX_T_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_F_U32 class methods --- - - Inst_VOP3__V_CMPX_F_U32::Inst_VOP3__V_CMPX_F_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_f_u32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_F_U32 - - Inst_VOP3__V_CMPX_F_U32::~Inst_VOP3__V_CMPX_F_U32() - { - } // ~Inst_VOP3__V_CMPX_F_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LT_U32 class methods --- - - Inst_VOP3__V_CMPX_LT_U32::Inst_VOP3__V_CMPX_LT_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lt_u32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LT_U32 - - Inst_VOP3__V_CMPX_LT_U32::~Inst_VOP3__V_CMPX_LT_U32() - { - } // ~Inst_VOP3__V_CMPX_LT_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_EQ_U32 class methods --- - - Inst_VOP3__V_CMPX_EQ_U32::Inst_VOP3__V_CMPX_EQ_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_eq_u32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_EQ_U32 - - Inst_VOP3__V_CMPX_EQ_U32::~Inst_VOP3__V_CMPX_EQ_U32() - { - } // ~Inst_VOP3__V_CMPX_EQ_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LE_U32 class methods --- - - Inst_VOP3__V_CMPX_LE_U32::Inst_VOP3__V_CMPX_LE_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_le_u32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LE_U32 - - Inst_VOP3__V_CMPX_LE_U32::~Inst_VOP3__V_CMPX_LE_U32() - { - } // ~Inst_VOP3__V_CMPX_LE_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GT_U32 class methods --- - - Inst_VOP3__V_CMPX_GT_U32::Inst_VOP3__V_CMPX_GT_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_gt_u32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GT_U32 - - Inst_VOP3__V_CMPX_GT_U32::~Inst_VOP3__V_CMPX_GT_U32() - { - } // ~Inst_VOP3__V_CMPX_GT_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NE_U32 class methods --- - - Inst_VOP3__V_CMPX_NE_U32::Inst_VOP3__V_CMPX_NE_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ne_u32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NE_U32 - - Inst_VOP3__V_CMPX_NE_U32::~Inst_VOP3__V_CMPX_NE_U32() - { - } // ~Inst_VOP3__V_CMPX_NE_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GE_U32 class methods --- - - Inst_VOP3__V_CMPX_GE_U32::Inst_VOP3__V_CMPX_GE_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ge_u32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GE_U32 - - Inst_VOP3__V_CMPX_GE_U32::~Inst_VOP3__V_CMPX_GE_U32() - { - } // ~Inst_VOP3__V_CMPX_GE_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_T_U32 class methods --- - - Inst_VOP3__V_CMPX_T_U32::Inst_VOP3__V_CMPX_T_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_t_u32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_T_U32 - - Inst_VOP3__V_CMPX_T_U32::~Inst_VOP3__V_CMPX_T_U32() - { - } // ~Inst_VOP3__V_CMPX_T_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_F_I64 class methods --- - - Inst_VOP3__V_CMP_F_I64::Inst_VOP3__V_CMP_F_I64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_f_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_F_I64 - - Inst_VOP3__V_CMP_F_I64::~Inst_VOP3__V_CMP_F_I64() - { - } // ~Inst_VOP3__V_CMP_F_I64 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LT_I64 class methods --- - - Inst_VOP3__V_CMP_LT_I64::Inst_VOP3__V_CMP_LT_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lt_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LT_I64 - - Inst_VOP3__V_CMP_LT_I64::~Inst_VOP3__V_CMP_LT_I64() - { - } // ~Inst_VOP3__V_CMP_LT_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_EQ_I64 class methods --- - - Inst_VOP3__V_CMP_EQ_I64::Inst_VOP3__V_CMP_EQ_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_eq_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_EQ_I64 - - Inst_VOP3__V_CMP_EQ_I64::~Inst_VOP3__V_CMP_EQ_I64() - { - } // ~Inst_VOP3__V_CMP_EQ_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LE_I64 class methods --- - - Inst_VOP3__V_CMP_LE_I64::Inst_VOP3__V_CMP_LE_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_le_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LE_I64 - - Inst_VOP3__V_CMP_LE_I64::~Inst_VOP3__V_CMP_LE_I64() - { - } // ~Inst_VOP3__V_CMP_LE_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GT_I64 class methods --- - - Inst_VOP3__V_CMP_GT_I64::Inst_VOP3__V_CMP_GT_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_gt_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GT_I64 - - Inst_VOP3__V_CMP_GT_I64::~Inst_VOP3__V_CMP_GT_I64() - { - } // ~Inst_VOP3__V_CMP_GT_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NE_I64 class methods --- - - Inst_VOP3__V_CMP_NE_I64::Inst_VOP3__V_CMP_NE_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ne_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_NE_I64 - - Inst_VOP3__V_CMP_NE_I64::~Inst_VOP3__V_CMP_NE_I64() - { - } // ~Inst_VOP3__V_CMP_NE_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GE_I64 class methods --- - - Inst_VOP3__V_CMP_GE_I64::Inst_VOP3__V_CMP_GE_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ge_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GE_I64 - - Inst_VOP3__V_CMP_GE_I64::~Inst_VOP3__V_CMP_GE_I64() - { - } // ~Inst_VOP3__V_CMP_GE_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_T_I64 class methods --- - - Inst_VOP3__V_CMP_T_I64::Inst_VOP3__V_CMP_T_I64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_t_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_T_I64 - - Inst_VOP3__V_CMP_T_I64::~Inst_VOP3__V_CMP_T_I64() - { - } // ~Inst_VOP3__V_CMP_T_I64 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_F_U64 class methods --- - - Inst_VOP3__V_CMP_F_U64::Inst_VOP3__V_CMP_F_U64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_f_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_F_U64 - - Inst_VOP3__V_CMP_F_U64::~Inst_VOP3__V_CMP_F_U64() - { - } // ~Inst_VOP3__V_CMP_F_U64 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LT_U64 class methods --- - - Inst_VOP3__V_CMP_LT_U64::Inst_VOP3__V_CMP_LT_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lt_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LT_U64 - - Inst_VOP3__V_CMP_LT_U64::~Inst_VOP3__V_CMP_LT_U64() - { - } // ~Inst_VOP3__V_CMP_LT_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_EQ_U64 class methods --- - - Inst_VOP3__V_CMP_EQ_U64::Inst_VOP3__V_CMP_EQ_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_eq_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_EQ_U64 - - Inst_VOP3__V_CMP_EQ_U64::~Inst_VOP3__V_CMP_EQ_U64() - { - } // ~Inst_VOP3__V_CMP_EQ_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LE_U64 class methods --- - - Inst_VOP3__V_CMP_LE_U64::Inst_VOP3__V_CMP_LE_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_le_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LE_U64 - - Inst_VOP3__V_CMP_LE_U64::~Inst_VOP3__V_CMP_LE_U64() - { - } // ~Inst_VOP3__V_CMP_LE_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GT_U64 class methods --- - - Inst_VOP3__V_CMP_GT_U64::Inst_VOP3__V_CMP_GT_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_gt_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GT_U64 - - Inst_VOP3__V_CMP_GT_U64::~Inst_VOP3__V_CMP_GT_U64() - { - } // ~Inst_VOP3__V_CMP_GT_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NE_U64 class methods --- - - Inst_VOP3__V_CMP_NE_U64::Inst_VOP3__V_CMP_NE_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ne_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_NE_U64 - - Inst_VOP3__V_CMP_NE_U64::~Inst_VOP3__V_CMP_NE_U64() - { - } // ~Inst_VOP3__V_CMP_NE_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GE_U64 class methods --- - - Inst_VOP3__V_CMP_GE_U64::Inst_VOP3__V_CMP_GE_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ge_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GE_U64 - - Inst_VOP3__V_CMP_GE_U64::~Inst_VOP3__V_CMP_GE_U64() - { - } // ~Inst_VOP3__V_CMP_GE_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_T_U64 class methods --- - - Inst_VOP3__V_CMP_T_U64::Inst_VOP3__V_CMP_T_U64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_t_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_T_U64 - - Inst_VOP3__V_CMP_T_U64::~Inst_VOP3__V_CMP_T_U64() - { - } // ~Inst_VOP3__V_CMP_T_U64 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_F_I64 class methods --- - - Inst_VOP3__V_CMPX_F_I64::Inst_VOP3__V_CMPX_F_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_f_i64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_F_I64 - - Inst_VOP3__V_CMPX_F_I64::~Inst_VOP3__V_CMPX_F_I64() - { - } // ~Inst_VOP3__V_CMPX_F_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LT_I64 class methods --- - - Inst_VOP3__V_CMPX_LT_I64::Inst_VOP3__V_CMPX_LT_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lt_i64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LT_I64 - - Inst_VOP3__V_CMPX_LT_I64::~Inst_VOP3__V_CMPX_LT_I64() - { - } // ~Inst_VOP3__V_CMPX_LT_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_EQ_I64 class methods --- - - Inst_VOP3__V_CMPX_EQ_I64::Inst_VOP3__V_CMPX_EQ_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_eq_i64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_EQ_I64 - - Inst_VOP3__V_CMPX_EQ_I64::~Inst_VOP3__V_CMPX_EQ_I64() - { - } // ~Inst_VOP3__V_CMPX_EQ_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LE_I64 class methods --- - - Inst_VOP3__V_CMPX_LE_I64::Inst_VOP3__V_CMPX_LE_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_le_i64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LE_I64 - - Inst_VOP3__V_CMPX_LE_I64::~Inst_VOP3__V_CMPX_LE_I64() - { - } // ~Inst_VOP3__V_CMPX_LE_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GT_I64 class methods --- - - Inst_VOP3__V_CMPX_GT_I64::Inst_VOP3__V_CMPX_GT_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_gt_i64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GT_I64 - - Inst_VOP3__V_CMPX_GT_I64::~Inst_VOP3__V_CMPX_GT_I64() - { - } // ~Inst_VOP3__V_CMPX_GT_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NE_I64 class methods --- - - Inst_VOP3__V_CMPX_NE_I64::Inst_VOP3__V_CMPX_NE_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ne_i64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NE_I64 - - Inst_VOP3__V_CMPX_NE_I64::~Inst_VOP3__V_CMPX_NE_I64() - { - } // ~Inst_VOP3__V_CMPX_NE_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GE_I64 class methods --- - - Inst_VOP3__V_CMPX_GE_I64::Inst_VOP3__V_CMPX_GE_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ge_i64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GE_I64 - - Inst_VOP3__V_CMPX_GE_I64::~Inst_VOP3__V_CMPX_GE_I64() - { - } // ~Inst_VOP3__V_CMPX_GE_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_T_I64 class methods --- - - Inst_VOP3__V_CMPX_T_I64::Inst_VOP3__V_CMPX_T_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_t_i64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_T_I64 - - Inst_VOP3__V_CMPX_T_I64::~Inst_VOP3__V_CMPX_T_I64() - { - } // ~Inst_VOP3__V_CMPX_T_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_F_U64 class methods --- - - Inst_VOP3__V_CMPX_F_U64::Inst_VOP3__V_CMPX_F_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_f_u64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_F_U64 - - Inst_VOP3__V_CMPX_F_U64::~Inst_VOP3__V_CMPX_F_U64() - { - } // ~Inst_VOP3__V_CMPX_F_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LT_U64 class methods --- - - Inst_VOP3__V_CMPX_LT_U64::Inst_VOP3__V_CMPX_LT_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lt_u64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LT_U64 - - Inst_VOP3__V_CMPX_LT_U64::~Inst_VOP3__V_CMPX_LT_U64() - { - } // ~Inst_VOP3__V_CMPX_LT_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_EQ_U64 class methods --- - - Inst_VOP3__V_CMPX_EQ_U64::Inst_VOP3__V_CMPX_EQ_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_eq_u64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_EQ_U64 - - Inst_VOP3__V_CMPX_EQ_U64::~Inst_VOP3__V_CMPX_EQ_U64() - { - } // ~Inst_VOP3__V_CMPX_EQ_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LE_U64 class methods --- - - Inst_VOP3__V_CMPX_LE_U64::Inst_VOP3__V_CMPX_LE_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_le_u64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LE_U64 - - Inst_VOP3__V_CMPX_LE_U64::~Inst_VOP3__V_CMPX_LE_U64() - { - } // ~Inst_VOP3__V_CMPX_LE_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GT_U64 class methods --- - - Inst_VOP3__V_CMPX_GT_U64::Inst_VOP3__V_CMPX_GT_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_gt_u64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GT_U64 - - Inst_VOP3__V_CMPX_GT_U64::~Inst_VOP3__V_CMPX_GT_U64() - { - } // ~Inst_VOP3__V_CMPX_GT_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NE_U64 class methods --- - - Inst_VOP3__V_CMPX_NE_U64::Inst_VOP3__V_CMPX_NE_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ne_u64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NE_U64 - - Inst_VOP3__V_CMPX_NE_U64::~Inst_VOP3__V_CMPX_NE_U64() - { - } // ~Inst_VOP3__V_CMPX_NE_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GE_U64 class methods --- - - Inst_VOP3__V_CMPX_GE_U64::Inst_VOP3__V_CMPX_GE_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ge_u64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GE_U64 - - Inst_VOP3__V_CMPX_GE_U64::~Inst_VOP3__V_CMPX_GE_U64() - { - } // ~Inst_VOP3__V_CMPX_GE_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_T_U64 class methods --- - - Inst_VOP3__V_CMPX_T_U64::Inst_VOP3__V_CMPX_T_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_t_u64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_T_U64 - - Inst_VOP3__V_CMPX_T_U64::~Inst_VOP3__V_CMPX_T_U64() - { - } // ~Inst_VOP3__V_CMPX_T_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CNDMASK_B32 class methods --- - - Inst_VOP3__V_CNDMASK_B32::Inst_VOP3__V_CNDMASK_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cndmask_b32", false) - { - setFlag(ALU); - setFlag(ReadsVCC); - } // Inst_VOP3__V_CNDMASK_B32 - - Inst_VOP3__V_CNDMASK_B32::~Inst_VOP3__V_CNDMASK_B32() - { - } // ~Inst_VOP3__V_CNDMASK_B32 - - // --- description from .arch file --- - // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC - // as a scalar GPR in S2. 
- void - Inst_VOP3__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - vcc.read(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = bits(vcc.rawData(), lane) - ? src1[lane] : src0[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_ADD_F32 class methods --- - - Inst_VOP3__V_ADD_F32::Inst_VOP3__V_ADD_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_add_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_ADD_F32 - - Inst_VOP3__V_ADD_F32::~Inst_VOP3__V_ADD_F32() - { - } // ~Inst_VOP3__V_ADD_F32 - - // --- description from .arch file --- - // D.f = S0.f + S1.f. - void - Inst_VOP3__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_SUB_F32 class methods --- - - Inst_VOP3__V_SUB_F32::Inst_VOP3__V_SUB_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sub_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_SUB_F32 - - Inst_VOP3__V_SUB_F32::~Inst_VOP3__V_SUB_F32() - { - } // ~Inst_VOP3__V_SUB_F32 - - // --- description from .arch file --- - // D.f = S0.f - S1.f. - // SQ translates to V_ADD_F32. 
- void - Inst_VOP3__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] - src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_SUBREV_F32 class methods --- - - Inst_VOP3__V_SUBREV_F32::Inst_VOP3__V_SUBREV_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_subrev_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_SUBREV_F32 - - Inst_VOP3__V_SUBREV_F32::~Inst_VOP3__V_SUBREV_F32() - { - } // ~Inst_VOP3__V_SUBREV_F32 - - // --- description from .arch file --- - // D.f = S1.f - S0.f. - // SQ translates to V_ADD_F32. - void - Inst_VOP3__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] - src0[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MUL_LEGACY_F32 class methods --- - - Inst_VOP3__V_MUL_LEGACY_F32::Inst_VOP3__V_MUL_LEGACY_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mul_legacy_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_MUL_LEGACY_F32 - - Inst_VOP3__V_MUL_LEGACY_F32::~Inst_VOP3__V_MUL_LEGACY_F32() - { - } // ~Inst_VOP3__V_MUL_LEGACY_F32 - - // --- description from .arch file --- - // D.f = S0.f * S1.f (DX9 rules, 0.0*x = 0.0). 
- void - Inst_VOP3__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isnan(src0[lane]) || - std::isnan(src1[lane])) { - vdst[lane] = NAN; - } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || - std::fpclassify(src0[lane]) == FP_ZERO) && - !std::signbit(src0[lane])) { - if (std::isinf(src1[lane])) { - vdst[lane] = NAN; - } else if (!std::signbit(src1[lane])) { - vdst[lane] = +0.0; - } else { - vdst[lane] = -0.0; - } - } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || - std::fpclassify(src0[lane]) == FP_ZERO) && - std::signbit(src0[lane])) { - if (std::isinf(src1[lane])) { - vdst[lane] = NAN; - } else if (std::signbit(src1[lane])) { - vdst[lane] = +0.0; - } else { - vdst[lane] = -0.0; - } - } else if (std::isinf(src0[lane]) && - !std::signbit(src0[lane])) { - if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || - std::fpclassify(src1[lane]) == FP_ZERO) { - vdst[lane] = NAN; - } else if (!std::signbit(src1[lane])) { - vdst[lane] = +INFINITY; - } else { - vdst[lane] = -INFINITY; - } - } else if (std::isinf(src0[lane]) && - std::signbit(src0[lane])) { - if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || - std::fpclassify(src1[lane]) == FP_ZERO) { - vdst[lane] = NAN; - } else if (std::signbit(src1[lane])) { - vdst[lane] = +INFINITY; - } else { - vdst[lane] = -INFINITY; - } - } else { - vdst[lane] = src0[lane] * src1[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MUL_F32 class methods --- - - Inst_VOP3__V_MUL_F32::Inst_VOP3__V_MUL_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mul_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_MUL_F32 - - Inst_VOP3__V_MUL_F32::~Inst_VOP3__V_MUL_F32() - { - } // ~Inst_VOP3__V_MUL_F32 - - // --- description from .arch file --- - // D.f = S0.f * S1.f. 
- void - Inst_VOP3__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isnan(src0[lane]) || - std::isnan(src1[lane])) { - vdst[lane] = NAN; - } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || - std::fpclassify(src0[lane]) == FP_ZERO) && - !std::signbit(src0[lane])) { - if (std::isinf(src1[lane])) { - vdst[lane] = NAN; - } else if (!std::signbit(src1[lane])) { - vdst[lane] = +0.0; - } else { - vdst[lane] = -0.0; - } - } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || - std::fpclassify(src0[lane]) == FP_ZERO) && - std::signbit(src0[lane])) { - if (std::isinf(src1[lane])) { - vdst[lane] = NAN; - } else if (std::signbit(src1[lane])) { - vdst[lane] = +0.0; - } else { - vdst[lane] = -0.0; - } - } else if (std::isinf(src0[lane]) && - !std::signbit(src0[lane])) { - if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || - std::fpclassify(src1[lane]) == FP_ZERO) { - vdst[lane] = NAN; - } else if (!std::signbit(src1[lane])) { - vdst[lane] = +INFINITY; - } else { - vdst[lane] = -INFINITY; - } - } else if (std::isinf(src0[lane]) && - std::signbit(src0[lane])) { - if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || - std::fpclassify(src1[lane]) == FP_ZERO) { - vdst[lane] = NAN; - } else if (std::signbit(src1[lane])) { - vdst[lane] = +INFINITY; - } else { - vdst[lane] = -INFINITY; - } - } else { - vdst[lane] = src0[lane] * src1[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MUL_I32_I24 class methods --- - - Inst_VOP3__V_MUL_I32_I24::Inst_VOP3__V_MUL_I32_I24(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mul_i32_i24", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MUL_I32_I24 - - Inst_VOP3__V_MUL_I32_I24::~Inst_VOP3__V_MUL_I32_I24() - { - } // ~Inst_VOP3__V_MUL_I32_I24 - - // --- description from .arch file --- - // D.i = S0.i[23:0] * S1.i[23:0]. 
- void - Inst_VOP3__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = sext<24>(bits(src0[lane], 23, 0)) - * sext<24>(bits(src1[lane], 23, 0)); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MUL_HI_I32_I24 class methods --- - - Inst_VOP3__V_MUL_HI_I32_I24::Inst_VOP3__V_MUL_HI_I32_I24(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mul_hi_i32_i24", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MUL_HI_I32_I24 - - Inst_VOP3__V_MUL_HI_I32_I24::~Inst_VOP3__V_MUL_HI_I32_I24() - { - } // ~Inst_VOP3__V_MUL_HI_I32_I24 - - // --- description from .arch file --- - // D.i = (S0.i[23:0] * S1.i[23:0])>>32. - void - Inst_VOP3__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemI64 tmp_src0 - = (VecElemI64)sext<24>(bits(src0[lane], 23, 0)); - VecElemI64 tmp_src1 - = (VecElemI64)sext<24>(bits(src1[lane], 23, 0)); - - vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MUL_U32_U24 class methods --- - - Inst_VOP3__V_MUL_U32_U24::Inst_VOP3__V_MUL_U32_U24(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mul_u32_u24", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MUL_U32_U24 - - Inst_VOP3__V_MUL_U32_U24::~Inst_VOP3__V_MUL_U32_U24() - { - } // ~Inst_VOP3__V_MUL_U32_U24 - - // --- description from .arch file --- - // D.u = S0.u[23:0] * S1.u[23:0]. 
- void - Inst_VOP3__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MUL_HI_U32_U24 class methods --- - - Inst_VOP3__V_MUL_HI_U32_U24::Inst_VOP3__V_MUL_HI_U32_U24(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mul_hi_u32_u24", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MUL_HI_U32_U24 - - Inst_VOP3__V_MUL_HI_U32_U24::~Inst_VOP3__V_MUL_HI_U32_U24() - { - } // ~Inst_VOP3__V_MUL_HI_U32_U24 - - // --- description from .arch file --- - // D.i = (S0.u[23:0] * S1.u[23:0])>>32. - void - Inst_VOP3__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0); - VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0); - vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MIN_F32 class methods --- - - Inst_VOP3__V_MIN_F32::Inst_VOP3__V_MIN_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_min_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_MIN_F32 - - Inst_VOP3__V_MIN_F32::~Inst_VOP3__V_MIN_F32() - { - } // ~Inst_VOP3__V_MIN_F32 - - // --- description from .arch file --- - // D.f = (S0.f < S1.f ? S0.f : S1.f). 
- void - Inst_VOP3__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fmin(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAX_F32 class methods --- - - Inst_VOP3__V_MAX_F32::Inst_VOP3__V_MAX_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_max_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_MAX_F32 - - Inst_VOP3__V_MAX_F32::~Inst_VOP3__V_MAX_F32() - { - } // ~Inst_VOP3__V_MAX_F32 - - // --- description from .arch file --- - // D.f = (S0.f >= S1.f ? S0.f : S1.f). - void - Inst_VOP3__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fmax(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MIN_I32 class methods --- - - Inst_VOP3__V_MIN_I32::Inst_VOP3__V_MIN_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_min_i32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MIN_I32 - - Inst_VOP3__V_MIN_I32::~Inst_VOP3__V_MIN_I32() - { - } // ~Inst_VOP3__V_MIN_I32 - - // --- description from .arch file --- - // D.i = min(S0.i, S1.i). - void - Inst_VOP3__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAX_I32 class methods --- - - Inst_VOP3__V_MAX_I32::Inst_VOP3__V_MAX_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_max_i32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MAX_I32 - - Inst_VOP3__V_MAX_I32::~Inst_VOP3__V_MAX_I32() - { - } // ~Inst_VOP3__V_MAX_I32 - - // --- description from .arch file --- - // D.i = max(S0.i, S1.i). 
- void - Inst_VOP3__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MIN_U32 class methods --- - - Inst_VOP3__V_MIN_U32::Inst_VOP3__V_MIN_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_min_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MIN_U32 - - Inst_VOP3__V_MIN_U32::~Inst_VOP3__V_MIN_U32() - { - } // ~Inst_VOP3__V_MIN_U32 - - // --- description from .arch file --- - // D.u = min(S0.u, S1.u). - void - Inst_VOP3__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAX_U32 class methods --- - - Inst_VOP3__V_MAX_U32::Inst_VOP3__V_MAX_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_max_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MAX_U32 - - Inst_VOP3__V_MAX_U32::~Inst_VOP3__V_MAX_U32() - { - } // ~Inst_VOP3__V_MAX_U32 - - // --- description from .arch file --- - // D.u = max(S0.u, S1.u). - void - Inst_VOP3__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LSHRREV_B32 class methods --- - - Inst_VOP3__V_LSHRREV_B32::Inst_VOP3__V_LSHRREV_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_lshrrev_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_LSHRREV_B32 - - Inst_VOP3__V_LSHRREV_B32::~Inst_VOP3__V_LSHRREV_B32() - { - } // ~Inst_VOP3__V_LSHRREV_B32 - - // --- description from .arch file --- - // D.u = S1.u >> S0.u[4:0]. - // The vacated bits are set to zero. - // SQ translates this to an internal SP opcode. 
- void - Inst_VOP3__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_ASHRREV_I32 class methods --- - - Inst_VOP3__V_ASHRREV_I32::Inst_VOP3__V_ASHRREV_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_ashrrev_i32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_ASHRREV_I32 - - Inst_VOP3__V_ASHRREV_I32::~Inst_VOP3__V_ASHRREV_I32() - { - } // ~Inst_VOP3__V_ASHRREV_I32 - - // --- description from .arch file --- - // D.i = signext(S1.i) >> S0.i[4:0]. - // The vacated bits are set to the sign bit of the input value. - // SQ translates this to an internal SP opcode. - void - Inst_VOP3__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LSHLREV_B32 class methods --- - - Inst_VOP3__V_LSHLREV_B32::Inst_VOP3__V_LSHLREV_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_lshlrev_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_LSHLREV_B32 - - Inst_VOP3__V_LSHLREV_B32::~Inst_VOP3__V_LSHLREV_B32() - { - } // ~Inst_VOP3__V_LSHLREV_B32 - - // --- description from .arch file --- - // D.u = S1.u << S0.u[4:0]. - // SQ translates this to an internal SP opcode. 
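The "rev" shifts in this group reverse the usual operand roles: S1 supplies the value and S0 the shift count, of which only the low five bits are used (the bits(src0, 4, 0) calls above and in the v_lshlrev_b32 body that follows). A plain C++ stand-in with the gem5 bits() helper replaced by an explicit mask; the function names are illustrative:

#include <cstdint>

uint32_t v_lshrrev_b32(uint32_t s0, uint32_t s1) { return s1 >> (s0 & 0x1f); }
uint32_t v_lshlrev_b32(uint32_t s0, uint32_t s1) { return s1 << (s0 & 0x1f); }
// Right-shifting a negative int gives the arithmetic shift the spec asks for
// on the usual two's-complement targets (guaranteed only from C++20 on).
int32_t  v_ashrrev_i32(uint32_t s0, int32_t s1)  { return s1 >> (s0 & 0x1f); }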
- void - Inst_VOP3__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] << bits(src0[lane], 4, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_AND_B32 class methods --- - - Inst_VOP3__V_AND_B32::Inst_VOP3__V_AND_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_and_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_AND_B32 - - Inst_VOP3__V_AND_B32::~Inst_VOP3__V_AND_B32() - { - } // ~Inst_VOP3__V_AND_B32 - - // --- description from .arch file --- - // D.u = S0.u & S1.u. - // Input and output modifiers not supported. - void - Inst_VOP3__V_AND_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] & src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_OR_B32 class methods --- - - Inst_VOP3__V_OR_B32::Inst_VOP3__V_OR_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_or_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_OR_B32 - - Inst_VOP3__V_OR_B32::~Inst_VOP3__V_OR_B32() - { - } // ~Inst_VOP3__V_OR_B32 - - // --- description from .arch file --- - // D.u = S0.u | S1.u. - // Input and output modifiers not supported. - void - Inst_VOP3__V_OR_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] | src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_OR3_B32 class methods --- - - Inst_VOP3__V_OR3_B32::Inst_VOP3__V_OR3_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_or3_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_OR3_B32 - - Inst_VOP3__V_OR3_B32::~Inst_VOP3__V_OR3_B32() - { - } // ~Inst_VOP3__V_OR3_B32 - - // --- description from .arch file --- - // D.u = S0.u | S1.u | S2.u. - // Input and output modifiers not supported. 
- void - Inst_VOP3__V_OR3_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] | src1[lane] | src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_XOR_B32 class methods --- - - Inst_VOP3__V_XOR_B32::Inst_VOP3__V_XOR_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_xor_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_XOR_B32 - - Inst_VOP3__V_XOR_B32::~Inst_VOP3__V_XOR_B32() - { - } // ~Inst_VOP3__V_XOR_B32 - - // --- description from .arch file --- - // D.u = S0.u ^ S1.u. - // Input and output modifiers not supported. - void - Inst_VOP3__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] ^ src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAC_F32 class methods --- - - Inst_VOP3__V_MAC_F32::Inst_VOP3__V_MAC_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mac_f32", false) - { - setFlag(ALU); - setFlag(F32); - setFlag(MAC); - } // Inst_VOP3__V_MAC_F32 - - Inst_VOP3__V_MAC_F32::~Inst_VOP3__V_MAC_F32() - { - } // ~Inst_VOP3__V_MAC_F32 - - // --- description from .arch file --- - // D.f = S0.f * S1.f + D.f. - // SQ translates to V_MAD_F32. 
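The v_mac_f32 execute body that follows reads the destination register before the lane loop and uses it as the accumulator, with std::fma supplying the single-rounding multiply-add. A per-lane sketch of the same pattern (name illustrative):

#include <cmath>

// D = fma(S0, S1, D): one rounding step, as in the std::fma call below.
float mac_f32_lane(float s0, float s1, float d_in)
{
    return std::fma(s0, s1, d_in);
}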
- void - Inst_VOP3__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - vdst.read(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_ADD_CO_U32 class methods --- - - Inst_VOP3__V_ADD_CO_U32::Inst_VOP3__V_ADD_CO_U32(InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_add_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - } // Inst_VOP3__V_ADD_CO_U32 - - Inst_VOP3__V_ADD_CO_U32::~Inst_VOP3__V_ADD_CO_U32() - { - } // ~Inst_VOP3__V_ADD_CO_U32 - - // --- description from .arch file --- - // D.u = S0.u + S1.u; - // VCC[threadId] = (S0.u + S1.u >= 0x800000000ULL ? 1 : 0) is an UNSIGNED - // --- overflow or carry-out for V_ADDC_U32. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair. - void - Inst_VOP3__V_ADD_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, instData.SDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane]; - vcc.setBit(lane, ((VecElemU64)src0[lane] - + (VecElemU64)src1[lane]) >= 0x100000000ULL ? 1 : 0); - } - } - - vdst.write(); - vcc.write(); - } // execute - // --- Inst_VOP3__V_SUB_CO_U32 class methods --- - - Inst_VOP3__V_SUB_CO_U32::Inst_VOP3__V_SUB_CO_U32(InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_sub_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - } // Inst_VOP3__V_SUB_CO_U32 - - Inst_VOP3__V_SUB_CO_U32::~Inst_VOP3__V_SUB_CO_U32() - { - } // ~Inst_VOP3__V_SUB_CO_U32 - - // --- description from .arch file --- - // D.u = S0.u - S1.u; - // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or - // carry-out for V_SUBB_U32. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair. - void - Inst_VOP3__V_SUB_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, instData.SDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] - src1[lane]; - vcc.setBit(lane, src1[lane] > src0[lane] ? 
1 : 0); - } - } - - vdst.write(); - vcc.write(); - } // execute - // --- Inst_VOP3__V_SUBREV_CO_U32 class methods --- - - Inst_VOP3__V_SUBREV_CO_U32::Inst_VOP3__V_SUBREV_CO_U32( - InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_subrev_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - } // Inst_VOP3__V_SUBREV_CO_U32 - - Inst_VOP3__V_SUBREV_CO_U32::~Inst_VOP3__V_SUBREV_CO_U32() - { - } // ~Inst_VOP3__V_SUBREV_CO_U32 - - // --- description from .arch file --- - // D.u = S1.u - S0.u; - // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or - // carry-out for V_SUBB_U32. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair. - // SQ translates this to V_SUB_U32 with reversed operands. - void - Inst_VOP3__V_SUBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, instData.SDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] - src0[lane]; - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vdst.write(); - vcc.write(); - } // execute - // --- Inst_VOP3__V_ADDC_CO_U32 class methods --- - - Inst_VOP3__V_ADDC_CO_U32::Inst_VOP3__V_ADDC_CO_U32(InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_addc_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(ReadsVCC); - } // Inst_VOP3__V_ADDC_CO_U32 - - Inst_VOP3__V_ADDC_CO_U32::~Inst_VOP3__V_ADDC_CO_U32() - { - } // ~Inst_VOP3__V_ADDC_CO_U32 - - // --- description from .arch file --- - // D.u = S0.u + S1.u + VCC[threadId]; - // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x800000000ULL ? 1 : 0) - // is an UNSIGNED overflow. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC - // source comes from the SGPR-pair at S2.u. - void - Inst_VOP3__V_ADDC_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - - src0.readSrc(); - src1.readSrc(); - vcc.read(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane] - + bits(vcc.rawData(), lane); - sdst.setBit(lane, ((VecElemU64)src0[lane] - + (VecElemU64)src1[lane] - + (VecElemU64)bits(vcc.rawData(), lane)) - >= 0x100000000 ? 
1 : 0); - } - } - - vdst.write(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_SUBB_CO_U32 class methods --- - - Inst_VOP3__V_SUBB_CO_U32::Inst_VOP3__V_SUBB_CO_U32(InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_subb_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(ReadsVCC); - } // Inst_VOP3__V_SUBB_CO_U32 - - Inst_VOP3__V_SUBB_CO_U32::~Inst_VOP3__V_SUBB_CO_U32() - { - } // ~Inst_VOP3__V_SUBB_CO_U32 - - // --- description from .arch file --- - // D.u = S0.u - S1.u - VCC[threadId]; - // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED - // --- overflow. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC - // --- source comes from the SGPR-pair at S2.u. - void - Inst_VOP3__V_SUBB_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - vcc.read(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] - src1[lane] - - bits(vcc.rawData(), lane); - sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane)) - > src0[lane] ? 1 : 0); - } - } - - vdst.write(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_SUBBREV_CO_U32 class methods --- - - Inst_VOP3__V_SUBBREV_CO_U32::Inst_VOP3__V_SUBBREV_CO_U32( - InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_subbrev_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(ReadsVCC); - } // Inst_VOP3__V_SUBBREV_CO_U32 - - Inst_VOP3__V_SUBBREV_CO_U32::~Inst_VOP3__V_SUBBREV_CO_U32() - { - } // ~Inst_VOP3__V_SUBBREV_CO_U32 - - // --- description from .arch file --- - // D.u = S1.u - S0.u - VCC[threadId]; - // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED - // overflow. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC - // source comes from the SGPR-pair at S2.u. SQ translates to V_SUBB_U32. - void - Inst_VOP3__V_SUBBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU64 vcc(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - vcc.read(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] - src0[lane] - - bits(vcc.rawData(), lane); - sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane)) - > src0[lane] ? 
1 : 0); - } - } - - vdst.write(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_ADD_F16 class methods --- - - Inst_VOP3__V_ADD_F16::Inst_VOP3__V_ADD_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_add_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_ADD_F16 - - Inst_VOP3__V_ADD_F16::~Inst_VOP3__V_ADD_F16() - { - } // ~Inst_VOP3__V_ADD_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 + S1.f16. - // Supports denormals, round mode, exception flags, saturation. - void - Inst_VOP3__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_SUB_F16 class methods --- - - Inst_VOP3__V_SUB_F16::Inst_VOP3__V_SUB_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sub_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_SUB_F16 - - Inst_VOP3__V_SUB_F16::~Inst_VOP3__V_SUB_F16() - { - } // ~Inst_VOP3__V_SUB_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 - S1.f16. - // Supports denormals, round mode, exception flags, saturation. - // SQ translates to V_ADD_F16. - void - Inst_VOP3__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_SUBREV_F16 class methods --- - - Inst_VOP3__V_SUBREV_F16::Inst_VOP3__V_SUBREV_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_subrev_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_SUBREV_F16 - - Inst_VOP3__V_SUBREV_F16::~Inst_VOP3__V_SUBREV_F16() - { - } // ~Inst_VOP3__V_SUBREV_F16 - - // --- description from .arch file --- - // D.f16 = S1.f16 - S0.f16. - // Supports denormals, round mode, exception flags, saturation. - // SQ translates to V_ADD_F16. - void - Inst_VOP3__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_MUL_F16 class methods --- - - Inst_VOP3__V_MUL_F16::Inst_VOP3__V_MUL_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mul_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_MUL_F16 - - Inst_VOP3__V_MUL_F16::~Inst_VOP3__V_MUL_F16() - { - } // ~Inst_VOP3__V_MUL_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 * S1.f16. - // Supports denormals, round mode, exception flags, saturation. - void - Inst_VOP3__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_MAC_F16 class methods --- - - Inst_VOP3__V_MAC_F16::Inst_VOP3__V_MAC_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mac_f16", false) - { - setFlag(ALU); - setFlag(F16); - setFlag(MAC); - } // Inst_VOP3__V_MAC_F16 - - Inst_VOP3__V_MAC_F16::~Inst_VOP3__V_MAC_F16() - { - } // ~Inst_VOP3__V_MAC_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 * S1.f16 + D.f16. - // Supports round mode, exception flags, saturation. - // SQ translates this to V_MAD_F16. - void - Inst_VOP3__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_ADD_U16 class methods --- - - Inst_VOP3__V_ADD_U16::Inst_VOP3__V_ADD_U16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_add_u16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_ADD_U16 - - Inst_VOP3__V_ADD_U16::~Inst_VOP3__V_ADD_U16() - { - } // ~Inst_VOP3__V_ADD_U16 - - // --- description from .arch file --- - // D.u16 = S0.u16 + S1.u16. - // Supports saturation (unsigned 16-bit integer domain). 
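The carry-propagating adds and subtracts above (v_add_co_u32 through v_subbrev_co_u32) compute the per-lane carry or borrow by widening to 64 bits and comparing against 0x100000000ULL, i.e. 2^32 (the code's threshold, not the 0x800000000ULL that appears in the quoted .arch text), then record it as one bit of the chosen SGPR pair. A stand-alone sketch of that arithmetic; the struct and function names are illustrative, not gem5's:

#include <cstdint>

struct AddCarry { uint32_t d; bool carry; };

// D = S0 + S1 (+ carry-in); carry-out when the widened sum reaches 2^32.
AddCarry addCo(uint32_t s0, uint32_t s1, bool carryIn = false)
{
    uint64_t wide = uint64_t(s0) + uint64_t(s1) + (carryIn ? 1 : 0);
    return { uint32_t(wide), wide >= 0x100000000ULL };
}

struct SubBorrow { uint32_t d; bool borrow; };

// D = S0 - S1 (- borrow-in); borrow-out when the subtrahend exceeds S0.
SubBorrow subCo(uint32_t s0, uint32_t s1, bool borrowIn = false)
{
    return { uint32_t(s0 - s1 - (borrowIn ? 1 : 0)),
             uint64_t(s1) + (borrowIn ? 1 : 0) > s0 };
}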
- void - Inst_VOP3__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_SUB_U16 class methods --- - - Inst_VOP3__V_SUB_U16::Inst_VOP3__V_SUB_U16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sub_u16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_SUB_U16 - - Inst_VOP3__V_SUB_U16::~Inst_VOP3__V_SUB_U16() - { - } // ~Inst_VOP3__V_SUB_U16 - - // --- description from .arch file --- - // D.u16 = S0.u16 - S1.u16. - // Supports saturation (unsigned 16-bit integer domain). - void - Inst_VOP3__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] - src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_SUBREV_U16 class methods --- - - Inst_VOP3__V_SUBREV_U16::Inst_VOP3__V_SUBREV_U16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_subrev_u16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_SUBREV_U16 - - Inst_VOP3__V_SUBREV_U16::~Inst_VOP3__V_SUBREV_U16() - { - } // ~Inst_VOP3__V_SUBREV_U16 - - // --- description from .arch file --- - // D.u16 = S1.u16 - S0.u16. - // Supports saturation (unsigned 16-bit integer domain). - // SQ translates this to V_SUB_U16 with reversed operands. - void - Inst_VOP3__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] - src0[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MUL_LO_U16 class methods --- - - Inst_VOP3__V_MUL_LO_U16::Inst_VOP3__V_MUL_LO_U16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mul_lo_u16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MUL_LO_U16 - - Inst_VOP3__V_MUL_LO_U16::~Inst_VOP3__V_MUL_LO_U16() - { - } // ~Inst_VOP3__V_MUL_LO_U16 - - // --- description from .arch file --- - // D.u16 = S0.u16 * S1.u16. 
- // Supports saturation (unsigned 16-bit integer domain). - void - Inst_VOP3__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] * src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LSHLREV_B16 class methods --- - - Inst_VOP3__V_LSHLREV_B16::Inst_VOP3__V_LSHLREV_B16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_lshlrev_b16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_LSHLREV_B16 - - Inst_VOP3__V_LSHLREV_B16::~Inst_VOP3__V_LSHLREV_B16() - { - } // ~Inst_VOP3__V_LSHLREV_B16 - - // --- description from .arch file --- - // D.u[15:0] = S1.u[15:0] << S0.u[3:0]. - // SQ translates this to an internal SP opcode. - void - Inst_VOP3__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] << bits(src0[lane], 3, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LSHRREV_B16 class methods --- - - Inst_VOP3__V_LSHRREV_B16::Inst_VOP3__V_LSHRREV_B16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_lshrrev_b16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_LSHRREV_B16 - - Inst_VOP3__V_LSHRREV_B16::~Inst_VOP3__V_LSHRREV_B16() - { - } // ~Inst_VOP3__V_LSHRREV_B16 - - // --- description from .arch file --- - // D.u[15:0] = S1.u[15:0] >> S0.u[3:0]. - // The vacated bits are set to zero. - // SQ translates this to an internal SP opcode. 
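The 16-bit integer forms above work directly on uint16_t lane values, so the arithmetic wraps modulo 2^16; the saturation mentioned in the .arch text (presumably the VOP3 clamp) is not modeled in these lane bodies. The 16-bit shifts likewise take only the low four bits of S0 as the count. A compact sketch with illustrative names:

#include <cstdint>

uint16_t v_add_u16(uint16_t s0, uint16_t s1)     { return uint16_t(s0 + s1); }
uint16_t v_sub_u16(uint16_t s0, uint16_t s1)     { return uint16_t(s0 - s1); }
uint16_t v_mul_lo_u16(uint16_t s0, uint16_t s1)  { return uint16_t(s0 * s1); }
// Shift count is S0[3:0], i.e. bits(src0, 3, 0) in the lane bodies above and below.
uint16_t v_lshlrev_b16(uint16_t s0, uint16_t s1) { return uint16_t(s1 << (s0 & 0xf)); }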
- void - Inst_VOP3__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_ASHRREV_I16 class methods --- - - Inst_VOP3__V_ASHRREV_I16::Inst_VOP3__V_ASHRREV_I16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_ashrrev_i16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_ASHRREV_I16 - - Inst_VOP3__V_ASHRREV_I16::~Inst_VOP3__V_ASHRREV_I16() - { - } // ~Inst_VOP3__V_ASHRREV_I16 - - // --- description from .arch file --- - // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0]. - // The vacated bits are set to the sign bit of the input value. - // SQ translates this to an internal SP opcode. - void - Inst_VOP3__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - VecOperandI16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAX_F16 class methods --- - - Inst_VOP3__V_MAX_F16::Inst_VOP3__V_MAX_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_max_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_MAX_F16 - - Inst_VOP3__V_MAX_F16::~Inst_VOP3__V_MAX_F16() - { - } // ~Inst_VOP3__V_MAX_F16 - - // --- description from .arch file --- - // D.f16 = max(S0.f16, S1.f16). - // IEEE compliant. Supports denormals, round mode, exception flags, - // saturation. - void - Inst_VOP3__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_MIN_F16 class methods --- - - Inst_VOP3__V_MIN_F16::Inst_VOP3__V_MIN_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_min_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_MIN_F16 - - Inst_VOP3__V_MIN_F16::~Inst_VOP3__V_MIN_F16() - { - } // ~Inst_VOP3__V_MIN_F16 - - // --- description from .arch file --- - // D.f16 = min(S0.f16, S1.f16). - // IEEE compliant. Supports denormals, round mode, exception flags, - // saturation. - void - Inst_VOP3__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_MAX_U16 class methods --- - - Inst_VOP3__V_MAX_U16::Inst_VOP3__V_MAX_U16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_max_u16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MAX_U16 - - Inst_VOP3__V_MAX_U16::~Inst_VOP3__V_MAX_U16() - { - } // ~Inst_VOP3__V_MAX_U16 - - // --- description from .arch file --- - // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]). 
- void - Inst_VOP3__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAX_I16 class methods --- - - Inst_VOP3__V_MAX_I16::Inst_VOP3__V_MAX_I16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_max_i16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MAX_I16 - - Inst_VOP3__V_MAX_I16::~Inst_VOP3__V_MAX_I16() - { - } // ~Inst_VOP3__V_MAX_I16 - - // --- description from .arch file --- - // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]). - void - Inst_VOP3__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - VecOperandI16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MIN_U16 class methods --- - - Inst_VOP3__V_MIN_U16::Inst_VOP3__V_MIN_U16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_min_u16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MIN_U16 - - Inst_VOP3__V_MIN_U16::~Inst_VOP3__V_MIN_U16() - { - } // ~Inst_VOP3__V_MIN_U16 - - // --- description from .arch file --- - // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]). - void - Inst_VOP3__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MIN_I16 class methods --- - - Inst_VOP3__V_MIN_I16::Inst_VOP3__V_MIN_I16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_min_i16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MIN_I16 - - Inst_VOP3__V_MIN_I16::~Inst_VOP3__V_MIN_I16() - { - } // ~Inst_VOP3__V_MIN_I16 - - // --- description from .arch file --- - // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]). 
- void - Inst_VOP3__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - VecOperandI16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LDEXP_F16 class methods --- - - Inst_VOP3__V_LDEXP_F16::Inst_VOP3__V_LDEXP_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_ldexp_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_LDEXP_F16 - - Inst_VOP3__V_LDEXP_F16::~Inst_VOP3__V_LDEXP_F16() - { - } // ~Inst_VOP3__V_LDEXP_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 * (2 ** S1.i16). - void - Inst_VOP3__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_ADD_U32 class methods --- - - Inst_VOP3__V_ADD_U32::Inst_VOP3__V_ADD_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_add_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_ADD_U32 - - Inst_VOP3__V_ADD_U32::~Inst_VOP3__V_ADD_U32() - { - } // ~Inst_VOP3__V_ADD_U32 - - // --- description from .arch file --- - // D.u32 = S0.u32 + S1.u32. - void - Inst_VOP3__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_SUB_U32 class methods --- - - Inst_VOP3__V_SUB_U32::Inst_VOP3__V_SUB_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sub_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_SUB_U32 - - Inst_VOP3__V_SUB_U32::~Inst_VOP3__V_SUB_U32() - { - } // ~Inst_VOP3__V_SUB_U32 - - // --- description from .arch file --- - // D.u32 = S0.u32 - S1.u32. 
- void - Inst_VOP3__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] - src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_SUBREV_U32 class methods --- - - Inst_VOP3__V_SUBREV_U32::Inst_VOP3__V_SUBREV_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_subrev_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_SUBREV_U32 - - Inst_VOP3__V_SUBREV_U32::~Inst_VOP3__V_SUBREV_U32() - { - } // ~Inst_VOP3__V_SUBREV_U32 - - // --- description from .arch file --- - // D.u32 = S1.u32 - S0.u32. - void - Inst_VOP3__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] - src0[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_NOP class methods --- - - Inst_VOP3__V_NOP::Inst_VOP3__V_NOP(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_nop", false) - { - setFlag(Nop); - setFlag(ALU); - } // Inst_VOP3__V_NOP - - Inst_VOP3__V_NOP::~Inst_VOP3__V_NOP() - { - } // ~Inst_VOP3__V_NOP - - // --- description from .arch file --- - // Do nothing. - void - Inst_VOP3__V_NOP::execute(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_VOP3__V_MOV_B32 class methods --- - - Inst_VOP3__V_MOV_B32::Inst_VOP3__V_MOV_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mov_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MOV_B32 - - Inst_VOP3__V_MOV_B32::~Inst_VOP3__V_MOV_B32() - { - } // ~Inst_VOP3__V_MOV_B32 - - // --- description from .arch file --- - // D.u = S0.u. - // Input and output modifiers not supported; this is an untyped operation. - void - Inst_VOP3__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_I32_F64 class methods --- - - Inst_VOP3__V_CVT_I32_F64::Inst_VOP3__V_CVT_I32_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_i32_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CVT_I32_F64 - - Inst_VOP3__V_CVT_I32_F64::~Inst_VOP3__V_CVT_I32_F64() - { - } // ~Inst_VOP3__V_CVT_I32_F64 - - // --- description from .arch file --- - // D.i = (int)S0.d. - // Out-of-range floating point values (including infinity) saturate. 
NaN is - // --- converted to 0. - void - Inst_VOP3__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - int exp; - std::frexp(src[lane],&exp); - if (std::isnan(src[lane])) { - vdst[lane] = 0; - } else if (std::isinf(src[lane]) || exp > 30) { - if (std::signbit(src[lane])) { - vdst[lane] = INT_MIN; - } else { - vdst[lane] = INT_MAX; - } - } else { - vdst[lane] = (VecElemI32)src[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_F64_I32 class methods --- - - Inst_VOP3__V_CVT_F64_I32::Inst_VOP3__V_CVT_F64_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_f64_i32", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CVT_F64_I32 - - Inst_VOP3__V_CVT_F64_I32::~Inst_VOP3__V_CVT_F64_I32() - { - } // ~Inst_VOP3__V_CVT_F64_I32 - - // --- description from .arch file --- - // D.d = (double)S0.i. - void - Inst_VOP3__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF64)src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_F32_I32 class methods --- - - Inst_VOP3__V_CVT_F32_I32::Inst_VOP3__V_CVT_F32_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_f32_i32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_F32_I32 - - Inst_VOP3__V_CVT_F32_I32::~Inst_VOP3__V_CVT_F32_I32() - { - } // ~Inst_VOP3__V_CVT_F32_I32 - - // --- description from .arch file --- - // D.f = (float)S0.i. - void - Inst_VOP3__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - VecOperandI32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_F32_U32 class methods --- - - Inst_VOP3__V_CVT_F32_U32::Inst_VOP3__V_CVT_F32_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_f32_u32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_F32_U32 - - Inst_VOP3__V_CVT_F32_U32::~Inst_VOP3__V_CVT_F32_U32() - { - } // ~Inst_VOP3__V_CVT_F32_U32 - - // --- description from .arch file --- - // D.f = (float)S0.u. 
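The saturating float-to-integer conversion above (v_cvt_i32_f64 here; the f32-source variants below follow the same pattern) maps NaN to 0 and clamps out-of-range values, including infinities, to INT_MIN/INT_MAX, detecting the out-of-range case from the exponent returned by std::frexp. A stand-alone sketch of the same saturating behaviour using explicit range checks instead (function name illustrative):

#include <climits>
#include <cmath>
#include <cstdint>

int32_t cvt_i32_f64_saturating(double x)
{
    if (std::isnan(x)) {
        return 0;                  // NaN converts to 0
    }
    if (x >= 2147483648.0) {       // 2^31 and above (incl. +inf): clamp high
        return INT_MAX;
    }
    if (x < -2147483648.0) {       // below -2^31 (incl. -inf): clamp low
        return INT_MIN;
    }
    return int32_t(x);             // in-range values truncate toward zero
}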
- void - Inst_VOP3__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_U32_F32 class methods --- - - Inst_VOP3__V_CVT_U32_F32::Inst_VOP3__V_CVT_U32_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_u32_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_U32_F32 - - Inst_VOP3__V_CVT_U32_F32::~Inst_VOP3__V_CVT_U32_F32() - { - } // ~Inst_VOP3__V_CVT_U32_F32 - - // --- description from .arch file --- - // D.u = (unsigned)S0.f. - // Out-of-range floating point values (including infinity) saturate. NaN is - // --- converted to 0. - void - Inst_VOP3__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - int exp; - std::frexp(src[lane],&exp); - if (std::isnan(src[lane])) { - vdst[lane] = 0; - } else if (std::isinf(src[lane])) { - if (std::signbit(src[lane])) { - vdst[lane] = 0; - } else { - vdst[lane] = UINT_MAX; - } - } else if (exp > 31) { - vdst[lane] = UINT_MAX; - } else { - vdst[lane] = (VecElemU32)src[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_I32_F32 class methods --- - - Inst_VOP3__V_CVT_I32_F32::Inst_VOP3__V_CVT_I32_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_i32_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_I32_F32 - - Inst_VOP3__V_CVT_I32_F32::~Inst_VOP3__V_CVT_I32_F32() - { - } // ~Inst_VOP3__V_CVT_I32_F32 - - // --- description from .arch file --- - // D.i = (int)S0.f. - // Out-of-range floating point values (including infinity) saturate. NaN is - // --- converted to 0. 
- void - Inst_VOP3__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - int exp; - std::frexp(src[lane],&exp); - if (std::isnan(src[lane])) { - vdst[lane] = 0; - } else if (std::isinf(src[lane]) || exp > 30) { - if (std::signbit(src[lane])) { - vdst[lane] = INT_MIN; - } else { - vdst[lane] = INT_MAX; - } - } else { - vdst[lane] = (VecElemI32)src[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MOV_FED_B32 class methods --- - - Inst_VOP3__V_MOV_FED_B32::Inst_VOP3__V_MOV_FED_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mov_fed_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MOV_FED_B32 - - Inst_VOP3__V_MOV_FED_B32::~Inst_VOP3__V_MOV_FED_B32() - { - } // ~Inst_VOP3__V_MOV_FED_B32 - - // --- description from .arch file --- - // D.u = S0.u; - // Introduce EDC double error upon write to dest vgpr without causing an - // --- exception. - // Input and output modifiers not supported; this is an untyped operation. - void - Inst_VOP3__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CVT_F16_F32 class methods --- - - Inst_VOP3__V_CVT_F16_F32::Inst_VOP3__V_CVT_F16_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_f16_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_F16_F32 - - Inst_VOP3__V_CVT_F16_F32::~Inst_VOP3__V_CVT_F16_F32() - { - } // ~Inst_VOP3__V_CVT_F16_F32 - - // --- description from .arch file --- - // D.f16 = flt32_to_flt16(S0.f). - // Supports input modifiers and creates FP16 denormals when appropriate. - void - Inst_VOP3__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CVT_F32_F16 class methods --- - - Inst_VOP3__V_CVT_F32_F16::Inst_VOP3__V_CVT_F32_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_f32_f16", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_F32_F16 - - Inst_VOP3__V_CVT_F32_F16::~Inst_VOP3__V_CVT_F32_F16() - { - } // ~Inst_VOP3__V_CVT_F32_F16 - - // --- description from .arch file --- - // D.f = flt16_to_flt32(S0.f16). - // FP16 denormal inputs are always accepted. - void - Inst_VOP3__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CVT_RPI_I32_F32 class methods --- - - Inst_VOP3__V_CVT_RPI_I32_F32::Inst_VOP3__V_CVT_RPI_I32_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_rpi_i32_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_RPI_I32_F32 - - Inst_VOP3__V_CVT_RPI_I32_F32::~Inst_VOP3__V_CVT_RPI_I32_F32() - { - } // ~Inst_VOP3__V_CVT_RPI_I32_F32 - - // --- description from .arch file --- - // D.i = (int)floor(S0.f + 0.5). 
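v_cvt_rpi_i32_f32 rounds halfway cases toward plus infinity by adding 0.5 before flooring, and v_cvt_flr_i32_f32 (below) simply floors; unlike the plain v_cvt_i32_f32 above, these lane bodies apply no NaN or range clamping. A two-line sketch (names illustrative):

#include <cmath>
#include <cstdint>

int32_t cvt_rpi_i32_f32(float x) { return int32_t(std::floor(x + 0.5)); } // e.g. -1.5 -> -1
int32_t cvt_flr_i32_f32(float x) { return int32_t(std::floor(x)); }       // e.g. -1.5 -> -2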
- void - Inst_VOP3__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_FLR_I32_F32 class methods --- - - Inst_VOP3__V_CVT_FLR_I32_F32::Inst_VOP3__V_CVT_FLR_I32_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_flr_i32_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_FLR_I32_F32 - - Inst_VOP3__V_CVT_FLR_I32_F32::~Inst_VOP3__V_CVT_FLR_I32_F32() - { - } // ~Inst_VOP3__V_CVT_FLR_I32_F32 - - // --- description from .arch file --- - // D.i = (int)floor(S0.f). - void - Inst_VOP3__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemI32)std::floor(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_OFF_F32_I4 class methods --- - - Inst_VOP3__V_CVT_OFF_F32_I4::Inst_VOP3__V_CVT_OFF_F32_I4(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_off_f32_i4", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_OFF_F32_I4 - - Inst_VOP3__V_CVT_OFF_F32_I4::~Inst_VOP3__V_CVT_OFF_F32_I4() - { - } // ~Inst_VOP3__V_CVT_OFF_F32_I4 - - // --- description from .arch file --- - // 4-bit signed int to 32-bit float. Used for interpolation in shader. - void - Inst_VOP3__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst) - { - // Could not parse sq_uc.arch desc field - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CVT_F32_F64 class methods --- - - Inst_VOP3__V_CVT_F32_F64::Inst_VOP3__V_CVT_F32_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_f32_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CVT_F32_F64 - - Inst_VOP3__V_CVT_F32_F64::~Inst_VOP3__V_CVT_F32_F64() - { - } // ~Inst_VOP3__V_CVT_F32_F64 - - // --- description from .arch file --- - // D.f = (float)S0.d. 
- void - Inst_VOP3__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_F64_F32 class methods --- - - Inst_VOP3__V_CVT_F64_F32::Inst_VOP3__V_CVT_F64_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_f64_f32", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CVT_F64_F32 - - Inst_VOP3__V_CVT_F64_F32::~Inst_VOP3__V_CVT_F64_F32() - { - } // ~Inst_VOP3__V_CVT_F64_F32 - - // --- description from .arch file --- - // D.d = (double)S0.f. - void - Inst_VOP3__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF64)src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_F32_UBYTE0 class methods --- - - Inst_VOP3__V_CVT_F32_UBYTE0::Inst_VOP3__V_CVT_F32_UBYTE0(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte0", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_F32_UBYTE0 - - Inst_VOP3__V_CVT_F32_UBYTE0::~Inst_VOP3__V_CVT_F32_UBYTE0() - { - } // ~Inst_VOP3__V_CVT_F32_UBYTE0 - - // --- description from .arch file --- - // D.f = (float)(S0.u[7:0]). - void - Inst_VOP3__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)bits(src[lane], 7, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_F32_UBYTE1 class methods --- - - Inst_VOP3__V_CVT_F32_UBYTE1::Inst_VOP3__V_CVT_F32_UBYTE1(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte1", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_F32_UBYTE1 - - Inst_VOP3__V_CVT_F32_UBYTE1::~Inst_VOP3__V_CVT_F32_UBYTE1() - { - } // ~Inst_VOP3__V_CVT_F32_UBYTE1 - - // --- description from .arch file --- - // D.f = (float)(S0.u[15:8]). 
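The v_cvt_f32_ubyteN lanes (ubyte0 above, ubyte1 through ubyte3 below) each pick one byte of the 32-bit source with the gem5 bits(src, hi, lo) helper and convert it to float. An equivalent shift-and-mask stand-in (names illustrative):

#include <cstdint>

// byteIdx 0..3 selects S0[7:0], S0[15:8], S0[23:16] or S0[31:24].
float cvt_f32_ubyte(uint32_t src, int byteIdx)
{
    return float((src >> (8 * byteIdx)) & 0xffu);
}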
- void - Inst_VOP3__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)bits(src[lane], 15, 8); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_F32_UBYTE2 class methods --- - - Inst_VOP3__V_CVT_F32_UBYTE2::Inst_VOP3__V_CVT_F32_UBYTE2(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte2", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_F32_UBYTE2 - - Inst_VOP3__V_CVT_F32_UBYTE2::~Inst_VOP3__V_CVT_F32_UBYTE2() - { - } // ~Inst_VOP3__V_CVT_F32_UBYTE2 - - // --- description from .arch file --- - // D.f = (float)(S0.u[23:16]). - void - Inst_VOP3__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)bits(src[lane], 23, 16); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_F32_UBYTE3 class methods --- - - Inst_VOP3__V_CVT_F32_UBYTE3::Inst_VOP3__V_CVT_F32_UBYTE3(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte3", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_F32_UBYTE3 - - Inst_VOP3__V_CVT_F32_UBYTE3::~Inst_VOP3__V_CVT_F32_UBYTE3() - { - } // ~Inst_VOP3__V_CVT_F32_UBYTE3 - - // --- description from .arch file --- - // D.f = (float)(S0.u[31:24]). - void - Inst_VOP3__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)bits(src[lane], 31, 24); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_U32_F64 class methods --- - - Inst_VOP3__V_CVT_U32_F64::Inst_VOP3__V_CVT_U32_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_u32_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CVT_U32_F64 - - Inst_VOP3__V_CVT_U32_F64::~Inst_VOP3__V_CVT_U32_F64() - { - } // ~Inst_VOP3__V_CVT_U32_F64 - - // --- description from .arch file --- - // D.u = (unsigned)S0.d. - // Out-of-range floating point values (including infinity) saturate. NaN is - // --- converted to 0. 
- void - Inst_VOP3__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - int exp; - std::frexp(src[lane],&exp); - if (std::isnan(src[lane])) { - vdst[lane] = 0; - } else if (std::isinf(src[lane])) { - if (std::signbit(src[lane])) { - vdst[lane] = 0; - } else { - vdst[lane] = UINT_MAX; - } - } else if (exp > 31) { - vdst[lane] = UINT_MAX; - } else { - vdst[lane] = (VecElemU32)src[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_F64_U32 class methods --- - - Inst_VOP3__V_CVT_F64_U32::Inst_VOP3__V_CVT_F64_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_f64_u32", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CVT_F64_U32 - - Inst_VOP3__V_CVT_F64_U32::~Inst_VOP3__V_CVT_F64_U32() - { - } // ~Inst_VOP3__V_CVT_F64_U32 - - // --- description from .arch file --- - // D.d = (double)S0.u. - void - Inst_VOP3__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF64)src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_TRUNC_F64 class methods --- - - Inst_VOP3__V_TRUNC_F64::Inst_VOP3__V_TRUNC_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_trunc_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_TRUNC_F64 - - Inst_VOP3__V_TRUNC_F64::~Inst_VOP3__V_TRUNC_F64() - { - } // ~Inst_VOP3__V_TRUNC_F64 - - // --- description from .arch file --- - // D.d = trunc(S0.d), return integer part of S0.d. - void - Inst_VOP3__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::trunc(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CEIL_F64 class methods --- - - Inst_VOP3__V_CEIL_F64::Inst_VOP3__V_CEIL_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_ceil_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CEIL_F64 - - Inst_VOP3__V_CEIL_F64::~Inst_VOP3__V_CEIL_F64() - { - } // ~Inst_VOP3__V_CEIL_F64 - - // --- description from .arch file --- - // D.d = trunc(S0.d); - // if(S0.d > 0.0 && S0.d != D.d) then D.d += 1.0. 
- void - Inst_VOP3__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::ceil(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_RNDNE_F64 class methods --- - - Inst_VOP3__V_RNDNE_F64::Inst_VOP3__V_RNDNE_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_rndne_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_RNDNE_F64 - - Inst_VOP3__V_RNDNE_F64::~Inst_VOP3__V_RNDNE_F64() - { - } // ~Inst_VOP3__V_RNDNE_F64 - - // --- description from .arch file --- - // D.d = round_nearest_even(S0.d). - void - Inst_VOP3__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = roundNearestEven(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_FLOOR_F64 class methods --- - - Inst_VOP3__V_FLOOR_F64::Inst_VOP3__V_FLOOR_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_floor_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_FLOOR_F64 - - Inst_VOP3__V_FLOOR_F64::~Inst_VOP3__V_FLOOR_F64() - { - } // ~Inst_VOP3__V_FLOOR_F64 - - // --- description from .arch file --- - // D.d = trunc(S0.d); - // if(S0.d < 0.0 && S0.d != D.d) then D.d += -1.0. - void - Inst_VOP3__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::floor(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_FRACT_F32 class methods --- - - Inst_VOP3__V_FRACT_F32::Inst_VOP3__V_FRACT_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_fract_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_FRACT_F32 - - Inst_VOP3__V_FRACT_F32::~Inst_VOP3__V_FRACT_F32() - { - } // ~Inst_VOP3__V_FRACT_F32 - - // --- description from .arch file --- - // D.f = S0.f - floor(S0.f). 
- void - Inst_VOP3__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemF32 int_part(0.0); - vdst[lane] = std::modf(src[lane], &int_part); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_TRUNC_F32 class methods --- - - Inst_VOP3__V_TRUNC_F32::Inst_VOP3__V_TRUNC_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_trunc_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_TRUNC_F32 - - Inst_VOP3__V_TRUNC_F32::~Inst_VOP3__V_TRUNC_F32() - { - } // ~Inst_VOP3__V_TRUNC_F32 - - // --- description from .arch file --- - // D.f = trunc(S0.f), return integer part of S0.f. - void - Inst_VOP3__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::trunc(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CEIL_F32 class methods --- - - Inst_VOP3__V_CEIL_F32::Inst_VOP3__V_CEIL_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_ceil_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CEIL_F32 - - Inst_VOP3__V_CEIL_F32::~Inst_VOP3__V_CEIL_F32() - { - } // ~Inst_VOP3__V_CEIL_F32 - - // --- description from .arch file --- - // D.f = trunc(S0.f); - // if(S0.f > 0.0 && S0.f != D.f) then D.f += 1.0. - void - Inst_VOP3__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::ceil(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_RNDNE_F32 class methods --- - - Inst_VOP3__V_RNDNE_F32::Inst_VOP3__V_RNDNE_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_rndne_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_RNDNE_F32 - - Inst_VOP3__V_RNDNE_F32::~Inst_VOP3__V_RNDNE_F32() - { - } // ~Inst_VOP3__V_RNDNE_F32 - - // --- description from .arch file --- - // D.f = round_nearest_even(S0.f). 
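For reference, round_nearest_even in the description above is IEEE round-half-to-even (banker's rounding), which the implementation below delegates to the gem5 helper roundNearestEven rather than std::round (std::round sends halfway cases away from zero). A minimal standalone sketch of that intended behaviour, using only the C++ standard library (illustrative only, not gem5 code and not part of this patch):

    #include <cfenv>
    #include <cmath>
    #include <cstdio>

    // Round to nearest, ties to even. std::nearbyint honours the current
    // floating-point rounding mode; FE_TONEAREST is the IEEE-754
    // round-half-to-even mode.
    static float roundNearestEvenSketch(float x)
    {
        std::fesetround(FE_TONEAREST);
        return std::nearbyint(x);
    }

    int main()
    {
        // 0.5 -> 0, 1.5 -> 2, 2.5 -> 2 (contrast with std::round: 1, 2, 3).
        std::printf("%g %g %g\n",
                    roundNearestEvenSketch(0.5f),
                    roundNearestEvenSketch(1.5f),
                    roundNearestEvenSketch(2.5f));
        return 0;
    }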
- void - Inst_VOP3__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = roundNearestEven(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_FLOOR_F32 class methods --- - - Inst_VOP3__V_FLOOR_F32::Inst_VOP3__V_FLOOR_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_floor_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_FLOOR_F32 - - Inst_VOP3__V_FLOOR_F32::~Inst_VOP3__V_FLOOR_F32() - { - } // ~Inst_VOP3__V_FLOOR_F32 - - // --- description from .arch file --- - // D.f = trunc(S0.f); - // if(S0.f < 0.0 && S0.f != D.f) then D.f += -1.0. - void - Inst_VOP3__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::floor(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_EXP_F32 class methods --- - - Inst_VOP3__V_EXP_F32::Inst_VOP3__V_EXP_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_exp_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_EXP_F32 - - Inst_VOP3__V_EXP_F32::~Inst_VOP3__V_EXP_F32() - { - } // ~Inst_VOP3__V_EXP_F32 - - // --- description from .arch file --- - // D.f = pow(2.0, S0.f). - void - Inst_VOP3__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::pow(2.0, src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LOG_F32 class methods --- - - Inst_VOP3__V_LOG_F32::Inst_VOP3__V_LOG_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_log_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_LOG_F32 - - Inst_VOP3__V_LOG_F32::~Inst_VOP3__V_LOG_F32() - { - } // ~Inst_VOP3__V_LOG_F32 - - // --- description from .arch file --- - // D.f = log2(S0.f). Base 2 logarithm. 
- void - Inst_VOP3__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::log2(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_RCP_F32 class methods --- - - Inst_VOP3__V_RCP_F32::Inst_VOP3__V_RCP_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_rcp_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_RCP_F32 - - Inst_VOP3__V_RCP_F32::~Inst_VOP3__V_RCP_F32() - { - } // ~Inst_VOP3__V_RCP_F32 - - // --- description from .arch file --- - // D.f = 1.0 / S0.f. Reciprocal with IEEE rules and < 1ulp error. - void - Inst_VOP3__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = 1.0 / src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_RCP_IFLAG_F32 class methods --- - - Inst_VOP3__V_RCP_IFLAG_F32::Inst_VOP3__V_RCP_IFLAG_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_rcp_iflag_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_RCP_IFLAG_F32 - - Inst_VOP3__V_RCP_IFLAG_F32::~Inst_VOP3__V_RCP_IFLAG_F32() - { - } // ~Inst_VOP3__V_RCP_IFLAG_F32 - - // --- description from .arch file --- - // D.f = 1.0 / S0.f. Reciprocal intended for integer division, can raise - // --- integer DIV_BY_ZERO exception but cannot raise floating-point - // --- exceptions. - void - Inst_VOP3__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = 1.0 / src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_RSQ_F32 class methods --- - - Inst_VOP3__V_RSQ_F32::Inst_VOP3__V_RSQ_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_rsq_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_RSQ_F32 - - Inst_VOP3__V_RSQ_F32::~Inst_VOP3__V_RSQ_F32() - { - } // ~Inst_VOP3__V_RSQ_F32 - - // --- description from .arch file --- - // D.f = 1.0 / sqrt(S0.f). Reciprocal square root with IEEE rules. 
- void - Inst_VOP3__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = 1.0 / std::sqrt(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_RCP_F64 class methods --- - - Inst_VOP3__V_RCP_F64::Inst_VOP3__V_RCP_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_rcp_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_RCP_F64 - - Inst_VOP3__V_RCP_F64::~Inst_VOP3__V_RCP_F64() - { - } // ~Inst_VOP3__V_RCP_F64 - - // --- description from .arch file --- - // D.d = 1.0 / S0.d. - void - Inst_VOP3__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::fpclassify(src[lane]) == FP_ZERO) { - vdst[lane] = +INFINITY; - } else if (std::isnan(src[lane])) { - vdst[lane] = NAN; - } else if (std::isinf(src[lane])) { - if (std::signbit(src[lane])) { - vdst[lane] = -0.0; - } else { - vdst[lane] = 0.0; - } - } else { - vdst[lane] = 1.0 / src[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_RSQ_F64 class methods --- - - Inst_VOP3__V_RSQ_F64::Inst_VOP3__V_RSQ_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_rsq_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_RSQ_F64 - - Inst_VOP3__V_RSQ_F64::~Inst_VOP3__V_RSQ_F64() - { - } // ~Inst_VOP3__V_RSQ_F64 - - // --- description from .arch file --- - // D.d = 1.0 / sqrt(S0.d). See V_RSQ_F32. - void - Inst_VOP3__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::fpclassify(src[lane]) == FP_ZERO) { - vdst[lane] = +INFINITY; - } else if (std::isnan(src[lane])) { - vdst[lane] = NAN; - } else if (std::isinf(src[lane]) && !std::signbit(src[lane])) { - vdst[lane] = 0.0; - } else if (std::signbit(src[lane])) { - vdst[lane] = NAN; - } else { - vdst[lane] = 1.0 / std::sqrt(src[lane]); - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_SQRT_F32 class methods --- - - Inst_VOP3__V_SQRT_F32::Inst_VOP3__V_SQRT_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sqrt_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_SQRT_F32 - - Inst_VOP3__V_SQRT_F32::~Inst_VOP3__V_SQRT_F32() - { - } // ~Inst_VOP3__V_SQRT_F32 - - // --- description from .arch file --- - // D.f = sqrt(S0.f). 
- void - Inst_VOP3__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::sqrt(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_SQRT_F64 class methods --- - - Inst_VOP3__V_SQRT_F64::Inst_VOP3__V_SQRT_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sqrt_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_SQRT_F64 - - Inst_VOP3__V_SQRT_F64::~Inst_VOP3__V_SQRT_F64() - { - } // ~Inst_VOP3__V_SQRT_F64 - - // --- description from .arch file --- - // D.d = sqrt(S0.d). - void - Inst_VOP3__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::sqrt(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_SIN_F32 class methods --- - - Inst_VOP3__V_SIN_F32::Inst_VOP3__V_SIN_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sin_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_SIN_F32 - - Inst_VOP3__V_SIN_F32::~Inst_VOP3__V_SIN_F32() - { - } // ~Inst_VOP3__V_SIN_F32 - - // --- description from .arch file --- - // D.f = sin(S0.f * 2 * PI). - // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in - // float 0.0. - void - Inst_VOP3__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - ConstScalarOperandF32 pi(gpuDynInst, REG_PI); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - pi.read(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::sin(src[lane] * 2 * pi.rawData()); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_COS_F32 class methods --- - - Inst_VOP3__V_COS_F32::Inst_VOP3__V_COS_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cos_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_COS_F32 - - Inst_VOP3__V_COS_F32::~Inst_VOP3__V_COS_F32() - { - } // ~Inst_VOP3__V_COS_F32 - - // --- description from .arch file --- - // D.f = cos(S0.f * 2 * PI). - // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in - // float 1.0. 
- void - Inst_VOP3__V_COS_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - ConstScalarOperandF32 pi(gpuDynInst, REG_PI); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - pi.read(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::cos(src[lane] * 2 * pi.rawData()); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_NOT_B32 class methods --- - - Inst_VOP3__V_NOT_B32::Inst_VOP3__V_NOT_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_not_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_NOT_B32 - - Inst_VOP3__V_NOT_B32::~Inst_VOP3__V_NOT_B32() - { - } // ~Inst_VOP3__V_NOT_B32 - - // --- description from .arch file --- - // D.u = ~S0.u. - // Input and output modifiers not supported. - void - Inst_VOP3__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = ~src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_BFREV_B32 class methods --- - - Inst_VOP3__V_BFREV_B32::Inst_VOP3__V_BFREV_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_bfrev_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_BFREV_B32 - - Inst_VOP3__V_BFREV_B32::~Inst_VOP3__V_BFREV_B32() - { - } // ~Inst_VOP3__V_BFREV_B32 - - // --- description from .arch file --- - // D.u[31:0] = S0.u[0:31], bitfield reverse. - // Input and output modifiers not supported. - void - Inst_VOP3__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = reverseBits(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_FFBH_U32 class methods --- - - Inst_VOP3__V_FFBH_U32::Inst_VOP3__V_FFBH_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_ffbh_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_FFBH_U32 - - Inst_VOP3__V_FFBH_U32::~Inst_VOP3__V_FFBH_U32() - { - } // ~Inst_VOP3__V_FFBH_U32 - - // --- description from .arch file --- - // D.u = position of first 1 in S0.u from MSB; - // D.u = 0xffffffff if S0.u == 0. 
- void - Inst_VOP3__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = findFirstOneMsb(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_FFBL_B32 class methods --- - - Inst_VOP3__V_FFBL_B32::Inst_VOP3__V_FFBL_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_ffbl_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_FFBL_B32 - - Inst_VOP3__V_FFBL_B32::~Inst_VOP3__V_FFBL_B32() - { - } // ~Inst_VOP3__V_FFBL_B32 - - // --- description from .arch file --- - // D.u = position of first 1 in S0.u from LSB; - // D.u = 0xffffffff if S0.u == 0. - void - Inst_VOP3__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = findFirstOne(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_FFBH_I32 class methods --- - - Inst_VOP3__V_FFBH_I32::Inst_VOP3__V_FFBH_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_ffbh_i32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_FFBH_I32 - - Inst_VOP3__V_FFBH_I32::~Inst_VOP3__V_FFBH_I32() - { - } // ~Inst_VOP3__V_FFBH_I32 - - // --- description from .arch file --- - // D.u = position of first bit different from sign bit in S0.i from MSB; - // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff. - void - Inst_VOP3__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = firstOppositeSignBit(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_FREXP_EXP_I32_F64 class methods --- - - Inst_VOP3__V_FREXP_EXP_I32_F64::Inst_VOP3__V_FREXP_EXP_I32_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_frexp_exp_i32_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_FREXP_EXP_I32_F64 - - Inst_VOP3__V_FREXP_EXP_I32_F64::~Inst_VOP3__V_FREXP_EXP_I32_F64() - { - } // ~Inst_VOP3__V_FREXP_EXP_I32_F64 - - // --- description from .arch file --- - // See V_FREXP_EXP_I32_F32. 
- void - Inst_VOP3__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isinf(src[lane]) || std::isnan(src[lane])) { - vdst[lane] = 0; - } else { - VecElemI32 exp(0); - std::frexp(src[lane], &exp); - vdst[lane] = exp; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_FREXP_MANT_F64 class methods --- - - Inst_VOP3__V_FREXP_MANT_F64::Inst_VOP3__V_FREXP_MANT_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_frexp_mant_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_FREXP_MANT_F64 - - Inst_VOP3__V_FREXP_MANT_F64::~Inst_VOP3__V_FREXP_MANT_F64() - { - } // ~Inst_VOP3__V_FREXP_MANT_F64 - - // --- description from .arch file --- - // See V_FREXP_MANT_F32. - void - Inst_VOP3__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemI32 exp(0); - vdst[lane] = std::frexp(src[lane], &exp); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_FRACT_F64 class methods --- - - Inst_VOP3__V_FRACT_F64::Inst_VOP3__V_FRACT_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_fract_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_FRACT_F64 - - Inst_VOP3__V_FRACT_F64::~Inst_VOP3__V_FRACT_F64() - { - } // ~Inst_VOP3__V_FRACT_F64 - - // --- description from .arch file --- - // See V_FRACT_F32. - void - Inst_VOP3__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemF32 int_part(0.0); - vdst[lane] = std::modf(src[lane], &int_part); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_FREXP_EXP_I32_F32 class methods --- - - Inst_VOP3__V_FREXP_EXP_I32_F32::Inst_VOP3__V_FREXP_EXP_I32_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_frexp_exp_i32_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_FREXP_EXP_I32_F32 - - Inst_VOP3__V_FREXP_EXP_I32_F32::~Inst_VOP3__V_FREXP_EXP_I32_F32() - { - } // ~Inst_VOP3__V_FREXP_EXP_I32_F32 - - // --- description from .arch file --- - // if(S0.f == INF || S0.f == NAN) then D.i = 0; - // else D.i = TwosComplement(Exponent(S0.f) - 127 + 1). - // Returns exponent of single precision float input, such that S0.f = - // significand * (2 ** exponent). See also FREXP_MANT_F32, which returns - // the significand. 
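The relation in the description above is what std::frexp computes for finite non-zero inputs: a significand in [0.5, 1.0) and an integer exponent with S0.f = significand * 2^exponent, i.e. the unbiased IEEE exponent plus one, which is why the implementation below can simply report frexp's exponent. A small self-contained illustration (plain host C++, not gem5 code):

    #include <cmath>
    #include <cstdio>

    int main()
    {
        float x = 96.0f;                   // 96 = 0.75 * 2^7
        int exp = 0;
        float mant = std::frexp(x, &exp);  // mant in [0.5, 1.0) for normals

        // Recombining with ldexp reproduces the input exactly.
        std::printf("x=%g mant=%g exp=%d recombined=%g\n",
                    x, mant, exp, std::ldexp(mant, exp));
        return 0;
    }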
- void - Inst_VOP3__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isinf(src[lane])|| std::isnan(src[lane])) { - vdst[lane] = 0; - } else { - VecElemI32 exp(0); - std::frexp(src[lane], &exp); - vdst[lane] = exp; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_FREXP_MANT_F32 class methods --- - - Inst_VOP3__V_FREXP_MANT_F32::Inst_VOP3__V_FREXP_MANT_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_frexp_mant_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_FREXP_MANT_F32 - - Inst_VOP3__V_FREXP_MANT_F32::~Inst_VOP3__V_FREXP_MANT_F32() - { - } // ~Inst_VOP3__V_FREXP_MANT_F32 - - // --- description from .arch file --- - // if(S0.f == INF || S0.f == NAN) then D.f = S0.f; - // else D.f = Mantissa(S0.f). - // Result range is in (-1.0,-0.5][0.5,1.0) in normal cases. Returns binary - // --- significand of single precision float input, such that S0.f = - // --- significand * (2 ** exponent). See also FREXP_EXP_I32_F32, which - // --- returns integer exponent. - void - Inst_VOP3__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isinf(src[lane]) || std::isnan(src[lane])) { - vdst[lane] = src[lane]; - } else { - VecElemI32 exp(0); - vdst[lane] = std::frexp(src[lane], &exp); - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CLREXCP class methods --- - - Inst_VOP3__V_CLREXCP::Inst_VOP3__V_CLREXCP(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_clrexcp", false) - { - } // Inst_VOP3__V_CLREXCP - - Inst_VOP3__V_CLREXCP::~Inst_VOP3__V_CLREXCP() - { - } // ~Inst_VOP3__V_CLREXCP - - // --- description from .arch file --- - // Clear wave's exception state in SIMD (SP). - void - Inst_VOP3__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CVT_F16_U16 class methods --- - - Inst_VOP3__V_CVT_F16_U16::Inst_VOP3__V_CVT_F16_U16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_f16_u16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CVT_F16_U16 - - Inst_VOP3__V_CVT_F16_U16::~Inst_VOP3__V_CVT_F16_U16() - { - } // ~Inst_VOP3__V_CVT_F16_U16 - - // --- description from .arch file --- - // D.f16 = uint16_to_flt16(S.u16). - // Supports denormals, rounding, exception flags and saturation. - void - Inst_VOP3__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CVT_F16_I16 class methods --- - - Inst_VOP3__V_CVT_F16_I16::Inst_VOP3__V_CVT_F16_I16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_f16_i16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CVT_F16_I16 - - Inst_VOP3__V_CVT_F16_I16::~Inst_VOP3__V_CVT_F16_I16() - { - } // ~Inst_VOP3__V_CVT_F16_I16 - - // --- description from .arch file --- - // D.f16 = int16_to_flt16(S.i16). 
- // Supports denormals, rounding, exception flags and saturation. - void - Inst_VOP3__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CVT_U16_F16 class methods --- - - Inst_VOP3__V_CVT_U16_F16::Inst_VOP3__V_CVT_U16_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_u16_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CVT_U16_F16 - - Inst_VOP3__V_CVT_U16_F16::~Inst_VOP3__V_CVT_U16_F16() - { - } // ~Inst_VOP3__V_CVT_U16_F16 - - // --- description from .arch file --- - // D.u16 = flt16_to_uint16(S.f16). - // Supports rounding, exception flags and saturation. - void - Inst_VOP3__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CVT_I16_F16 class methods --- - - Inst_VOP3__V_CVT_I16_F16::Inst_VOP3__V_CVT_I16_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_i16_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CVT_I16_F16 - - Inst_VOP3__V_CVT_I16_F16::~Inst_VOP3__V_CVT_I16_F16() - { - } // ~Inst_VOP3__V_CVT_I16_F16 - - // --- description from .arch file --- - // D.i16 = flt16_to_int16(S.f16). - // Supports rounding, exception flags and saturation. - void - Inst_VOP3__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_RCP_F16 class methods --- - - Inst_VOP3__V_RCP_F16::Inst_VOP3__V_RCP_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_rcp_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_RCP_F16 - - Inst_VOP3__V_RCP_F16::~Inst_VOP3__V_RCP_F16() - { - } // ~Inst_VOP3__V_RCP_F16 - - // --- description from .arch file --- - // if(S0.f16 == 1.0f) - // D.f16 = 1.0f; - // else - // D.f16 = ApproximateRecip(S0.f16). - void - Inst_VOP3__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_SQRT_F16 class methods --- - - Inst_VOP3__V_SQRT_F16::Inst_VOP3__V_SQRT_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sqrt_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_SQRT_F16 - - Inst_VOP3__V_SQRT_F16::~Inst_VOP3__V_SQRT_F16() - { - } // ~Inst_VOP3__V_SQRT_F16 - - // --- description from .arch file --- - // if(S0.f16 == 1.0f) - // D.f16 = 1.0f; - // else - // D.f16 = ApproximateSqrt(S0.f16). - void - Inst_VOP3__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_RSQ_F16 class methods --- - - Inst_VOP3__V_RSQ_F16::Inst_VOP3__V_RSQ_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_rsq_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_RSQ_F16 - - Inst_VOP3__V_RSQ_F16::~Inst_VOP3__V_RSQ_F16() - { - } // ~Inst_VOP3__V_RSQ_F16 - - // --- description from .arch file --- - // if(S0.f16 == 1.0f) - // D.f16 = 1.0f; - // else - // D.f16 = ApproximateRecipSqrt(S0.f16). - void - Inst_VOP3__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_LOG_F16 class methods --- - - Inst_VOP3__V_LOG_F16::Inst_VOP3__V_LOG_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_log_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_LOG_F16 - - Inst_VOP3__V_LOG_F16::~Inst_VOP3__V_LOG_F16() - { - } // ~Inst_VOP3__V_LOG_F16 - - // --- description from .arch file --- - // if(S0.f16 == 1.0f) - // D.f16 = 0.0f; - // else - // D.f16 = ApproximateLog2(S0.f16). 
- void - Inst_VOP3__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_EXP_F16 class methods --- - - Inst_VOP3__V_EXP_F16::Inst_VOP3__V_EXP_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_exp_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_EXP_F16 - - Inst_VOP3__V_EXP_F16::~Inst_VOP3__V_EXP_F16() - { - } // ~Inst_VOP3__V_EXP_F16 - - // --- description from .arch file --- - // if(S0.f16 == 0.0f) - // D.f16 = 1.0f; - // else - // D.f16 = Approximate2ToX(S0.f16). - void - Inst_VOP3__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_FREXP_MANT_F16 class methods --- - - Inst_VOP3__V_FREXP_MANT_F16::Inst_VOP3__V_FREXP_MANT_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_frexp_mant_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_FREXP_MANT_F16 - - Inst_VOP3__V_FREXP_MANT_F16::~Inst_VOP3__V_FREXP_MANT_F16() - { - } // ~Inst_VOP3__V_FREXP_MANT_F16 - - // --- description from .arch file --- - // if(S0.f16 == +-INF || S0.f16 == NAN) - // D.f16 = S0.f16; - // else - // D.f16 = mantissa(S0.f16). - // Result range is (-1.0,-0.5][0.5,1.0). - // C math library frexp function. - // Returns binary significand of half precision float input, such that the - // original single float = significand * (2 ** exponent). - void - Inst_VOP3__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_FREXP_EXP_I16_F16 class methods --- - - Inst_VOP3__V_FREXP_EXP_I16_F16::Inst_VOP3__V_FREXP_EXP_I16_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_frexp_exp_i16_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_FREXP_EXP_I16_F16 - - Inst_VOP3__V_FREXP_EXP_I16_F16::~Inst_VOP3__V_FREXP_EXP_I16_F16() - { - } // ~Inst_VOP3__V_FREXP_EXP_I16_F16 - - // --- description from .arch file --- - // if(S0.f16 == +-INF || S0.f16 == NAN) - // D.i16 = 0; - // else - // D.i16 = 2s_complement(exponent(S0.f16) - 15 + 1). - // C math library frexp function. - // Returns exponent of half precision float input, such that the - // original single float = significand * (2 ** exponent). - void - Inst_VOP3__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_FLOOR_F16 class methods --- - - Inst_VOP3__V_FLOOR_F16::Inst_VOP3__V_FLOOR_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_floor_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_FLOOR_F16 - - Inst_VOP3__V_FLOOR_F16::~Inst_VOP3__V_FLOOR_F16() - { - } // ~Inst_VOP3__V_FLOOR_F16 - - // --- description from .arch file --- - // D.f16 = trunc(S0.f16); - // if(S0.f16 < 0.0f && S0.f16 != D.f16) then D.f16 -= 1.0f. - void - Inst_VOP3__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CEIL_F16 class methods --- - - Inst_VOP3__V_CEIL_F16::Inst_VOP3__V_CEIL_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_ceil_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CEIL_F16 - - Inst_VOP3__V_CEIL_F16::~Inst_VOP3__V_CEIL_F16() - { - } // ~Inst_VOP3__V_CEIL_F16 - - // --- description from .arch file --- - // D.f16 = trunc(S0.f16); - // if(S0.f16 > 0.0f && S0.f16 != D.f16) then D.f16 += 1.0f. 
- void - Inst_VOP3__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_TRUNC_F16 class methods --- - - Inst_VOP3__V_TRUNC_F16::Inst_VOP3__V_TRUNC_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_trunc_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_TRUNC_F16 - - Inst_VOP3__V_TRUNC_F16::~Inst_VOP3__V_TRUNC_F16() - { - } // ~Inst_VOP3__V_TRUNC_F16 - - // --- description from .arch file --- - // D.f16 = trunc(S0.f16). - // Round-to-zero semantics. - void - Inst_VOP3__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_RNDNE_F16 class methods --- - - Inst_VOP3__V_RNDNE_F16::Inst_VOP3__V_RNDNE_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_rndne_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_RNDNE_F16 - - Inst_VOP3__V_RNDNE_F16::~Inst_VOP3__V_RNDNE_F16() - { - } // ~Inst_VOP3__V_RNDNE_F16 - - // --- description from .arch file --- - // D.f16 = FLOOR(S0.f16 + 0.5f); - // if(floor(S0.f16) is even && fract(S0.f16) == 0.5f) then D.f16 -= 1.0f. - // Round-to-nearest-even semantics. - void - Inst_VOP3__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_FRACT_F16 class methods --- - - Inst_VOP3__V_FRACT_F16::Inst_VOP3__V_FRACT_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_fract_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_FRACT_F16 - - Inst_VOP3__V_FRACT_F16::~Inst_VOP3__V_FRACT_F16() - { - } // ~Inst_VOP3__V_FRACT_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 + -floor(S0.f16). - void - Inst_VOP3__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_SIN_F16 class methods --- - - Inst_VOP3__V_SIN_F16::Inst_VOP3__V_SIN_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sin_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_SIN_F16 - - Inst_VOP3__V_SIN_F16::~Inst_VOP3__V_SIN_F16() - { - } // ~Inst_VOP3__V_SIN_F16 - - // --- description from .arch file --- - // D.f16 = sin(S0.f16 * 2 * PI). - void - Inst_VOP3__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_COS_F16 class methods --- - - Inst_VOP3__V_COS_F16::Inst_VOP3__V_COS_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cos_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_COS_F16 - - Inst_VOP3__V_COS_F16::~Inst_VOP3__V_COS_F16() - { - } // ~Inst_VOP3__V_COS_F16 - - // --- description from .arch file --- - // D.f16 = cos(S0.f16 * 2 * PI). - void - Inst_VOP3__V_COS_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_EXP_LEGACY_F32 class methods --- - - Inst_VOP3__V_EXP_LEGACY_F32::Inst_VOP3__V_EXP_LEGACY_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_exp_legacy_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_EXP_LEGACY_F32 - - Inst_VOP3__V_EXP_LEGACY_F32::~Inst_VOP3__V_EXP_LEGACY_F32() - { - } // ~Inst_VOP3__V_EXP_LEGACY_F32 - - // --- description from .arch file --- - // D.f = pow(2.0, S0.f) with legacy semantics. 
- void - Inst_VOP3__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::pow(2.0, src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LOG_LEGACY_F32 class methods --- - - Inst_VOP3__V_LOG_LEGACY_F32::Inst_VOP3__V_LOG_LEGACY_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_log_legacy_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_LOG_LEGACY_F32 - - Inst_VOP3__V_LOG_LEGACY_F32::~Inst_VOP3__V_LOG_LEGACY_F32() - { - } // ~Inst_VOP3__V_LOG_LEGACY_F32 - - // --- description from .arch file --- - // D.f = log2(S0.f). Base 2 logarithm with legacy semantics. - void - Inst_VOP3__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::log2(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAD_LEGACY_F32 class methods --- - - Inst_VOP3__V_MAD_LEGACY_F32::Inst_VOP3__V_MAD_LEGACY_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mad_legacy_f32", false) - { - setFlag(ALU); - setFlag(F32); - setFlag(MAD); - } // Inst_VOP3__V_MAD_LEGACY_F32 - - Inst_VOP3__V_MAD_LEGACY_F32::~Inst_VOP3__V_MAD_LEGACY_F32() - { - } // ~Inst_VOP3__V_MAD_LEGACY_F32 - - // --- description from .arch file --- - // D.f = S0.f * S1.f + S2.f (DX9 rules, 0.0 * x = 0.0). - void - Inst_VOP3__V_MAD_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAD_F32 class methods --- - - Inst_VOP3__V_MAD_F32::Inst_VOP3__V_MAD_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mad_f32", false) - { - setFlag(ALU); - setFlag(F32); - setFlag(MAD); - } // Inst_VOP3__V_MAD_F32 - - Inst_VOP3__V_MAD_F32::~Inst_VOP3__V_MAD_F32() - { - } // ~Inst_VOP3__V_MAD_F32 - - // --- description from .arch file --- - // D.f = S0.f * S1.f + S2.f. 
- void - Inst_VOP3__V_MAD_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAD_I32_I24 class methods --- - - Inst_VOP3__V_MAD_I32_I24::Inst_VOP3__V_MAD_I32_I24(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mad_i32_i24", false) - { - setFlag(ALU); - setFlag(MAD); - } // Inst_VOP3__V_MAD_I32_I24 - - Inst_VOP3__V_MAD_I32_I24::~Inst_VOP3__V_MAD_I32_I24() - { - } // ~Inst_VOP3__V_MAD_I32_I24 - - // --- description from .arch file --- - // D.i = S0.i[23:0] * S1.i[23:0] + S2.i. - void - Inst_VOP3__V_MAD_I32_I24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandI32 src2(gpuDynInst, extData.SRC2); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = sext<24>(bits(src0[lane], 23, 0)) - * sext<24>(bits(src1[lane], 23, 0)) + src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAD_U32_U24 class methods --- - - Inst_VOP3__V_MAD_U32_U24::Inst_VOP3__V_MAD_U32_U24(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mad_u32_u24", false) - { - setFlag(ALU); - setFlag(MAD); - } // Inst_VOP3__V_MAD_U32_U24 - - Inst_VOP3__V_MAD_U32_U24::~Inst_VOP3__V_MAD_U32_U24() - { - } // ~Inst_VOP3__V_MAD_U32_U24 - - // --- description from .arch file --- - // D.u = S0.u[23:0] * S1.u[23:0] + S2.u. 
- void - Inst_VOP3__V_MAD_U32_U24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0) - + src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CUBEID_F32 class methods --- - - Inst_VOP3__V_CUBEID_F32::Inst_VOP3__V_CUBEID_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cubeid_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CUBEID_F32 - - Inst_VOP3__V_CUBEID_F32::~Inst_VOP3__V_CUBEID_F32() - { - } // ~Inst_VOP3__V_CUBEID_F32 - - // --- description from .arch file --- - // D.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). XYZ coordinate is given in - // --- (S0.f, S1.f, S2.f). - void - Inst_VOP3__V_CUBEID_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CUBESC_F32 class methods --- - - Inst_VOP3__V_CUBESC_F32::Inst_VOP3__V_CUBESC_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cubesc_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CUBESC_F32 - - Inst_VOP3__V_CUBESC_F32::~Inst_VOP3__V_CUBESC_F32() - { - } // ~Inst_VOP3__V_CUBESC_F32 - - // --- description from .arch file --- - // D.f = cubemap S coordinate. XYZ coordinate is given in (S0.f, S1.f, - // S2.f). - void - Inst_VOP3__V_CUBESC_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CUBETC_F32 class methods --- - - Inst_VOP3__V_CUBETC_F32::Inst_VOP3__V_CUBETC_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cubetc_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CUBETC_F32 - - Inst_VOP3__V_CUBETC_F32::~Inst_VOP3__V_CUBETC_F32() - { - } // ~Inst_VOP3__V_CUBETC_F32 - - // --- description from .arch file --- - // D.f = cubemap T coordinate. XYZ coordinate is given in (S0.f, S1.f, - // S2.f). - void - Inst_VOP3__V_CUBETC_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CUBEMA_F32 class methods --- - - Inst_VOP3__V_CUBEMA_F32::Inst_VOP3__V_CUBEMA_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cubema_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CUBEMA_F32 - - Inst_VOP3__V_CUBEMA_F32::~Inst_VOP3__V_CUBEMA_F32() - { - } // ~Inst_VOP3__V_CUBEMA_F32 - - // --- description from .arch file --- - // D.f = 2.0 * cubemap major axis. XYZ coordinate is given in (S0.f, S1.f, - // --- S2.f). 
- void
- Inst_VOP3__V_CUBEMA_F32::execute(GPUDynInstPtr gpuDynInst)
- {
- panicUnimplemented();
- } // execute
- // --- Inst_VOP3__V_BFE_U32 class methods ---
-
- Inst_VOP3__V_BFE_U32::Inst_VOP3__V_BFE_U32(InFmt_VOP3A *iFmt)
- : Inst_VOP3A(iFmt, "v_bfe_u32", false)
- {
- setFlag(ALU);
- } // Inst_VOP3__V_BFE_U32
-
- Inst_VOP3__V_BFE_U32::~Inst_VOP3__V_BFE_U32()
- {
- } // ~Inst_VOP3__V_BFE_U32
-
- // --- description from .arch file ---
- // D.u = (S0.u>>S1.u[4:0]) & ((1<<S2.u[4:0])-1); bitfield extract,
- // S0=data, S1=field_offset, S2=field_width.
- void
- Inst_VOP3__V_BFE_U32::execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *wf = gpuDynInst->wavefront();
- ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
- ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
- ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
- VecOperandU32 vdst(gpuDynInst, instData.VDST);
-
- src0.readSrc();
- src1.readSrc();
- src2.readSrc();
-
- /**
- * input modifiers are supported by FP operations only
- */
- assert(!(instData.ABS & 0x1));
- assert(!(instData.ABS & 0x2));
- assert(!(instData.ABS & 0x4));
- assert(!(extData.NEG & 0x1));
- assert(!(extData.NEG & 0x2));
- assert(!(extData.NEG & 0x4));
-
- for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
- if (wf->execMask(lane)) {
- vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
- & ((1 << bits(src2[lane], 4, 0)) - 1);
- }
- }
-
- vdst.write();
- } // execute
- // --- Inst_VOP3__V_BFE_I32 class methods ---
-
- Inst_VOP3__V_BFE_I32::Inst_VOP3__V_BFE_I32(InFmt_VOP3A *iFmt)
- : Inst_VOP3A(iFmt, "v_bfe_i32", false)
- {
- setFlag(ALU);
- } // Inst_VOP3__V_BFE_I32
-
- Inst_VOP3__V_BFE_I32::~Inst_VOP3__V_BFE_I32()
- {
- } // ~Inst_VOP3__V_BFE_I32
-
- // --- description from .arch file ---
- // D.i = (S0.i>>S1.u[4:0]) & ((1<<S2.u[4:0])-1); bitfield extract,
- // S0=data, S1=field_offset, S2=field_width.
- void
- Inst_VOP3__V_BFE_I32::execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *wf = gpuDynInst->wavefront();
- ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
- ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
- ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
- VecOperandI32 vdst(gpuDynInst, instData.VDST);
-
- src0.readSrc();
- src1.readSrc();
- src2.readSrc();
-
- /**
- * input modifiers are supported by FP operations only
- */
- assert(!(instData.ABS & 0x1));
- assert(!(instData.ABS & 0x2));
- assert(!(instData.ABS & 0x4));
- assert(!(extData.NEG & 0x1));
- assert(!(extData.NEG & 0x2));
- assert(!(extData.NEG & 0x4));
-
- for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
- if (wf->execMask(lane)) {
- vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
- & ((1 << bits(src2[lane], 4, 0)) - 1);
-
- // Above extracted a signed int of size src2 bits which needs
- // to be signed-extended. Check if the MSB of our src2-bit
- // integer is 1, and sign extend it if it is.
- if (vdst[lane] >> (bits(src2[lane], 4, 0) - 1)) {
- vdst[lane] |= 0xffffffff << bits(src2[lane], 4, 0);
- }
- }
- }
-
- vdst.write();
- } // execute
- // --- Inst_VOP3__V_BFI_B32 class methods ---
-
- Inst_VOP3__V_BFI_B32::Inst_VOP3__V_BFI_B32(InFmt_VOP3A *iFmt)
- : Inst_VOP3A(iFmt, "v_bfi_b32", false)
- {
- setFlag(ALU);
- } // Inst_VOP3__V_BFI_B32
-
- Inst_VOP3__V_BFI_B32::~Inst_VOP3__V_BFI_B32()
- {
- } // ~Inst_VOP3__V_BFI_B32
-
- // --- description from .arch file ---
- // D.u = (S0.u & S1.u) | (~S0.u & S2.u); bitfield insert.
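As an aside, the three bitfield operations here (v_bfe_u32 and v_bfe_i32 above, v_bfi_b32 implemented below) reduce to a few lines of ordinary integer arithmetic. The sketch below restates them with plain uint32_t/int32_t values instead of the gem5 operand classes; the helper names are made up for illustration and this is not part of the patch:

    #include <cstdint>
    #include <cstdio>

    // v_bfe_u32: extract 'width' bits of 'data' starting at bit 'offset'.
    static uint32_t bfeU32(uint32_t data, uint32_t offset, uint32_t width)
    {
        offset &= 0x1f;
        width &= 0x1f;
        return (data >> offset) & ((1u << width) - 1u);
    }

    // v_bfe_i32: same extract, then sign-extend from bit (width - 1).
    static int32_t bfeI32(int32_t data, uint32_t offset, uint32_t width)
    {
        offset &= 0x1f;
        width &= 0x1f;
        uint32_t field = (static_cast<uint32_t>(data) >> offset)
                       & ((1u << width) - 1u);
        if (width && (field >> (width - 1))) {
            field |= 0xffffffffu << width;   // replicate the sign bit
        }
        return static_cast<int32_t>(field);
    }

    // v_bfi_b32: bitfield insert, take src1 where the mask is set, else src2.
    static uint32_t bfiB32(uint32_t mask, uint32_t src1, uint32_t src2)
    {
        return (mask & src1) | (~mask & src2);
    }

    int main()
    {
        std::printf("%u %d 0x%08x\n",
                    bfeU32(0xabcd1234u, 8, 8),        // 0x12 == 18
                    bfeI32(0x0000f000, 12, 4),        // field 0xf -> -1
                    bfiB32(0x0000ffffu, 0x1111aaaau, 0x2222bbbbu)); // 0x2222aaaa
        return 0;
    }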
- void - Inst_VOP3__V_BFI_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (src0[lane] & src1[lane]) | (~src0[lane] - & src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_FMA_F32 class methods --- - - Inst_VOP3__V_FMA_F32::Inst_VOP3__V_FMA_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_fma_f32", false) - { - setFlag(ALU); - setFlag(F32); - setFlag(FMA); - } // Inst_VOP3__V_FMA_F32 - - Inst_VOP3__V_FMA_F32::~Inst_VOP3__V_FMA_F32() - { - } // ~Inst_VOP3__V_FMA_F32 - - // --- description from .arch file --- - // D.f = S0.f * S1.f + S2.f. - void - Inst_VOP3__V_FMA_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_FMA_F64 class methods --- - - Inst_VOP3__V_FMA_F64::Inst_VOP3__V_FMA_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_fma_f64", false) - { - setFlag(ALU); - setFlag(F64); - setFlag(FMA); - } // Inst_VOP3__V_FMA_F64 - - Inst_VOP3__V_FMA_F64::~Inst_VOP3__V_FMA_F64() - { - } // ~Inst_VOP3__V_FMA_F64 - - // --- description from .arch file --- - // D.d = S0.d * S1.d + S2.d. 
- void - Inst_VOP3__V_FMA_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF64 src2(gpuDynInst, extData.SRC2); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LERP_U8 class methods --- - - Inst_VOP3__V_LERP_U8::Inst_VOP3__V_LERP_U8(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_lerp_u8", false) - { - setFlag(ALU); - } // Inst_VOP3__V_LERP_U8 - - Inst_VOP3__V_LERP_U8::~Inst_VOP3__V_LERP_U8() - { - } // ~Inst_VOP3__V_LERP_U8 - - // --- description from .arch file --- - // D.u = ((S0.u[31:24] + S1.u[31:24] + S2.u[24]) >> 1) << 24 - // D.u += ((S0.u[23:16] + S1.u[23:16] + S2.u[16]) >> 1) << 16; - // D.u += ((S0.u[15:8] + S1.u[15:8] + S2.u[8]) >> 1) << 8; - // D.u += ((S0.u[7:0] + S1.u[7:0] + S2.u[0]) >> 1). - // Unsigned 8-bit pixel average on packed unsigned bytes (linear - // --- interpolation). S2 acts as a round mode; if set, 0.5 rounds up, - // --- otherwise 0.5 truncates. - void - Inst_VOP3__V_LERP_U8::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = ((bits(src0[lane], 31, 24) - + bits(src1[lane], 31, 24) + bits(src2[lane], 24)) >> 1) - << 24; - vdst[lane] += ((bits(src0[lane], 23, 16) - + bits(src1[lane], 23, 16) + bits(src2[lane], 16)) >> 1) - << 16; - vdst[lane] += ((bits(src0[lane], 15, 8) - + bits(src1[lane], 15, 8) + bits(src2[lane], 8)) >> 1) - << 8; - vdst[lane] += ((bits(src0[lane], 7, 0) + bits(src1[lane], 7, 0) - + bits(src2[lane], 0)) >> 1); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_ALIGNBIT_B32 class methods --- - - Inst_VOP3__V_ALIGNBIT_B32::Inst_VOP3__V_ALIGNBIT_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_alignbit_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_ALIGNBIT_B32 - - Inst_VOP3__V_ALIGNBIT_B32::~Inst_VOP3__V_ALIGNBIT_B32() - { - } // ~Inst_VOP3__V_ALIGNBIT_B32 - - // --- description from .arch file --- - // D.u = ({S0,S1} >> S2.u[4:0]) & 0xffffffff. 
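v_alignbit_b32 is easiest to read as a 64-bit funnel shift; here is a small scalar sketch under that reading. The name alignbit_b32 is invented for illustration and nothing below is part of the patch.

#include <cassert>
#include <cstdint>

// Scalar model of the {S0,S1} >> S2.u[4:0] operation described above: S0 forms
// the upper 32 bits and S1 the lower 32 bits of a 64-bit value, which is then
// shifted right by S2[4:0]; only the low 32 bits of the result are kept.
uint32_t alignbit_b32(uint32_t s0, uint32_t s1, uint32_t s2)
{
    uint64_t concat = (static_cast<uint64_t>(s0) << 32) | s1;
    return static_cast<uint32_t>(concat >> (s2 & 0x1f));
}

int main()
{
    assert(alignbit_b32(0x11223344, 0xaabbccdd, 0) == 0xaabbccdd); // shift 0: S1
    assert(alignbit_b32(0x11223344, 0xaabbccdd, 8) == 0x44aabbcc); // low byte of S0 shifts in
    return 0;
}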
- void - Inst_VOP3__V_ALIGNBIT_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32) - | (VecElemU64)src1[lane]); - vdst[lane] = (VecElemU32)((src_0_1 - >> (VecElemU64)bits(src2[lane], 4, 0)) & 0xffffffff); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_ALIGNBYTE_B32 class methods --- - - Inst_VOP3__V_ALIGNBYTE_B32::Inst_VOP3__V_ALIGNBYTE_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_alignbyte_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_ALIGNBYTE_B32 - - Inst_VOP3__V_ALIGNBYTE_B32::~Inst_VOP3__V_ALIGNBYTE_B32() - { - } // ~Inst_VOP3__V_ALIGNBYTE_B32 - - // --- description from .arch file --- - // D.u = ({S0,S1} >> (8*S2.u[4:0])) & 0xffffffff. - void - Inst_VOP3__V_ALIGNBYTE_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32) - | (VecElemU64)src1[lane]); - vdst[lane] = (VecElemU32)((src_0_1 - >> (8ULL * (VecElemU64)bits(src2[lane], 4, 0))) - & 0xffffffff); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MIN3_F32 class methods --- - - Inst_VOP3__V_MIN3_F32::Inst_VOP3__V_MIN3_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_min3_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_MIN3_F32 - - Inst_VOP3__V_MIN3_F32::~Inst_VOP3__V_MIN3_F32() - { - } // ~Inst_VOP3__V_MIN3_F32 - - // --- description from .arch file --- - // D.f = min(S0.f, S1.f, S2.f). 
- void - Inst_VOP3__V_MIN3_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemF32 min_0_1 = std::fmin(src0[lane], src1[lane]); - vdst[lane] = std::fmin(min_0_1, src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MIN3_I32 class methods --- - - Inst_VOP3__V_MIN3_I32::Inst_VOP3__V_MIN3_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_min3_i32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MIN3_I32 - - Inst_VOP3__V_MIN3_I32::~Inst_VOP3__V_MIN3_I32() - { - } // ~Inst_VOP3__V_MIN3_I32 - - // --- description from .arch file --- - // D.i = min(S0.i, S1.i, S2.i). - void - Inst_VOP3__V_MIN3_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandI32 src2(gpuDynInst, extData.SRC2); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemI32 min_0_1 = std::min(src0[lane], src1[lane]); - vdst[lane] = std::min(min_0_1, src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MIN3_U32 class methods --- - - Inst_VOP3__V_MIN3_U32::Inst_VOP3__V_MIN3_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_min3_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MIN3_U32 - - Inst_VOP3__V_MIN3_U32::~Inst_VOP3__V_MIN3_U32() - { - } // ~Inst_VOP3__V_MIN3_U32 - - // --- description from .arch file --- - // D.u = min(S0.u, S1.u, S2.u). 
- void - Inst_VOP3__V_MIN3_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemU32 min_0_1 = std::min(src0[lane], src1[lane]); - vdst[lane] = std::min(min_0_1, src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAX3_F32 class methods --- - - Inst_VOP3__V_MAX3_F32::Inst_VOP3__V_MAX3_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_max3_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_MAX3_F32 - - Inst_VOP3__V_MAX3_F32::~Inst_VOP3__V_MAX3_F32() - { - } // ~Inst_VOP3__V_MAX3_F32 - - // --- description from .arch file --- - // D.f = max(S0.f, S1.f, S2.f). - void - Inst_VOP3__V_MAX3_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemF32 max_0_1 = std::fmax(src0[lane], src1[lane]); - vdst[lane] = std::fmax(max_0_1, src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAX3_I32 class methods --- - - Inst_VOP3__V_MAX3_I32::Inst_VOP3__V_MAX3_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_max3_i32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MAX3_I32 - - Inst_VOP3__V_MAX3_I32::~Inst_VOP3__V_MAX3_I32() - { - } // ~Inst_VOP3__V_MAX3_I32 - - // --- description from .arch file --- - // D.i = max(S0.i, S1.i, S2.i). 
- void - Inst_VOP3__V_MAX3_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandI32 src2(gpuDynInst, extData.SRC2); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemI32 max_0_1 = std::max(src0[lane], src1[lane]); - vdst[lane] = std::max(max_0_1, src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAX3_U32 class methods --- - - Inst_VOP3__V_MAX3_U32::Inst_VOP3__V_MAX3_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_max3_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MAX3_U32 - - Inst_VOP3__V_MAX3_U32::~Inst_VOP3__V_MAX3_U32() - { - } // ~Inst_VOP3__V_MAX3_U32 - - // --- description from .arch file --- - // D.u = max(S0.u, S1.u, S2.u). - void - Inst_VOP3__V_MAX3_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemU32 max_0_1 = std::max(src0[lane], src1[lane]); - vdst[lane] = std::max(max_0_1, src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MED3_F32 class methods --- - - Inst_VOP3__V_MED3_F32::Inst_VOP3__V_MED3_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_med3_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_MED3_F32 - - Inst_VOP3__V_MED3_F32::~Inst_VOP3__V_MED3_F32() - { - } // ~Inst_VOP3__V_MED3_F32 - - // --- description from .arch file --- - // D.f = median(S0.f, S1.f, S2.f). 
- void - Inst_VOP3__V_MED3_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = median(src0[lane], src1[lane], src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MED3_I32 class methods --- - - Inst_VOP3__V_MED3_I32::Inst_VOP3__V_MED3_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_med3_i32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MED3_I32 - - Inst_VOP3__V_MED3_I32::~Inst_VOP3__V_MED3_I32() - { - } // ~Inst_VOP3__V_MED3_I32 - - // --- description from .arch file --- - // D.i = median(S0.i, S1.i, S2.i). - void - Inst_VOP3__V_MED3_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandI32 src2(gpuDynInst, extData.SRC2); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = median(src0[lane], src1[lane], src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MED3_U32 class methods --- - - Inst_VOP3__V_MED3_U32::Inst_VOP3__V_MED3_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_med3_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MED3_U32 - - Inst_VOP3__V_MED3_U32::~Inst_VOP3__V_MED3_U32() - { - } // ~Inst_VOP3__V_MED3_U32 - - // --- description from .arch file --- - // D.u = median(S0.u, S1.u, S2.u). 
- void - Inst_VOP3__V_MED3_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = median(src0[lane], src1[lane], src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_SAD_U8 class methods --- - - Inst_VOP3__V_SAD_U8::Inst_VOP3__V_SAD_U8(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sad_u8", false) - { - setFlag(ALU); - } // Inst_VOP3__V_SAD_U8 - - Inst_VOP3__V_SAD_U8::~Inst_VOP3__V_SAD_U8() - { - } // ~Inst_VOP3__V_SAD_U8 - - // --- description from .arch file --- - // D.u = abs(S0.i[31:24] - S1.i[31:24]) + abs(S0.i[23:16] - S1.i[23:16]) + - // abs(S0.i[15:8] - S1.i[15:8]) + abs(S0.i[7:0] - S1.i[7:0]) + S2.u. - // Sum of absolute differences with accumulation, overflow into upper bits - // is allowed. - void - Inst_VOP3__V_SAD_U8::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::abs(bits(src0[lane], 31, 24) - - bits(src1[lane], 31, 24)) - + std::abs(bits(src0[lane], 23, 16) - - bits(src1[lane], 23, 16)) - + std::abs(bits(src0[lane], 15, 8) - - bits(src1[lane], 15, 8)) - + std::abs(bits(src0[lane], 7, 0) - - bits(src1[lane], 7, 0)) + src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_SAD_HI_U8 class methods --- - - Inst_VOP3__V_SAD_HI_U8::Inst_VOP3__V_SAD_HI_U8(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sad_hi_u8", false) - { - setFlag(ALU); - } // Inst_VOP3__V_SAD_HI_U8 - - Inst_VOP3__V_SAD_HI_U8::~Inst_VOP3__V_SAD_HI_U8() - { - } // ~Inst_VOP3__V_SAD_HI_U8 - - // --- description from .arch file --- - // D.u = (SAD_U8(S0, S1, 0) << 16) + S2.u. - // Sum of absolute differences with accumulation, overflow is lost. 
- void - Inst_VOP3__V_SAD_HI_U8::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (((bits(src0[lane], 31, 24) - - bits(src1[lane], 31, 24)) + (bits(src0[lane], 23, 16) - - bits(src1[lane], 23, 16)) + (bits(src0[lane], 15, 8) - - bits(src1[lane], 15, 8)) + (bits(src0[lane], 7, 0) - - bits(src1[lane], 7, 0))) << 16) + src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_SAD_U16 class methods --- - - Inst_VOP3__V_SAD_U16::Inst_VOP3__V_SAD_U16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sad_u16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_SAD_U16 - - Inst_VOP3__V_SAD_U16::~Inst_VOP3__V_SAD_U16() - { - } // ~Inst_VOP3__V_SAD_U16 - - // --- description from .arch file --- - // D.u = abs(S0.i[31:16] - S1.i[31:16]) + abs(S0.i[15:0] - S1.i[15:0]) - // + S2.u. - // Word SAD with accumulation. - void - Inst_VOP3__V_SAD_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::abs(bits(src0[lane], 31, 16) - - bits(src1[lane], 31, 16)) - + std::abs(bits(src0[lane], 15, 0) - - bits(src1[lane], 15, 0)) + src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_SAD_U32 class methods --- - - Inst_VOP3__V_SAD_U32::Inst_VOP3__V_SAD_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sad_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_SAD_U32 - - Inst_VOP3__V_SAD_U32::~Inst_VOP3__V_SAD_U32() - { - } // ~Inst_VOP3__V_SAD_U32 - - // --- description from .arch file --- - // D.u = abs(S0.i - S1.i) + S2.u. - // Dword SAD with accumulation. 
- void - Inst_VOP3__V_SAD_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::abs(src0[lane] - src1[lane]) + src2[lane]; - } // if - } // for - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_PK_U8_F32 class methods --- - - Inst_VOP3__V_CVT_PK_U8_F32::Inst_VOP3__V_CVT_PK_U8_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_pk_u8_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_PK_U8_F32 - - Inst_VOP3__V_CVT_PK_U8_F32::~Inst_VOP3__V_CVT_PK_U8_F32() - { - } // ~Inst_VOP3__V_CVT_PK_U8_F32 - - // --- description from .arch file --- - // D.u = ((flt32_to_uint8(S0.f) & 0xff) << (8 * S1.u[1:0])) - // | (S2.u & ~(0xff << (8 * S1.u[1:0]))). - // Convert floating point value S0 to 8-bit unsigned integer and pack the - // result into byte S1 of dword S2. - void - Inst_VOP3__V_CVT_PK_U8_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (((VecElemU8)src0[lane] & 0xff) - << (8 * bits(src1[lane], 1, 0))) - | (src2[lane] & ~(0xff << (8 * bits(src1[lane], 1, 0)))); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_DIV_FIXUP_F32 class methods --- - - Inst_VOP3__V_DIV_FIXUP_F32::Inst_VOP3__V_DIV_FIXUP_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_div_fixup_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_DIV_FIXUP_F32 - - Inst_VOP3__V_DIV_FIXUP_F32::~Inst_VOP3__V_DIV_FIXUP_F32() - { - } // ~Inst_VOP3__V_DIV_FIXUP_F32 - - // --- description from .arch file --- - // D.f = Divide fixup and flags -- s0.f = Quotient, s1.f = Denominator, - // s2.f = Numerator. This opcode generates exceptions resulting from the - // division operation. 
-    void
-    Inst_VOP3__V_DIV_FIXUP_F32::execute(GPUDynInstPtr gpuDynInst)
-    {
-        Wavefront *wf = gpuDynInst->wavefront();
-        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
-        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
-        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
-        VecOperandF32 vdst(gpuDynInst, instData.VDST);
-
-        src0.readSrc();
-        src1.readSrc();
-        src2.readSrc();
-
-        if (instData.ABS & 0x1) {
-            src0.absModifier();
-        }
-
-        if (instData.ABS & 0x2) {
-            src1.absModifier();
-        }
-
-        if (instData.ABS & 0x4) {
-            src2.absModifier();
-        }
-
-        if (extData.NEG & 0x1) {
-            src0.negModifier();
-        }
-
-        if (extData.NEG & 0x2) {
-            src1.negModifier();
-        }
-
-        if (extData.NEG & 0x4) {
-            src2.negModifier();
-        }
-
-        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-            if (wf->execMask(lane)) {
-                if (std::fpclassify(src1[lane]) == FP_ZERO) {
-                    if (std::signbit(src1[lane])) {
-                        vdst[lane] = -INFINITY;
-                    } else {
-                        vdst[lane] = +INFINITY;
-                    }
-                } else if (std::isnan(src2[lane]) || std::isnan(src1[lane])) {
-                    vdst[lane] = NAN;
-                } else if (std::isinf(src1[lane])) {
-                    if (std::signbit(src1[lane])) {
-                        vdst[lane] = -INFINITY;
-                    } else {
-                        vdst[lane] = +INFINITY;
-                    }
-                } else {
-                    vdst[lane] = src2[lane] / src1[lane];
-                }
-            }
-        }
-
-        vdst.write();
-    } // execute
-    // --- Inst_VOP3__V_DIV_FIXUP_F64 class methods ---
-
-    Inst_VOP3__V_DIV_FIXUP_F64::Inst_VOP3__V_DIV_FIXUP_F64(InFmt_VOP3A *iFmt)
-        : Inst_VOP3A(iFmt, "v_div_fixup_f64", false)
-    {
-        setFlag(ALU);
-        setFlag(F64);
-    } // Inst_VOP3__V_DIV_FIXUP_F64
-
-    Inst_VOP3__V_DIV_FIXUP_F64::~Inst_VOP3__V_DIV_FIXUP_F64()
-    {
-    } // ~Inst_VOP3__V_DIV_FIXUP_F64
-
-    // --- description from .arch file ---
-    // D.d = Divide fixup and flags -- s0.d = Quotient, s1.d = Denominator,
-    // s2.d = Numerator. This opcode generates exceptions resulting from the
-    // division operation.
-    void
-    Inst_VOP3__V_DIV_FIXUP_F64::execute(GPUDynInstPtr gpuDynInst)
-    {
-        Wavefront *wf = gpuDynInst->wavefront();
-        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
-        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
-        ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
-        VecOperandF64 vdst(gpuDynInst, instData.VDST);
-
-        src0.readSrc();
-        src1.readSrc();
-        src2.readSrc();
-
-        if (instData.ABS & 0x1) {
-            src0.absModifier();
-        }
-
-        if (instData.ABS & 0x2) {
-            src1.absModifier();
-        }
-
-        if (instData.ABS & 0x4) {
-            src2.absModifier();
-        }
-
-        if (extData.NEG & 0x1) {
-            src0.negModifier();
-        }
-
-        if (extData.NEG & 0x2) {
-            src1.negModifier();
-        }
-
-        if (extData.NEG & 0x4) {
-            src2.negModifier();
-        }
-
-        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-            if (wf->execMask(lane)) {
-                int sign_out = std::signbit(src1[lane])
-                    ^ std::signbit(src2[lane]);
-                int exp1(0);
-                int exp2(0);
-                std::frexp(src1[lane], &exp1);
-                std::frexp(src2[lane], &exp2);
-
-                if (std::isnan(src1[lane]) || std::isnan(src2[lane])) {
-                    vdst[lane] = std::numeric_limits<VecElemF64>::quiet_NaN();
-                } else if (std::fpclassify(src1[lane]) == FP_ZERO
-                    && std::fpclassify(src2[lane]) == FP_ZERO) {
-                    vdst[lane]
-                        = std::numeric_limits<VecElemF64>::signaling_NaN();
-                } else if (std::isinf(src1[lane]) && std::isinf(src2[lane])) {
-                    vdst[lane]
-                        = std::numeric_limits<VecElemF64>::signaling_NaN();
-                } else if (std::fpclassify(src1[lane]) == FP_ZERO
-                    || std::isinf(src2[lane])) {
-                    vdst[lane] = sign_out ? -INFINITY : +INFINITY;
-                } else if (std::isinf(src1[lane])
-                    || std::fpclassify(src2[lane]) == FP_ZERO) {
-                    vdst[lane] = sign_out ?
-0.0 : +0.0; - } else if (exp2 - exp1 < -1075) { - vdst[lane] = src0[lane]; - } else if (exp1 == 2047) { - vdst[lane] = src0[lane]; - } else { - vdst[lane] = sign_out ? -std::fabs(src0[lane]) - : std::fabs(src0[lane]); - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_DIV_SCALE_F32 class methods --- - - Inst_VOP3__V_DIV_SCALE_F32::Inst_VOP3__V_DIV_SCALE_F32( - InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_div_scale_f32") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(F32); - } // Inst_VOP3__V_DIV_SCALE_F32 - - Inst_VOP3__V_DIV_SCALE_F32::~Inst_VOP3__V_DIV_SCALE_F32() - { - } // ~Inst_VOP3__V_DIV_SCALE_F32 - - // --- description from .arch file --- - // {vcc,D.f} = Divide preop and flags -- s0.f = Quotient, s1.f = - // Denominator, s2.f = Numerator -- s0 must equal s1 or s2. Given a - // numerator and denominator, this opcode will appropriately scale inputs - // for division to avoid subnormal terms during Newton-Raphson correction - // algorithm. This opcode producses a VCC flag for post-scale of quotient. - void - Inst_VOP3__V_DIV_SCALE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); - ScalarOperandU64 vcc(gpuDynInst, instData.SDST); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane]; - vcc.setBit(lane, 0); - } - } - - vcc.write(); - vdst.write(); - } // execute - // --- Inst_VOP3__V_DIV_SCALE_F64 class methods --- - - Inst_VOP3__V_DIV_SCALE_F64::Inst_VOP3__V_DIV_SCALE_F64( - InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_div_scale_f64") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(F64); - } // Inst_VOP3__V_DIV_SCALE_F64 - - Inst_VOP3__V_DIV_SCALE_F64::~Inst_VOP3__V_DIV_SCALE_F64() - { - } // ~Inst_VOP3__V_DIV_SCALE_F64 - - // --- description from .arch file --- - // {vcc,D.d} = Divide preop and flags -- s0.d = Quotient, s1.d = - // Denominator, s2.d = Numerator -- s0 must equal s1 or s2. Given a - // numerator and denominator, this opcode will appropriately scale inputs - // for division to avoid subnormal terms during Newton-Raphson correction - // algorithm. This opcode producses a VCC flag for post-scale of quotient. 
- void - Inst_VOP3__V_DIV_SCALE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF64 src2(gpuDynInst, extData.SRC2); - ScalarOperandU64 vcc(gpuDynInst, instData.SDST); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - int exp1(0); - int exp2(0); - std::frexp(src1[lane], &exp1); - std::frexp(src2[lane], &exp2); - vcc.setBit(lane, 0); - - if (std::fpclassify(src1[lane]) == FP_ZERO - || std::fpclassify(src2[lane]) == FP_ZERO) { - vdst[lane] = NAN; - } else if (exp2 - exp1 >= 768) { - vcc.setBit(lane, 1); - if (src0[lane] == src1[lane]) { - vdst[lane] = std::ldexp(src0[lane], 128); - } - } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL) { - vdst[lane] = std::ldexp(src0[lane], 128); - } else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL - && std::fpclassify(src2[lane] / src1[lane]) - == FP_SUBNORMAL) { - vcc.setBit(lane, 1); - if (src0[lane] == src1[lane]) { - vdst[lane] = std::ldexp(src0[lane], 128); - } - } else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL) { - vdst[lane] = std::ldexp(src0[lane], -128); - } else if (std::fpclassify(src2[lane] / src1[lane]) - == FP_SUBNORMAL) { - vcc.setBit(lane, 1); - if (src0[lane] == src2[lane]) { - vdst[lane] = std::ldexp(src0[lane], 128); - } - } else if (exp2 <= 53) { - vdst[lane] = std::ldexp(src0[lane], 128); - } - } - } - - vcc.write(); - vdst.write(); - } // execute - // --- Inst_VOP3__V_DIV_FMAS_F32 class methods --- - - Inst_VOP3__V_DIV_FMAS_F32::Inst_VOP3__V_DIV_FMAS_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_div_fmas_f32", false) - { - setFlag(ALU); - setFlag(ReadsVCC); - setFlag(F32); - setFlag(FMA); - } // Inst_VOP3__V_DIV_FMAS_F32 - - Inst_VOP3__V_DIV_FMAS_F32::~Inst_VOP3__V_DIV_FMAS_F32() - { - } // ~Inst_VOP3__V_DIV_FMAS_F32 - - // --- description from .arch file --- - // D.f = Special case divide FMA with scale and flags(s0.f = Quotient, - // s1.f = Denominator, s2.f = Numerator) - void - Inst_VOP3__V_DIV_FMAS_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); - } - } - - //vdst.write(); - } // execute - // --- Inst_VOP3__V_DIV_FMAS_F64 class methods --- - - Inst_VOP3__V_DIV_FMAS_F64::Inst_VOP3__V_DIV_FMAS_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_div_fmas_f64", false) - { - setFlag(ALU); - setFlag(ReadsVCC); - setFlag(F64); - setFlag(FMA); - } // Inst_VOP3__V_DIV_FMAS_F64 - - 
Inst_VOP3__V_DIV_FMAS_F64::~Inst_VOP3__V_DIV_FMAS_F64() - { - } // ~Inst_VOP3__V_DIV_FMAS_F64 - - // --- description from .arch file --- - // D.d = Special case divide FMA with scale and flags(s0.d = Quotient, - // s1.d = Denominator, s2.d = Numerator) - void - Inst_VOP3__V_DIV_FMAS_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF64 src2(gpuDynInst, extData.SRC2); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - vcc.read(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (bits(vcc.rawData(), lane)) { - vdst[lane] = std::pow(2, 64) - * std::fma(src0[lane], src1[lane], src2[lane]); - } else { - vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MSAD_U8 class methods --- - - Inst_VOP3__V_MSAD_U8::Inst_VOP3__V_MSAD_U8(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_msad_u8", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MSAD_U8 - - Inst_VOP3__V_MSAD_U8::~Inst_VOP3__V_MSAD_U8() - { - } // ~Inst_VOP3__V_MSAD_U8 - - // --- description from .arch file --- - // D.u = Masked Byte SAD with accum_lo(S0.u, S1.u, S2.u). - void - Inst_VOP3__V_MSAD_U8::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_QSAD_PK_U16_U8 class methods --- - - Inst_VOP3__V_QSAD_PK_U16_U8::Inst_VOP3__V_QSAD_PK_U16_U8(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_qsad_pk_u16_u8", false) - { - setFlag(ALU); - } // Inst_VOP3__V_QSAD_PK_U16_U8 - - Inst_VOP3__V_QSAD_PK_U16_U8::~Inst_VOP3__V_QSAD_PK_U16_U8() - { - } // ~Inst_VOP3__V_QSAD_PK_U16_U8 - - // --- description from .arch file --- - // D.u = Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0], - // S1.u[31:0], S2.u[63:0]) - void - Inst_VOP3__V_QSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_MQSAD_PK_U16_U8 class methods --- - - Inst_VOP3__V_MQSAD_PK_U16_U8::Inst_VOP3__V_MQSAD_PK_U16_U8( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mqsad_pk_u16_u8", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MQSAD_PK_U16_U8 - - Inst_VOP3__V_MQSAD_PK_U16_U8::~Inst_VOP3__V_MQSAD_PK_U16_U8() - { - } // ~Inst_VOP3__V_MQSAD_PK_U16_U8 - - // --- description from .arch file --- - // D.u = Masked Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0], - // --- S1.u[31:0], S2.u[63:0]) - void - Inst_VOP3__V_MQSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_MQSAD_U32_U8 class methods --- - - Inst_VOP3__V_MQSAD_U32_U8::Inst_VOP3__V_MQSAD_U32_U8(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mqsad_u32_u8", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MQSAD_U32_U8 - - Inst_VOP3__V_MQSAD_U32_U8::~Inst_VOP3__V_MQSAD_U32_U8() - { - } // ~Inst_VOP3__V_MQSAD_U32_U8 - - // --- description from .arch file --- - // D.u128 = Masked Quad-Byte SAD with 32-bit accum_lo/hi(S0.u[63:0], - // --- S1.u[31:0], S2.u[127:0]) - void - 
Inst_VOP3__V_MQSAD_U32_U8::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_MAD_U64_U32 class methods --- - - Inst_VOP3__V_MAD_U64_U32::Inst_VOP3__V_MAD_U64_U32( - InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_mad_u64_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(MAD); - } // Inst_VOP3__V_MAD_U64_U32 - - Inst_VOP3__V_MAD_U64_U32::~Inst_VOP3__V_MAD_U64_U32() - { - } // ~Inst_VOP3__V_MAD_U64_U32 - - // --- description from .arch file --- - // {vcc_out,D.u64} = S0.u32 * S1.u32 + S2.u64. - void - Inst_VOP3__V_MAD_U64_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU64 src2(gpuDynInst, extData.SRC2); - ScalarOperandU64 vcc(gpuDynInst, instData.SDST); - VecOperandU64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - vdst.read(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane], - src2[lane])); - } - } - - vcc.write(); - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAD_I64_I32 class methods --- - - Inst_VOP3__V_MAD_I64_I32::Inst_VOP3__V_MAD_I64_I32( - InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_mad_i64_i32") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(MAD); - } // Inst_VOP3__V_MAD_I64_I32 - - Inst_VOP3__V_MAD_I64_I32::~Inst_VOP3__V_MAD_I64_I32() - { - } // ~Inst_VOP3__V_MAD_I64_I32 - - // --- description from .arch file --- - // {vcc_out,D.i64} = S0.i32 * S1.i32 + S2.i64. - void - Inst_VOP3__V_MAD_I64_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandI64 src2(gpuDynInst, extData.SRC2); - ScalarOperandU64 vcc(gpuDynInst, instData.SDST); - VecOperandI64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane], - src2[lane])); - } - } - - vcc.write(); - vdst.write(); - } // execute - // --- Inst_VOP3__V_XAD_U32 class methods --- - - Inst_VOP3__V_XAD_U32::Inst_VOP3__V_XAD_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_xad_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_XAD_U32 - - Inst_VOP3__V_XAD_U32::~Inst_VOP3__V_XAD_U32() - { - } // ~Inst_VOP3__V_XAD_U32 - - // --- description from .arch file --- - // D.u32 = (S0.u32 ^ S1.u32) + S2.u32. 
- void - Inst_VOP3__V_XAD_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (src0[lane] ^ src1[lane]) + src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LSHL_ADD_U32 class methods --- - - Inst_VOP3__V_LSHL_ADD_U32::Inst_VOP3__V_LSHL_ADD_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_lshl_add_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_LSHL_ADD_U32 - - Inst_VOP3__V_LSHL_ADD_U32::~Inst_VOP3__V_LSHL_ADD_U32() - { - } // ~Inst_VOP3__V_LSHL_ADD_U32 - - // --- description from .arch file --- - // D.u = (S0.u << S1.u[4:0]) + S2.u. - void - Inst_VOP3__V_LSHL_ADD_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (src0[lane] << bits(src1[lane], 4, 0)) - + src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_ADD_LSHL_U32 class methods --- - - Inst_VOP3__V_ADD_LSHL_U32::Inst_VOP3__V_ADD_LSHL_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_add_lshl_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_ADD_LSHL_U32 - - Inst_VOP3__V_ADD_LSHL_U32::~Inst_VOP3__V_ADD_LSHL_U32() - { - } // ~Inst_VOP3__V_ADD_LSHL_U32 - - // --- description from .arch file --- - // D.u = (S0.u + S1.u) << S2.u[4:0]. 
- void - Inst_VOP3__V_ADD_LSHL_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = - (src0[lane] + src1[lane]) << bits(src2[lane], 4, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_ADD3_U32 class methods --- - - Inst_VOP3__V_ADD3_U32::Inst_VOP3__V_ADD3_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_add3_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_ADD3_U32 - - Inst_VOP3__V_ADD3_U32::~Inst_VOP3__V_ADD3_U32() - { - } // ~Inst_VOP3__V_ADD3_U32 - - // --- description from .arch file --- - // D.u = S0.u + S1.u + S2.u. - void - Inst_VOP3__V_ADD3_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane] + src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LSHL_OR_B32 class methods --- - - Inst_VOP3__V_LSHL_OR_B32::Inst_VOP3__V_LSHL_OR_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_lshl_or_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_LSHL_OR_B32 - - Inst_VOP3__V_LSHL_OR_B32::~Inst_VOP3__V_LSHL_OR_B32() - { - } // ~Inst_VOP3__V_LSHL_OR_B32 - - // --- description from .arch file --- - // D.u = (S0.u << S1.u[4:0]) | S2.u. 
- void - Inst_VOP3__V_LSHL_OR_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (src0[lane] << bits(src1[lane], 4, 0)) - | src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_AND_OR_B32 class methods --- - - Inst_VOP3__V_AND_OR_B32::Inst_VOP3__V_AND_OR_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_and_or_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_AND_OR_B32 - - Inst_VOP3__V_AND_OR_B32::~Inst_VOP3__V_AND_OR_B32() - { - } // ~Inst_VOP3__V_AND_OR_B32 - - // --- description from .arch file --- - // D.u = (S0.u & S1.u) | S2.u. - // Input and output modifiers not supported. - void - Inst_VOP3__V_AND_OR_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (src0[lane] & src1[lane]) | src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAD_F16 class methods --- - - Inst_VOP3__V_MAD_F16::Inst_VOP3__V_MAD_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mad_f16", false) - { - setFlag(ALU); - setFlag(F16); - setFlag(MAD); - } // Inst_VOP3__V_MAD_F16 - - Inst_VOP3__V_MAD_F16::~Inst_VOP3__V_MAD_F16() - { - } // ~Inst_VOP3__V_MAD_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 * S1.f16 + S2.f16. - // Supports round mode, exception flags, saturation. - void - Inst_VOP3__V_MAD_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_MAD_U16 class methods --- - - Inst_VOP3__V_MAD_U16::Inst_VOP3__V_MAD_U16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mad_u16", false) - { - setFlag(ALU); - setFlag(MAD); - } // Inst_VOP3__V_MAD_U16 - - Inst_VOP3__V_MAD_U16::~Inst_VOP3__V_MAD_U16() - { - } // ~Inst_VOP3__V_MAD_U16 - - // --- description from .arch file --- - // D.u16 = S0.u16 * S1.u16 + S2.u16. - // Supports saturation (unsigned 16-bit integer domain). 
- void - Inst_VOP3__V_MAD_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU16 src2(gpuDynInst, extData.SRC2); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] * src1[lane] + src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAD_I16 class methods --- - - Inst_VOP3__V_MAD_I16::Inst_VOP3__V_MAD_I16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mad_i16", false) - { - setFlag(ALU); - setFlag(MAD); - } // Inst_VOP3__V_MAD_I16 - - Inst_VOP3__V_MAD_I16::~Inst_VOP3__V_MAD_I16() - { - } // ~Inst_VOP3__V_MAD_I16 - - // --- description from .arch file --- - // D.i16 = S0.i16 * S1.i16 + S2.i16. - // Supports saturation (signed 16-bit integer domain). - void - Inst_VOP3__V_MAD_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ConstVecOperandI16 src2(gpuDynInst, extData.SRC2); - VecOperandI16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] * src1[lane] + src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_PERM_B32 class methods --- - - Inst_VOP3__V_PERM_B32::Inst_VOP3__V_PERM_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_perm_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_PERM_B32 - - Inst_VOP3__V_PERM_B32::~Inst_VOP3__V_PERM_B32() - { - } // ~Inst_VOP3__V_PERM_B32 - - // --- description from .arch file --- - // D.u[31:24] = permute({S0.u, S1.u}, S2.u[31:24]); - // D.u[23:16] = permute({S0.u, S1.u}, S2.u[23:16]); - // D.u[15:8] = permute({S0.u, S1.u}, S2.u[15:8]); - // D.u[7:0] = permute({S0.u, S1.u}, S2.u[7:0]); - // byte permute(byte in[8], byte sel) { - // if(sel>=13) then return 0xff; - // elsif(sel==12) then return 0x00; - // elsif(sel==11) then return in[7][7] * 0xff; - // elsif(sel==10) then return in[5][7] * 0xff; - // elsif(sel==9) then return in[3][7] * 0xff; - // elsif(sel==8) then return in[1][7] * 0xff; - // else return in[sel]; - // } - // Byte permute. 
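The byte-permute rules above are easier to follow with a scalar model. The sketch below assumes, as the execute() implementation that follows appears to, that S0 supplies pool bytes 4-7 and S1 bytes 0-3; permute_byte and perm_b32 are invented names for this note only and nothing here is part of the patch.

#include <cassert>
#include <cstdint>

// One byte is chosen per selector byte, following the .arch rules above:
// sel >= 13 -> 0xff, sel == 12 -> 0x00, sel 8..11 -> replicate the sign bit of
// pool byte 1/3/5/7, otherwise return pool byte 'sel'.
uint8_t permute_byte(uint64_t pool, uint8_t sel)
{
    if (sel >= 13)
        return 0xff;
    if (sel == 12)
        return 0x00;
    if (sel >= 8)
        return (pool >> (8 * (2 * (sel - 8) + 1) + 7)) & 1 ? 0xff : 0x00;
    return (pool >> (8 * sel)) & 0xff;
}

// Apply the rule to each of the four selector bytes held in S2.
uint32_t perm_b32(uint32_t s0, uint32_t s1, uint32_t s2)
{
    uint64_t pool = (static_cast<uint64_t>(s0) << 32) | s1;
    uint32_t d = 0;
    for (int i = 0; i < 4; ++i) {
        uint8_t sel = (s2 >> (8 * i)) & 0xff;
        d |= static_cast<uint32_t>(permute_byte(pool, sel)) << (8 * i);
    }
    return d;
}

int main()
{
    // Selector 0x06040200 picks pool bytes 0, 2 (from S1) and 4, 6 (from S0).
    assert(perm_b32(0x11223344, 0xaabbccdd, 0x06040200) == 0x2244bbdd);
    return 0;
}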
- void - Inst_VOP3__V_PERM_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemU64 selector = (VecElemU64)src0[lane]; - selector = (selector << 32) | (VecElemU64)src1[lane]; - vdst[lane] = 0; - - DPRINTF(VEGA, "Executing v_perm_b32 src_0 0x%08x, src_1 " - "0x%08x, src_2 0x%08x, vdst 0x%08x\n", src0[lane], - src1[lane], src2[lane], vdst[lane]); - DPRINTF(VEGA, "Selector: 0x%08x \n", selector); - - for (int i = 0; i < 4 ; ++i) { - VecElemU32 permuted_val = permute(selector, 0xFF - & ((VecElemU32)src2[lane] >> (8 * i))); - vdst[lane] |= (permuted_val << (8 * i)); - } - - DPRINTF(VEGA, "v_perm result: 0x%08x\n", vdst[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_FMA_F16 class methods --- - - Inst_VOP3__V_FMA_F16::Inst_VOP3__V_FMA_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_fma_f16", false) - { - setFlag(ALU); - setFlag(F16); - setFlag(FMA); - } // Inst_VOP3__V_FMA_F16 - - Inst_VOP3__V_FMA_F16::~Inst_VOP3__V_FMA_F16() - { - } // ~Inst_VOP3__V_FMA_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 * S1.f16 + S2.f16. - // Fused half precision multiply add. - void - Inst_VOP3__V_FMA_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_DIV_FIXUP_F16 class methods --- - - Inst_VOP3__V_DIV_FIXUP_F16::Inst_VOP3__V_DIV_FIXUP_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_div_fixup_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_DIV_FIXUP_F16 - - Inst_VOP3__V_DIV_FIXUP_F16::~Inst_VOP3__V_DIV_FIXUP_F16() - { - } // ~Inst_VOP3__V_DIV_FIXUP_F16 - - // --- description from .arch file --- - // sign_out = sign(S1.f16)^sign(S2.f16); - // if (S2.f16 == NAN) - // D.f16 = Quiet(S2.f16); - // else if (S1.f16 == NAN) - // D.f16 = Quiet(S1.f16); - // else if (S1.f16 == S2.f16 == 0) - // # 0/0 - // D.f16 = pele_nan(0xfe00); - // else if (abs(S1.f16) == abs(S2.f16) == +-INF) - // # inf/inf - // D.f16 = pele_nan(0xfe00); - // else if (S1.f16 ==0 || abs(S2.f16) == +-INF) - // # x/0, or inf/y - // D.f16 = sign_out ? -INF : INF; - // else if (abs(S1.f16) == +-INF || S2.f16 == 0) - // # x/inf, 0/y - // D.f16 = sign_out ? -0 : 0; - // else if ((exp(S2.f16) - exp(S1.f16)) < -150) - // D.f16 = sign_out ? -underflow : underflow; - // else if (exp(S1.f16) == 255) - // D.f16 = sign_out ? -overflow : overflow; - // else - // D.f16 = sign_out ? -abs(S0.f16) : abs(S0.f16). - // Half precision division fixup. - // S0 = Quotient, S1 = Denominator, S3 = Numerator. - // Given a numerator, denominator, and quotient from a divide, this opcode - // will detect and apply special case numerics, touching up the quotient if - // necessary. This opcode also generates invalid, denorm and divide by - // zero exceptions caused by the division. 
- void - Inst_VOP3__V_DIV_FIXUP_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CVT_PKACCUM_U8_F32 class methods --- - - Inst_VOP3__V_CVT_PKACCUM_U8_F32::Inst_VOP3__V_CVT_PKACCUM_U8_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_pkaccum_u8_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_PKACCUM_U8_F32 - - Inst_VOP3__V_CVT_PKACCUM_U8_F32::~Inst_VOP3__V_CVT_PKACCUM_U8_F32() - { - } // ~Inst_VOP3__V_CVT_PKACCUM_U8_F32 - - // --- description from .arch file --- - // byte = S1.u[1:0]; bit = byte * 8; - // D.u[bit+7:bit] = flt32_to_uint8(S0.f); - // Pack converted value of S0.f into byte S1 of the destination. - // SQ translates to V_CVT_PK_U8_F32. - // Note: this opcode uses src_c to pass destination in as a source. - void - Inst_VOP3__V_CVT_PKACCUM_U8_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_INTERP_P1_F32 class methods --- - - Inst_VOP3__V_INTERP_P1_F32::Inst_VOP3__V_INTERP_P1_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_interp_p1_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_INTERP_P1_F32 - - Inst_VOP3__V_INTERP_P1_F32::~Inst_VOP3__V_INTERP_P1_F32() - { - } // ~Inst_VOP3__V_INTERP_P1_F32 - - // --- description from .arch file --- - // D.f = P10 * S.f + P0; parameter interpolation (SQ translates to - // V_MAD_F32 for SP). - // CAUTION: when in HALF_LDS mode, D must not be the same GPR as S; if - // D == S then data corruption will occur. - // NOTE: In textual representations the I/J VGPR is the first source and - // the attribute is the second source; however in the VOP3 encoding the - // attribute is stored in the src0 field and the VGPR is stored in the - // src1 field. - void - Inst_VOP3__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_INTERP_P2_F32 class methods --- - - Inst_VOP3__V_INTERP_P2_F32::Inst_VOP3__V_INTERP_P2_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_interp_p2_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_INTERP_P2_F32 - - Inst_VOP3__V_INTERP_P2_F32::~Inst_VOP3__V_INTERP_P2_F32() - { - } // ~Inst_VOP3__V_INTERP_P2_F32 - - // --- description from .arch file --- - // D.f = P20 * S.f + D.f; parameter interpolation (SQ translates to - // V_MAD_F32 for SP). - // NOTE: In textual representations the I/J VGPR is the first source and - // the attribute is the second source; however in the VOP3 encoding the - // attribute is stored in the src0 field and the VGPR is stored in the - // src1 field. - void - Inst_VOP3__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_INTERP_MOV_F32 class methods --- - - Inst_VOP3__V_INTERP_MOV_F32::Inst_VOP3__V_INTERP_MOV_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_interp_mov_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_INTERP_MOV_F32 - - Inst_VOP3__V_INTERP_MOV_F32::~Inst_VOP3__V_INTERP_MOV_F32() - { - } // ~Inst_VOP3__V_INTERP_MOV_F32 - - // --- description from .arch file --- - // D.f = {P10,P20,P0}[S.u]; parameter load. 
- void - Inst_VOP3__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_INTERP_P1LL_F16 class methods --- - - Inst_VOP3__V_INTERP_P1LL_F16::Inst_VOP3__V_INTERP_P1LL_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_interp_p1ll_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_INTERP_P1LL_F16 - - Inst_VOP3__V_INTERP_P1LL_F16::~Inst_VOP3__V_INTERP_P1LL_F16() - { - } // ~Inst_VOP3__V_INTERP_P1LL_F16 - - // --- description from .arch file --- - // D.f32 = P10.f16 * S0.f32 + P0.f16. - // 'LL' stands for 'two LDS arguments'. - // attr_word selects the high or low half 16 bits of each LDS dword - // accessed. - // This opcode is available for 32-bank LDS only. - // NOTE: In textual representations the I/J VGPR is the first source and - // the attribute is the second source; however in the VOP3 encoding the - // attribute is stored in the src0 field and the VGPR is stored in the - // src1 field. - void - Inst_VOP3__V_INTERP_P1LL_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_INTERP_P1LV_F16 class methods --- - - Inst_VOP3__V_INTERP_P1LV_F16::Inst_VOP3__V_INTERP_P1LV_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_interp_p1lv_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_INTERP_P1LV_F16 - - Inst_VOP3__V_INTERP_P1LV_F16::~Inst_VOP3__V_INTERP_P1LV_F16() - { - } // ~Inst_VOP3__V_INTERP_P1LV_F16 - - // --- description from .arch file --- - // D.f32 = P10.f16 * S0.f32 + (S2.u32 >> (attr_word * 16)).f16. - // 'LV' stands for 'One LDS and one VGPR argument'. - // S2 holds two parameters, attr_word selects the high or low word of the - // VGPR for this calculation, as well as the high or low half of the LDS - // data. - // Meant for use with 16-bank LDS. - // NOTE: In textual representations the I/J VGPR is the first source and - // the attribute is the second source; however in the VOP3 encoding the - // attribute is stored in the src0 field and the VGPR is stored in the - // src1 field. - void - Inst_VOP3__V_INTERP_P1LV_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_INTERP_P2_F16 class methods --- - - Inst_VOP3__V_INTERP_P2_F16::Inst_VOP3__V_INTERP_P2_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_interp_p2_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_INTERP_P2_F16 - - Inst_VOP3__V_INTERP_P2_F16::~Inst_VOP3__V_INTERP_P2_F16() - { - } // ~Inst_VOP3__V_INTERP_P2_F16 - - // --- description from .arch file --- - // D.f16 = P20.f16 * S0.f32 + S2.f32. - // Final computation. attr_word selects LDS high or low 16bits. Used for - // both 16- and 32-bank LDS. - // Result is always written to the 16 LSBs of the destination VGPR. - // NOTE: In textual representations the I/J VGPR is the first source and - // the attribute is the second source; however in the VOP3 encoding the - // attribute is stored in the src0 field and the VGPR is stored in the - // src1 field. - void - Inst_VOP3__V_INTERP_P2_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_ADD_F64 class methods --- - - Inst_VOP3__V_ADD_F64::Inst_VOP3__V_ADD_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_add_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_ADD_F64 - - Inst_VOP3__V_ADD_F64::~Inst_VOP3__V_ADD_F64() - { - } // ~Inst_VOP3__V_ADD_F64 - - // --- description from .arch file --- - // D.d = S0.d + S1.d. 
- void - Inst_VOP3__V_ADD_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isnan(src0[lane]) || - std::isnan(src1[lane]) ) { - vdst[lane] = NAN; - } else if (std::isinf(src0[lane]) && - std::isinf(src1[lane])) { - if (std::signbit(src0[lane]) != - std::signbit(src1[lane])) { - vdst[lane] = NAN; - } else { - vdst[lane] = src0[lane]; - } - } else if (std::isinf(src0[lane])) { - vdst[lane] = src0[lane]; - } else if (std::isinf(src1[lane])) { - vdst[lane] = src1[lane]; - } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL || - std::fpclassify(src0[lane]) == FP_ZERO) { - if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || - std::fpclassify(src1[lane]) == FP_ZERO) { - if (std::signbit(src0[lane]) && - std::signbit(src1[lane])) { - vdst[lane] = -0.0; - } else { - vdst[lane] = 0.0; - } - } else { - vdst[lane] = src1[lane]; - } - } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || - std::fpclassify(src1[lane]) == FP_ZERO) { - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL || - std::fpclassify(src0[lane]) == FP_ZERO) { - if (std::signbit(src0[lane]) && - std::signbit(src1[lane])) { - vdst[lane] = -0.0; - } else { - vdst[lane] = 0.0; - } - } else { - vdst[lane] = src0[lane]; - } - } else { - vdst[lane] = src0[lane] + src1[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MUL_F64 class methods --- - - Inst_VOP3__V_MUL_F64::Inst_VOP3__V_MUL_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mul_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_MUL_F64 - - Inst_VOP3__V_MUL_F64::~Inst_VOP3__V_MUL_F64() - { - } // ~Inst_VOP3__V_MUL_F64 - - // --- description from .arch file --- - // D.d = S0.d * S1.d. 
- void - Inst_VOP3__V_MUL_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isnan(src0[lane]) || - std::isnan(src1[lane])) { - vdst[lane] = NAN; - } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || - std::fpclassify(src0[lane]) == FP_ZERO) && - !std::signbit(src0[lane])) { - if (std::isinf(src1[lane])) { - vdst[lane] = NAN; - } else if (!std::signbit(src1[lane])) { - vdst[lane] = +0.0; - } else { - vdst[lane] = -0.0; - } - } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || - std::fpclassify(src0[lane]) == FP_ZERO) && - std::signbit(src0[lane])) { - if (std::isinf(src1[lane])) { - vdst[lane] = NAN; - } else if (std::signbit(src1[lane])) { - vdst[lane] = +0.0; - } else { - vdst[lane] = -0.0; - } - } else if (std::isinf(src0[lane]) && - !std::signbit(src0[lane])) { - if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || - std::fpclassify(src1[lane]) == FP_ZERO) { - vdst[lane] = NAN; - } else if (!std::signbit(src1[lane])) { - vdst[lane] = +INFINITY; - } else { - vdst[lane] = -INFINITY; - } - } else if (std::isinf(src0[lane]) && - std::signbit(src0[lane])) { - if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || - std::fpclassify(src1[lane]) == FP_ZERO) { - vdst[lane] = NAN; - } else if (std::signbit(src1[lane])) { - vdst[lane] = +INFINITY; - } else { - vdst[lane] = -INFINITY; - } - } else { - vdst[lane] = src0[lane] * src1[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MIN_F64 class methods --- - - Inst_VOP3__V_MIN_F64::Inst_VOP3__V_MIN_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_min_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_MIN_F64 - - Inst_VOP3__V_MIN_F64::~Inst_VOP3__V_MIN_F64() - { - } // ~Inst_VOP3__V_MIN_F64 - - // --- description from .arch file --- - // D.d = min(S0.d, S1.d). 
- void - Inst_VOP3__V_MIN_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fmin(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAX_F64 class methods --- - - Inst_VOP3__V_MAX_F64::Inst_VOP3__V_MAX_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_max_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_MAX_F64 - - Inst_VOP3__V_MAX_F64::~Inst_VOP3__V_MAX_F64() - { - } // ~Inst_VOP3__V_MAX_F64 - - // --- description from .arch file --- - // D.d = max(S0.d, S1.d). - void - Inst_VOP3__V_MAX_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fmax(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LDEXP_F64 class methods --- - - Inst_VOP3__V_LDEXP_F64::Inst_VOP3__V_LDEXP_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_ldexp_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_LDEXP_F64 - - Inst_VOP3__V_LDEXP_F64::~Inst_VOP3__V_LDEXP_F64() - { - } // ~Inst_VOP3__V_LDEXP_F64 - - // --- description from .arch file --- - // D.d = pow(S0.d, S1.i[31:0]). 
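// Note on the wording above: ldexp scales by a power of two, so the
// operation the implementation below performs is D.d = S0.d * 2^S1.i
// rather than a general pow(). Two quick worked examples:
//   ldexp(1.5, 3)   == 1.5  * 2^3  == 12.0
//   ldexp(-6.0, -1) == -6.0 * 2^-1 == -3.0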
- void - Inst_VOP3__V_LDEXP_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isnan(src0[lane]) || std::isinf(src0[lane])) { - vdst[lane] = src0[lane]; - } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - || std::fpclassify(src0[lane]) == FP_ZERO) { - if (std::signbit(src0[lane])) { - vdst[lane] = -0.0; - } else { - vdst[lane] = +0.0; - } - } else { - vdst[lane] = std::ldexp(src0[lane], src1[lane]); - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MUL_LO_U32 class methods --- - - Inst_VOP3__V_MUL_LO_U32::Inst_VOP3__V_MUL_LO_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mul_lo_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MUL_LO_U32 - - Inst_VOP3__V_MUL_LO_U32::~Inst_VOP3__V_MUL_LO_U32() - { - } // ~Inst_VOP3__V_MUL_LO_U32 - - // --- description from .arch file --- - // D.u = S0.u * S1.u. - void - Inst_VOP3__V_MUL_LO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemI64 s0 = (VecElemI64)src0[lane]; - VecElemI64 s1 = (VecElemI64)src1[lane]; - vdst[lane] = (VecElemU32)((s0 * s1) & 0xffffffffLL); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MUL_HI_U32 class methods --- - - Inst_VOP3__V_MUL_HI_U32::Inst_VOP3__V_MUL_HI_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mul_hi_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MUL_HI_U32 - - Inst_VOP3__V_MUL_HI_U32::~Inst_VOP3__V_MUL_HI_U32() - { - } // ~Inst_VOP3__V_MUL_HI_U32 - - // --- description from .arch file --- - // D.u = (S0.u * S1.u) >> 32. 
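// Worked example for the high-half multiply that follows: widening both
// operands to 64 bits, 0xFFFFFFFF * 0x00000010 = 0x0000000FFFFFFFF0, so
// v_mul_hi_u32 returns 0x0000000F while v_mul_lo_u32 (above) returns
// 0xFFFFFFF0.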
- void - Inst_VOP3__V_MUL_HI_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemI64 s0 = (VecElemI64)src0[lane]; - VecElemI64 s1 = (VecElemI64)src1[lane]; - vdst[lane] - = (VecElemU32)(((s0 * s1) >> 32) & 0xffffffffLL); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MUL_HI_I32 class methods --- - - Inst_VOP3__V_MUL_HI_I32::Inst_VOP3__V_MUL_HI_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mul_hi_i32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MUL_HI_I32 - - Inst_VOP3__V_MUL_HI_I32::~Inst_VOP3__V_MUL_HI_I32() - { - } // ~Inst_VOP3__V_MUL_HI_I32 - - // --- description from .arch file --- - // D.i = (S0.i * S1.i) >> 32. - void - Inst_VOP3__V_MUL_HI_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemI64 s0 = (VecElemI64)src0[lane]; - VecElemI64 s1 = (VecElemI64)src1[lane]; - vdst[lane] - = (VecElemI32)(((s0 * s1) >> 32LL) & 0xffffffffLL); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LDEXP_F32 class methods --- - - Inst_VOP3__V_LDEXP_F32::Inst_VOP3__V_LDEXP_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_ldexp_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_LDEXP_F32 - - Inst_VOP3__V_LDEXP_F32::~Inst_VOP3__V_LDEXP_F32() - { - } // ~Inst_VOP3__V_LDEXP_F32 - - // --- description from .arch file --- - // D.f = pow(S0.f, S1.i) - void - Inst_VOP3__V_LDEXP_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::ldexp(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_READLANE_B32 class methods --- - - Inst_VOP3__V_READLANE_B32::Inst_VOP3__V_READLANE_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_readlane_b32", true) - { - setFlag(ALU); - setFlag(IgnoreExec); - } // Inst_VOP3__V_READLANE_B32 - - Inst_VOP3__V_READLANE_B32::~Inst_VOP3__V_READLANE_B32() - { - } // ~Inst_VOP3__V_READLANE_B32 - - // --- description from .arch file --- 
- // Copy one VGPR value to one SGPR. D = SGPR-dest, S0 = Source Data (VGPR# - // or M0(lds-direct)), S1 = Lane Select (SGPR or M0). Ignores exec mask. - // Input and output modifiers not supported; this is an untyped operation. - void - Inst_VOP3__V_READLANE_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - sdst = src0[src1.rawData() & 0x3f]; - - sdst.write(); - } // execute - // --- Inst_VOP3__V_WRITELANE_B32 class methods --- - - Inst_VOP3__V_WRITELANE_B32::Inst_VOP3__V_WRITELANE_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_writelane_b32", false) - { - setFlag(ALU); - setFlag(IgnoreExec); - } // Inst_VOP3__V_WRITELANE_B32 - - Inst_VOP3__V_WRITELANE_B32::~Inst_VOP3__V_WRITELANE_B32() - { - } // ~Inst_VOP3__V_WRITELANE_B32 - - // --- description from .arch file --- - // Write value into one VGPR in one lane. D = VGPR-dest, S0 = Source Data - // (sgpr, m0, exec or constants), S1 = Lane Select (SGPR or M0). Ignores - // exec mask. - // Input and output modifiers not supported; this is an untyped operation. - // SQ translates to V_MOV_B32. - void - Inst_VOP3__V_WRITELANE_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, extData.SRC0); - ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.read(); - src1.read(); - vdst.read(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - vdst[src1.rawData() & 0x3f] = src0.rawData(); - - vdst.write(); - } // execute - // --- Inst_VOP3__V_BCNT_U32_B32 class methods --- - - Inst_VOP3__V_BCNT_U32_B32::Inst_VOP3__V_BCNT_U32_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_bcnt_u32_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_BCNT_U32_B32 - - Inst_VOP3__V_BCNT_U32_B32::~Inst_VOP3__V_BCNT_U32_B32() - { - } // ~Inst_VOP3__V_BCNT_U32_B32 - - // --- description from .arch file --- - // D.u = CountOneBits(S0.u) + S1.u. Bit count. 
- void - Inst_VOP3__V_BCNT_U32_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = popCount(src0[lane]) + src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MBCNT_LO_U32_B32 class methods --- - - Inst_VOP3__V_MBCNT_LO_U32_B32::Inst_VOP3__V_MBCNT_LO_U32_B32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mbcnt_lo_u32_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MBCNT_LO_U32_B32 - - Inst_VOP3__V_MBCNT_LO_U32_B32::~Inst_VOP3__V_MBCNT_LO_U32_B32() - { - } // ~Inst_VOP3__V_MBCNT_LO_U32_B32 - - // --- description from .arch file --- - // ThreadMask = (1 << ThreadPosition) - 1; - // D.u = CountOneBits(S0.u & ThreadMask[31:0]) + S1.u. - // Masked bit count, ThreadPosition is the position of this thread in the - // --- wavefront (in 0..63). - void - Inst_VOP3__V_MBCNT_LO_U32_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - uint64_t threadMask = 0; - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - threadMask = ((1LL << lane) - 1LL); - vdst[lane] = popCount(src0[lane] & bits(threadMask, 31, 0)) + - src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MBCNT_HI_U32_B32 class methods --- - - Inst_VOP3__V_MBCNT_HI_U32_B32::Inst_VOP3__V_MBCNT_HI_U32_B32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mbcnt_hi_u32_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MBCNT_HI_U32_B32 - - Inst_VOP3__V_MBCNT_HI_U32_B32::~Inst_VOP3__V_MBCNT_HI_U32_B32() - { - } // ~Inst_VOP3__V_MBCNT_HI_U32_B32 - - // --- description from .arch file --- - // ThreadMask = (1 << ThreadPosition) - 1; - // D.u = CountOneBits(S0.u & ThreadMask[63:32]) + S1.u. - // Masked bit count, ThreadPosition is the position of this thread in the - // --- wavefront (in 0..63). 
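// Illustrative, self-contained sketch (not part of this change) of the usual
// v_mbcnt_lo_u32_b32 + v_mbcnt_hi_u32_b32 idiom: each lane counts the active
// lanes below it, splitting the 64-bit mask exactly as the two opcodes do.
#include <cstdint>

uint32_t
laneRank(uint64_t execMask, int lane)  // lane in 0..63
{
    uint64_t threadMask = (1ULL << lane) - 1ULL;
    uint64_t below = execMask & threadMask;
    uint32_t lo = __builtin_popcount(uint32_t(below));        // mbcnt_lo, bits [31:0]
    uint32_t hi = __builtin_popcount(uint32_t(below >> 32));  // mbcnt_hi, bits [63:32]
    return hi + lo;  // mbcnt_hi is given mbcnt_lo's result as its S1 accumulator
}
// Example: with all 64 lanes active, lane 37 sees lo = 32, hi = 5, rank = 37.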
- void - Inst_VOP3__V_MBCNT_HI_U32_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - uint64_t threadMask = 0; - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - threadMask = ((1LL << lane) - 1LL); - vdst[lane] = popCount(src0[lane] & bits(threadMask, 63, 32)) + - src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LSHLREV_B64 class methods --- - - Inst_VOP3__V_LSHLREV_B64::Inst_VOP3__V_LSHLREV_B64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_lshlrev_b64", false) - { - setFlag(ALU); - } // Inst_VOP3__V_LSHLREV_B64 - - Inst_VOP3__V_LSHLREV_B64::~Inst_VOP3__V_LSHLREV_B64() - { - } // ~Inst_VOP3__V_LSHLREV_B64 - - // --- description from .arch file --- - // D.u64 = S1.u64 << S0.u[5:0]. - // SQ translates this to an internal SP opcode. - void - Inst_VOP3__V_LSHLREV_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - VecOperandU64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] << bits(src0[lane], 5, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LSHRREV_B64 class methods --- - - Inst_VOP3__V_LSHRREV_B64::Inst_VOP3__V_LSHRREV_B64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_lshrrev_b64", false) - { - setFlag(ALU); - } // Inst_VOP3__V_LSHRREV_B64 - - Inst_VOP3__V_LSHRREV_B64::~Inst_VOP3__V_LSHRREV_B64() - { - } // ~Inst_VOP3__V_LSHRREV_B64 - - // --- description from .arch file --- - // D.u64 = S1.u64 >> S0.u[5:0]. - // The vacated bits are set to zero. - // SQ translates this to an internal SP opcode. 
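// Worked example for the two 64-bit right shifts that follow: with
// S0[5:0] = 8 and S1 = 0xFF00000000000000,
//   v_lshrrev_b64 (logical)    yields 0x00FF000000000000 (zero fill), while
//   v_ashrrev_i64 (arithmetic) yields 0xFFFF000000000000 (sign fill),
// because S1 is negative when read as a signed 64-bit value.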
- void - Inst_VOP3__V_LSHRREV_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - VecOperandU64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> bits(src0[lane], 5, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_ASHRREV_I64 class methods --- - - Inst_VOP3__V_ASHRREV_I64::Inst_VOP3__V_ASHRREV_I64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_ashrrev_i64", false) - { - setFlag(ALU); - } // Inst_VOP3__V_ASHRREV_I64 - - Inst_VOP3__V_ASHRREV_I64::~Inst_VOP3__V_ASHRREV_I64() - { - } // ~Inst_VOP3__V_ASHRREV_I64 - - // --- description from .arch file --- - // D.u64 = signext(S1.u64) >> S0.u[5:0]. - // The vacated bits are set to the sign bit of the input value. - // SQ translates this to an internal SP opcode. - void - Inst_VOP3__V_ASHRREV_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - VecOperandU64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] - = src1[lane] >> bits(src0[lane], 5, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_TRIG_PREOP_F64 class methods --- - - Inst_VOP3__V_TRIG_PREOP_F64::Inst_VOP3__V_TRIG_PREOP_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_trig_preop_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_TRIG_PREOP_F64 - - Inst_VOP3__V_TRIG_PREOP_F64::~Inst_VOP3__V_TRIG_PREOP_F64() - { - } // ~Inst_VOP3__V_TRIG_PREOP_F64 - - // --- description from .arch file --- - // D.d = Look Up 2/PI (S0.d) with segment select S1.u[4:0]. This operation - // returns an aligned, double precision segment of 2/PI needed to do range - // reduction on S0.d (double-precision value). Multiple segments can be - // specified through S1.u[4:0]. Rounding is always round-to-zero. Large - // inputs (exp > 1968) are scaled to avoid loss of precision through - // denormalization. 
- void
- Inst_VOP3__V_TRIG_PREOP_F64::execute(GPUDynInstPtr gpuDynInst)
- {
- panicUnimplemented();
- } // execute
- // --- Inst_VOP3__V_BFM_B32 class methods ---
-
- Inst_VOP3__V_BFM_B32::Inst_VOP3__V_BFM_B32(InFmt_VOP3A *iFmt)
- : Inst_VOP3A(iFmt, "v_bfm_b32", false)
- {
- setFlag(ALU);
- } // Inst_VOP3__V_BFM_B32
-
- Inst_VOP3__V_BFM_B32::~Inst_VOP3__V_BFM_B32()
- {
- } // ~Inst_VOP3__V_BFM_B32
-
- // --- description from .arch file ---
- // D.u = ((1<<S0.u[4:0])-1) << S1.u[4:0]; bitfield modify.
- // S0 is the bitfield width and S1 is the bitfield offset.
- void
- Inst_VOP3__V_BFM_B32::execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *wf = gpuDynInst->wavefront();
- ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
- ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
- VecOperandU32 vdst(gpuDynInst, instData.VDST);
-
- src0.readSrc();
- src1.readSrc();
-
- /**
- * input modifiers are supported by FP operations only
- */
- assert(!(instData.ABS & 0x1));
- assert(!(instData.ABS & 0x2));
- assert(!(instData.ABS & 0x4));
- assert(!(extData.NEG & 0x1));
- assert(!(extData.NEG & 0x2));
- assert(!(extData.NEG & 0x4));
-
- for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
- if (wf->execMask(lane)) {
- vdst[lane] = ((1 << bits(src0[lane], 4, 0)) - 1)
- << bits(src1[lane], 4, 0);
- }
- }
-
- vdst.write();
- } // execute
- // --- Inst_VOP3__V_CVT_PKNORM_I16_F32 class methods ---
-
- Inst_VOP3__V_CVT_PKNORM_I16_F32::Inst_VOP3__V_CVT_PKNORM_I16_F32(
- InFmt_VOP3A *iFmt)
- : Inst_VOP3A(iFmt, "v_cvt_pknorm_i16_f32", false)
- {
- setFlag(ALU);
- setFlag(F32);
- } // Inst_VOP3__V_CVT_PKNORM_I16_F32
-
- Inst_VOP3__V_CVT_PKNORM_I16_F32::~Inst_VOP3__V_CVT_PKNORM_I16_F32()
- {
- } // ~Inst_VOP3__V_CVT_PKNORM_I16_F32
-
- // --- description from .arch file ---
- // D = {(snorm)S1.f, (snorm)S0.f}.
- void
- Inst_VOP3__V_CVT_PKNORM_I16_F32::execute(GPUDynInstPtr gpuDynInst)
- {
- panicUnimplemented();
- } // execute
- // --- Inst_VOP3__V_CVT_PKNORM_U16_F32 class methods ---
-
- Inst_VOP3__V_CVT_PKNORM_U16_F32::Inst_VOP3__V_CVT_PKNORM_U16_F32(
- InFmt_VOP3A *iFmt)
- : Inst_VOP3A(iFmt, "v_cvt_pknorm_u16_f32", false)
- {
- setFlag(ALU);
- setFlag(F32);
- } // Inst_VOP3__V_CVT_PKNORM_U16_F32
-
- Inst_VOP3__V_CVT_PKNORM_U16_F32::~Inst_VOP3__V_CVT_PKNORM_U16_F32()
- {
- } // ~Inst_VOP3__V_CVT_PKNORM_U16_F32
-
- // --- description from .arch file ---
- // D = {(unorm)S1.f, (unorm)S0.f}.
- void
- Inst_VOP3__V_CVT_PKNORM_U16_F32::execute(GPUDynInstPtr gpuDynInst)
- {
- panicUnimplemented();
- } // execute
- // --- Inst_VOP3__V_CVT_PKRTZ_F16_F32 class methods ---
-
- Inst_VOP3__V_CVT_PKRTZ_F16_F32::Inst_VOP3__V_CVT_PKRTZ_F16_F32(
- InFmt_VOP3A *iFmt)
- : Inst_VOP3A(iFmt, "v_cvt_pkrtz_f16_f32", false)
- {
- setFlag(ALU);
- setFlag(F32);
- } // Inst_VOP3__V_CVT_PKRTZ_F16_F32
-
- Inst_VOP3__V_CVT_PKRTZ_F16_F32::~Inst_VOP3__V_CVT_PKRTZ_F16_F32()
- {
- } // ~Inst_VOP3__V_CVT_PKRTZ_F16_F32
-
- // --- description from .arch file ---
- // D = {flt32_to_flt16(S1.f),flt32_to_flt16(S0.f)}, with round-toward-zero
- // --- regardless of current round mode setting in hardware.
- // This opcode is intended for use with 16-bit compressed exports.
- // See V_CVT_F16_F32 for a version that respects the current rounding mode.
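// Illustrative sketch only (not part of this change): one way the packing
// described above could be realized. Round-toward-zero float32->float16
// amounts to truncating the mantissa; this simplified helper flushes f16
// denormals to zero and clamps finite overflow to the largest finite f16.
#include <cstdint>
#include <cstring>

static uint16_t
f32ToF16Rtz(float in)
{
    uint32_t u;
    std::memcpy(&u, &in, sizeof(u));
    uint16_t sign = (u >> 16) & 0x8000;
    uint32_t exp8 = (u >> 23) & 0xff;
    uint32_t mant = u & 0x7fffff;
    if (exp8 == 0xff)                              // Inf/NaN propagate
        return sign | 0x7c00 | (mant ? 0x200 : 0);
    int32_t exp5 = int32_t(exp8) - 127 + 15;
    if (exp5 >= 0x1f) return sign | 0x7bff;        // overflow: largest finite (RTZ)
    if (exp5 <= 0)    return sign;                 // underflow: flush to zero
    return sign | (exp5 << 10) | (mant >> 13);     // truncate mantissa (RTZ)
}

// D = {f16(S1), f16(S0)}, low half from S0, as the description states.
static uint32_t
packRtz(float s0, float s1)
{
    return uint32_t(f32ToF16Rtz(s1)) << 16 | f32ToF16Rtz(s0);
}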
- void
- Inst_VOP3__V_CVT_PKRTZ_F16_F32::execute(GPUDynInstPtr gpuDynInst)
- {
- panicUnimplemented();
- } // execute
- // --- Inst_VOP3__V_CVT_PK_U16_U32 class methods ---
-
- Inst_VOP3__V_CVT_PK_U16_U32::Inst_VOP3__V_CVT_PK_U16_U32(InFmt_VOP3A *iFmt)
- : Inst_VOP3A(iFmt, "v_cvt_pk_u16_u32", false)
- {
- setFlag(ALU);
- } // Inst_VOP3__V_CVT_PK_U16_U32
-
- Inst_VOP3__V_CVT_PK_U16_U32::~Inst_VOP3__V_CVT_PK_U16_U32()
- {
- } // ~Inst_VOP3__V_CVT_PK_U16_U32
-
- // --- description from .arch file ---
- // D = {uint32_to_uint16(S1.u), uint32_to_uint16(S0.u)}.
- void
- Inst_VOP3__V_CVT_PK_U16_U32::execute(GPUDynInstPtr gpuDynInst)
- {
- panicUnimplemented();
- } // execute
- // --- Inst_VOP3__V_CVT_PK_I16_I32 class methods ---
-
- Inst_VOP3__V_CVT_PK_I16_I32::Inst_VOP3__V_CVT_PK_I16_I32(InFmt_VOP3A *iFmt)
- : Inst_VOP3A(iFmt, "v_cvt_pk_i16_i32", false)
- {
- setFlag(ALU);
- } // Inst_VOP3__V_CVT_PK_I16_I32
-
- Inst_VOP3__V_CVT_PK_I16_I32::~Inst_VOP3__V_CVT_PK_I16_I32()
- {
- } // ~Inst_VOP3__V_CVT_PK_I16_I32
-
- // --- description from .arch file ---
- // D = {int32_to_int16(S1.i), int32_to_int16(S0.i)}.
- void
- Inst_VOP3__V_CVT_PK_I16_I32::execute(GPUDynInstPtr gpuDynInst)
- {
- panicUnimplemented();
- } // execute
- // --- Inst_DS__DS_ADD_U32 class methods ---
-
- Inst_DS__DS_ADD_U32::Inst_DS__DS_ADD_U32(InFmt_DS *iFmt)
- : Inst_DS(iFmt, "ds_add_u32")
- {
- setFlag(MemoryRef);
- setFlag(GroupSegment);
- setFlag(AtomicAdd);
- setFlag(AtomicNoReturn);
- } // Inst_DS__DS_ADD_U32
-
- Inst_DS__DS_ADD_U32::~Inst_DS__DS_ADD_U32()
- {
- } // ~Inst_DS__DS_ADD_U32
-
- // --- description from .arch file ---
- // 32b:
- // MEM[ADDR] += DATA;
- void
- Inst_DS__DS_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *wf = gpuDynInst->wavefront();
-
- if (gpuDynInst->exec_mask.none()) {
- wf->decLGKMInstsIssued();
- return;
- }
-
- gpuDynInst->execUnitId = wf->execUnitId;
- gpuDynInst->latency.init(gpuDynInst->computeUnit());
- gpuDynInst->latency.set(
- gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
- ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
- ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
-
- addr.read();
- data.read();
-
- calcAddr(gpuDynInst, addr);
-
- for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
- if (gpuDynInst->exec_mask[lane]) {
- (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
- = data[lane];
- }
- }
-
- gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
- } // execute
-
- void
- Inst_DS__DS_ADD_U32::initiateAcc(GPUDynInstPtr gpuDynInst)
- {
- Addr offset0 = instData.OFFSET0;
- Addr offset1 = instData.OFFSET1;
- Addr offset = (offset1 << 8) | offset0;
-
- initAtomicAccess(gpuDynInst, offset);
- } // initiateAcc
-
- void
- Inst_DS__DS_ADD_U32::completeAcc(GPUDynInstPtr gpuDynInst)
- {
- } // completeAcc
- // --- Inst_DS__DS_SUB_U32 class methods ---
-
- Inst_DS__DS_SUB_U32::Inst_DS__DS_SUB_U32(InFmt_DS *iFmt)
- : Inst_DS(iFmt, "ds_sub_u32")
- {
- } // Inst_DS__DS_SUB_U32
-
- Inst_DS__DS_SUB_U32::~Inst_DS__DS_SUB_U32()
- {
- } // ~Inst_DS__DS_SUB_U32
-
- // --- description from .arch file ---
- // 32b:
- // tmp = MEM[ADDR];
- // MEM[ADDR] -= DATA;
- // RETURN_DATA = tmp.
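// Illustrative sketch only (not part of this change): ds_sub_u32 could mirror
// the ds_add_u32 pattern shown above, differing only in the atomic flag
// (AtomicSub is assumed to be the matching opcode flag):
//
//     setFlag(MemoryRef);
//     setFlag(GroupSegment);
//     setFlag(AtomicSub);
//     setFlag(AtomicNoReturn);
//
// with an execute() that stages the per-lane operand exactly like ds_add_u32,
//
//     (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane] = data[lane];
//
// and initiateAcc()/completeAcc() identical to the ds_add_u32 versions.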
- void - Inst_DS__DS_SUB_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_RSUB_U32 class methods --- - - Inst_DS__DS_RSUB_U32::Inst_DS__DS_RSUB_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_rsub_u32") - { - } // Inst_DS__DS_RSUB_U32 - - Inst_DS__DS_RSUB_U32::~Inst_DS__DS_RSUB_U32() - { - } // ~Inst_DS__DS_RSUB_U32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA - MEM[ADDR]; - // RETURN_DATA = tmp. - // Subtraction with reversed operands. - void - Inst_DS__DS_RSUB_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_INC_U32 class methods --- - - Inst_DS__DS_INC_U32::Inst_DS__DS_INC_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_inc_u32") - { - } // Inst_DS__DS_INC_U32 - - Inst_DS__DS_INC_U32::~Inst_DS__DS_INC_U32() - { - } // ~Inst_DS__DS_INC_U32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); - // RETURN_DATA = tmp. - void - Inst_DS__DS_INC_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_DEC_U32 class methods --- - - Inst_DS__DS_DEC_U32::Inst_DS__DS_DEC_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_dec_u32") - { - } // Inst_DS__DS_DEC_U32 - - Inst_DS__DS_DEC_U32::~Inst_DS__DS_DEC_U32() - { - } // ~Inst_DS__DS_DEC_U32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 - // (unsigned compare); RETURN_DATA = tmp. - void - Inst_DS__DS_DEC_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_I32 class methods --- - - Inst_DS__DS_MIN_I32::Inst_DS__DS_MIN_I32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_i32") - { - } // Inst_DS__DS_MIN_I32 - - Inst_DS__DS_MIN_I32::~Inst_DS__DS_MIN_I32() - { - } // ~Inst_DS__DS_MIN_I32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); - // RETURN_DATA = tmp. - void - Inst_DS__DS_MIN_I32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_I32 class methods --- - - Inst_DS__DS_MAX_I32::Inst_DS__DS_MAX_I32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_i32") - { - } // Inst_DS__DS_MAX_I32 - - Inst_DS__DS_MAX_I32::~Inst_DS__DS_MAX_I32() - { - } // ~Inst_DS__DS_MAX_I32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); - // RETURN_DATA = tmp. - void - Inst_DS__DS_MAX_I32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_U32 class methods --- - - Inst_DS__DS_MIN_U32::Inst_DS__DS_MIN_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_u32") - { - } // Inst_DS__DS_MIN_U32 - - Inst_DS__DS_MIN_U32::~Inst_DS__DS_MIN_U32() - { - } // ~Inst_DS__DS_MIN_U32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); - // RETURN_DATA = tmp. 
- void
- Inst_DS__DS_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
- {
- panicUnimplemented();
- } // execute
- // --- Inst_DS__DS_MAX_U32 class methods ---
-
- Inst_DS__DS_MAX_U32::Inst_DS__DS_MAX_U32(InFmt_DS *iFmt)
- : Inst_DS(iFmt, "ds_max_u32")
- {
- } // Inst_DS__DS_MAX_U32
-
- Inst_DS__DS_MAX_U32::~Inst_DS__DS_MAX_U32()
- {
- } // ~Inst_DS__DS_MAX_U32
-
- // --- description from .arch file ---
- // 32b:
- // tmp = MEM[ADDR];
- // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
- // RETURN_DATA = tmp.
- void
- Inst_DS__DS_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
- {
- panicUnimplemented();
- } // execute
- // --- Inst_DS__DS_AND_B32 class methods ---
-
- Inst_DS__DS_AND_B32::Inst_DS__DS_AND_B32(InFmt_DS *iFmt)
- : Inst_DS(iFmt, "ds_and_b32")
- {
- } // Inst_DS__DS_AND_B32
-
- Inst_DS__DS_AND_B32::~Inst_DS__DS_AND_B32()
- {
- } // ~Inst_DS__DS_AND_B32
-
- // --- description from .arch file ---
- // 32b:
- // tmp = MEM[ADDR];
- // MEM[ADDR] &= DATA;
- // RETURN_DATA = tmp.
- void
- Inst_DS__DS_AND_B32::execute(GPUDynInstPtr gpuDynInst)
- {
- panicUnimplemented();
- } // execute
- // --- Inst_DS__DS_OR_B32 class methods ---
-
- Inst_DS__DS_OR_B32::Inst_DS__DS_OR_B32(InFmt_DS *iFmt)
- : Inst_DS(iFmt, "ds_or_b32")
- {
- setFlag(MemoryRef);
- setFlag(GroupSegment);
- setFlag(AtomicOr);
- setFlag(AtomicNoReturn);
- } // Inst_DS__DS_OR_B32
-
- Inst_DS__DS_OR_B32::~Inst_DS__DS_OR_B32()
- {
- } // ~Inst_DS__DS_OR_B32
-
- // --- description from .arch file ---
- // 32b:
- // MEM[ADDR] |= DATA;
- void
- Inst_DS__DS_OR_B32::execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *wf = gpuDynInst->wavefront();
-
- if (gpuDynInst->exec_mask.none()) {
- wf->decLGKMInstsIssued();
- return;
- }
-
- gpuDynInst->execUnitId = wf->execUnitId;
- gpuDynInst->latency.init(gpuDynInst->computeUnit());
- gpuDynInst->latency.set(
- gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
- ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
- ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
-
- addr.read();
- data.read();
-
- calcAddr(gpuDynInst, addr);
-
- for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
- if (gpuDynInst->exec_mask[lane]) {
- (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
- = data[lane];
- }
- }
-
- gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
- } // execute
-
- void
- Inst_DS__DS_OR_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
- {
- Addr offset0 = instData.OFFSET0;
- Addr offset1 = instData.OFFSET1;
- Addr offset = (offset1 << 8) | offset0;
-
- initAtomicAccess(gpuDynInst, offset);
- } // initiateAcc
-
- void
- Inst_DS__DS_OR_B32::completeAcc(GPUDynInstPtr gpuDynInst)
- {
- } // completeAcc
-
- // --- Inst_DS__DS_XOR_B32 class methods ---
-
- Inst_DS__DS_XOR_B32::Inst_DS__DS_XOR_B32(InFmt_DS *iFmt)
- : Inst_DS(iFmt, "ds_xor_b32")
- {
- } // Inst_DS__DS_XOR_B32
-
- Inst_DS__DS_XOR_B32::~Inst_DS__DS_XOR_B32()
- {
- } // ~Inst_DS__DS_XOR_B32
-
- // --- description from .arch file ---
- // 32b:
- // tmp = MEM[ADDR];
- // MEM[ADDR] ^= DATA;
- // RETURN_DATA = tmp.
- void
- Inst_DS__DS_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
- {
- panicUnimplemented();
- } // execute
- // --- Inst_DS__DS_MSKOR_B32 class methods ---
-
- Inst_DS__DS_MSKOR_B32::Inst_DS__DS_MSKOR_B32(InFmt_DS *iFmt)
- : Inst_DS(iFmt, "ds_mskor_b32")
- {
- } // Inst_DS__DS_MSKOR_B32
-
- Inst_DS__DS_MSKOR_B32::~Inst_DS__DS_MSKOR_B32()
- {
- } // ~Inst_DS__DS_MSKOR_B32
-
- // --- description from .arch file ---
- // 32b:
- // tmp = MEM[ADDR];
- // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2;
- // RETURN_DATA = tmp.
- // Masked dword OR, D0 contains the mask and D1 contains the new value.
- void
- Inst_DS__DS_MSKOR_B32::execute(GPUDynInstPtr gpuDynInst)
- {
- panicUnimplemented();
- } // execute
- // --- Inst_DS__DS_WRITE_B32 class methods ---
-
- Inst_DS__DS_WRITE_B32::Inst_DS__DS_WRITE_B32(InFmt_DS *iFmt)
- : Inst_DS(iFmt, "ds_write_b32")
- {
- setFlag(MemoryRef);
- setFlag(Store);
- } // Inst_DS__DS_WRITE_B32
-
- Inst_DS__DS_WRITE_B32::~Inst_DS__DS_WRITE_B32()
- {
- } // ~Inst_DS__DS_WRITE_B32
-
- // --- description from .arch file ---
- // 32b:
- // MEM[ADDR] = DATA.
- // Write dword.
- void
- Inst_DS__DS_WRITE_B32::execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *wf = gpuDynInst->wavefront();
-
- if (gpuDynInst->exec_mask.none()) {
- wf->decLGKMInstsIssued();
- return;
- }
-
- gpuDynInst->execUnitId = wf->execUnitId;
- gpuDynInst->latency.init(gpuDynInst->computeUnit());
- gpuDynInst->latency.set(
- gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
- ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
- ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
-
- addr.read();
- data.read();
-
- calcAddr(gpuDynInst, addr);
-
- for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
- if (gpuDynInst->exec_mask[lane]) {
- (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
- = data[lane];
- }
- }
-
- gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
- } // execute
-
- void
- Inst_DS__DS_WRITE_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
- {
- Addr offset0 = instData.OFFSET0;
- Addr offset1 = instData.OFFSET1;
- Addr offset = (offset1 << 8) | offset0;
-
- initMemWrite(gpuDynInst, offset);
- } // initiateAcc
-
- void
- Inst_DS__DS_WRITE_B32::completeAcc(GPUDynInstPtr gpuDynInst)
- {
- } // completeAcc
- // --- Inst_DS__DS_WRITE2_B32 class methods ---
-
- Inst_DS__DS_WRITE2_B32::Inst_DS__DS_WRITE2_B32(InFmt_DS *iFmt)
- : Inst_DS(iFmt, "ds_write2_b32")
- {
- setFlag(MemoryRef);
- setFlag(Store);
- } // Inst_DS__DS_WRITE2_B32
-
- Inst_DS__DS_WRITE2_B32::~Inst_DS__DS_WRITE2_B32()
- {
- } // ~Inst_DS__DS_WRITE2_B32
-
- // --- description from .arch file ---
- // 32b:
- // MEM[ADDR_BASE + OFFSET0 * 4] = DATA;
- // MEM[ADDR_BASE + OFFSET1 * 4] = DATA2.
- // Write 2 dwords.
- void
- Inst_DS__DS_WRITE2_B32::execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *wf = gpuDynInst->wavefront();
-
- if (gpuDynInst->exec_mask.none()) {
- wf->decLGKMInstsIssued();
- return;
- }
-
- gpuDynInst->execUnitId = wf->execUnitId;
- gpuDynInst->latency.init(gpuDynInst->computeUnit());
- gpuDynInst->latency.set(
- gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
- ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
- ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
- ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);
-
- addr.read();
- data0.read();
- data1.read();
-
- calcAddr(gpuDynInst, addr);
-
- for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
- if (gpuDynInst->exec_mask[lane]) {
- (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
- = data0[lane];
- (reinterpret_cast<VecElemU32*>(
- gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
- }
- }
-
- gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
- } // execute
-
- void
- Inst_DS__DS_WRITE2_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
- {
- Addr offset0 = instData.OFFSET0 * 4;
- Addr offset1 = instData.OFFSET1 * 4;
-
- initDualMemWrite(gpuDynInst, offset0, offset1);
- }
-
- void
- Inst_DS__DS_WRITE2_B32::completeAcc(GPUDynInstPtr gpuDynInst)
- {
- }
- // --- Inst_DS__DS_WRITE2ST64_B32 class methods ---
-
- Inst_DS__DS_WRITE2ST64_B32::Inst_DS__DS_WRITE2ST64_B32(InFmt_DS *iFmt)
- : Inst_DS(iFmt, "ds_write2st64_b32")
- {
- setFlag(MemoryRef);
- setFlag(Store);
- } // Inst_DS__DS_WRITE2ST64_B32
-
- Inst_DS__DS_WRITE2ST64_B32::~Inst_DS__DS_WRITE2ST64_B32()
- {
- } // ~Inst_DS__DS_WRITE2ST64_B32
-
- // --- description from .arch file ---
- // 32b:
- // MEM[ADDR_BASE + OFFSET0 * 4 * 64] = DATA;
- // MEM[ADDR_BASE + OFFSET1 * 4 * 64] = DATA2;
- // Write 2 dwords.
- void
- Inst_DS__DS_WRITE2ST64_B32::execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *wf = gpuDynInst->wavefront();
-
- if (gpuDynInst->exec_mask.none()) {
- wf->decLGKMInstsIssued();
- return;
- }
-
- gpuDynInst->execUnitId = wf->execUnitId;
- gpuDynInst->latency.init(gpuDynInst->computeUnit());
- gpuDynInst->latency.set(
- gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
- ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
- ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
- ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);
-
- addr.read();
- data0.read();
- data1.read();
-
- calcAddr(gpuDynInst, addr);
-
- for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
- if (gpuDynInst->exec_mask[lane]) {
- (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
- = data0[lane];
- (reinterpret_cast<VecElemU32*>(
- gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
- }
- }
-
- gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
- } // execute
-
- void
- Inst_DS__DS_WRITE2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
- {
- Addr offset0 = instData.OFFSET0 * 4 * 64;
- Addr offset1 = instData.OFFSET1 * 4 * 64;
-
- initDualMemWrite(gpuDynInst, offset0, offset1);
- }
-
- void
- Inst_DS__DS_WRITE2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst)
- {
- }
- // --- Inst_DS__DS_CMPST_B32 class methods ---
-
- Inst_DS__DS_CMPST_B32::Inst_DS__DS_CMPST_B32(InFmt_DS *iFmt)
- : Inst_DS(iFmt, "ds_cmpst_b32")
- {
- } // Inst_DS__DS_CMPST_B32
-
- Inst_DS__DS_CMPST_B32::~Inst_DS__DS_CMPST_B32()
- {
- } // ~Inst_DS__DS_CMPST_B32
-
- // --- description from .arch file ---
- // 32b:
- // tmp = MEM[ADDR];
- // src = DATA2;
- // cmp = DATA;
- // MEM[ADDR] = (tmp == cmp) ? src : tmp;
- // RETURN_DATA[0] = tmp.
- // Compare and store.
- // Caution, the order of src and cmp are the *opposite* of the - // --- BUFFER_ATOMIC_CMPSWAP opcode. - void - Inst_DS__DS_CMPST_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_CMPST_F32 class methods --- - - Inst_DS__DS_CMPST_F32::Inst_DS__DS_CMPST_F32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_cmpst_f32") - { - setFlag(F32); - } // Inst_DS__DS_CMPST_F32 - - Inst_DS__DS_CMPST_F32::~Inst_DS__DS_CMPST_F32() - { - } // ~Inst_DS__DS_CMPST_F32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // src = DATA2; - // cmp = DATA; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. - // Floating point compare and store that handles NaN/INF/denormal values. - // Caution, the order of src and cmp are the *opposite* of the - // --- BUFFER_ATOMIC_FCMPSWAP opcode. - void - Inst_DS__DS_CMPST_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_F32 class methods --- - - Inst_DS__DS_MIN_F32::Inst_DS__DS_MIN_F32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_f32") - { - setFlag(F32); - } // Inst_DS__DS_MIN_F32 - - Inst_DS__DS_MIN_F32::~Inst_DS__DS_MIN_F32() - { - } // ~Inst_DS__DS_MIN_F32 - - // --- description from .arch file --- - // 32b. - // tmp = MEM[ADDR]; - // src = DATA; - // cmp = DATA2; - // MEM[ADDR] = (cmp < tmp) ? src : tmp. - // Floating point minimum that handles NaN/INF/denormal values. - // Note that this opcode is slightly more general-purpose than - // --- BUFFER_ATOMIC_FMIN. - void - Inst_DS__DS_MIN_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_F32 class methods --- - - Inst_DS__DS_MAX_F32::Inst_DS__DS_MAX_F32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_f32") - { - setFlag(F32); - } // Inst_DS__DS_MAX_F32 - - Inst_DS__DS_MAX_F32::~Inst_DS__DS_MAX_F32() - { - } // ~Inst_DS__DS_MAX_F32 - - // --- description from .arch file --- - // 32b. - // tmp = MEM[ADDR]; - // src = DATA; - // cmp = DATA2; - // MEM[ADDR] = (tmp > cmp) ? src : tmp. - // Floating point maximum that handles NaN/INF/denormal values. - // Note that this opcode is slightly more general-purpose than - // --- BUFFER_ATOMIC_FMAX. - void - Inst_DS__DS_MAX_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_NOP class methods --- - - Inst_DS__DS_NOP::Inst_DS__DS_NOP(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_nop") - { - setFlag(Nop); - } // Inst_DS__DS_NOP - - Inst_DS__DS_NOP::~Inst_DS__DS_NOP() - { - } // ~Inst_DS__DS_NOP - - // --- description from .arch file --- - // Do nothing. - void - Inst_DS__DS_NOP::execute(GPUDynInstPtr gpuDynInst) - { - gpuDynInst->wavefront()->decLGKMInstsIssued(); - } // execute - // --- Inst_DS__DS_ADD_F32 class methods --- - - Inst_DS__DS_ADD_F32::Inst_DS__DS_ADD_F32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_add_f32") - { - setFlag(F32); - setFlag(MemoryRef); - setFlag(GroupSegment); - setFlag(AtomicAdd); - setFlag(AtomicNoReturn); - } // Inst_DS__DS_ADD_F32 - - Inst_DS__DS_ADD_F32::~Inst_DS__DS_ADD_F32() - { - } // ~Inst_DS__DS_ADD_F32 - - // --- description from .arch file --- - // 32b: - // MEM[ADDR] += DATA; - // Floating point add that handles NaN/INF/denormal values. 
- void
- Inst_DS__DS_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *wf = gpuDynInst->wavefront();
-
- if (gpuDynInst->exec_mask.none()) {
- wf->decLGKMInstsIssued();
- return;
- }
-
- gpuDynInst->execUnitId = wf->execUnitId;
- gpuDynInst->latency.init(gpuDynInst->computeUnit());
- gpuDynInst->latency.set(
- gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
- ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
- ConstVecOperandF32 data(gpuDynInst, extData.DATA0);
-
- addr.read();
- data.read();
-
- calcAddr(gpuDynInst, addr);
-
- for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
- if (gpuDynInst->exec_mask[lane]) {
- (reinterpret_cast<VecElemF32*>(gpuDynInst->a_data))[lane]
- = data[lane];
- }
- }
-
- gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
- } // execute
-
- void
- Inst_DS__DS_ADD_F32::initiateAcc(GPUDynInstPtr gpuDynInst)
- {
- Addr offset0 = instData.OFFSET0;
- Addr offset1 = instData.OFFSET1;
- Addr offset = (offset1 << 8) | offset0;
-
- initAtomicAccess(gpuDynInst, offset);
- } // initiateAcc
-
- void
- Inst_DS__DS_ADD_F32::completeAcc(GPUDynInstPtr gpuDynInst)
- {
- } // completeAcc
- // --- Inst_DS__DS_WRITE_B8 class methods ---
-
- Inst_DS__DS_WRITE_B8::Inst_DS__DS_WRITE_B8(InFmt_DS *iFmt)
- : Inst_DS(iFmt, "ds_write_b8")
- {
- setFlag(MemoryRef);
- setFlag(Store);
- } // Inst_DS__DS_WRITE_B8
-
- Inst_DS__DS_WRITE_B8::~Inst_DS__DS_WRITE_B8()
- {
- } // ~Inst_DS__DS_WRITE_B8
-
- // --- description from .arch file ---
- // MEM[ADDR] = DATA[7:0].
- // Byte write.
- void
- Inst_DS__DS_WRITE_B8::execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *wf = gpuDynInst->wavefront();
-
- if (gpuDynInst->exec_mask.none()) {
- wf->decLGKMInstsIssued();
- return;
- }
-
- gpuDynInst->execUnitId = wf->execUnitId;
- gpuDynInst->latency.init(gpuDynInst->computeUnit());
- gpuDynInst->latency.set(
- gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
- ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
- ConstVecOperandU8 data(gpuDynInst, extData.DATA0);
-
- addr.read();
- data.read();
-
- calcAddr(gpuDynInst, addr);
-
- for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
- if (gpuDynInst->exec_mask[lane]) {
- (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
- = data[lane];
- }
- }
-
- gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
- } // execute
-
- void
- Inst_DS__DS_WRITE_B8::initiateAcc(GPUDynInstPtr gpuDynInst)
- {
- Addr offset0 = instData.OFFSET0;
- Addr offset1 = instData.OFFSET1;
- Addr offset = (offset1 << 8) | offset0;
-
- initMemWrite(gpuDynInst, offset);
- } // initiateAcc
-
- void
- Inst_DS__DS_WRITE_B8::completeAcc(GPUDynInstPtr gpuDynInst)
- {
- } // completeAcc
- // --- Inst_DS__DS_WRITE_B8_D16_HI class methods ---
-
- Inst_DS__DS_WRITE_B8_D16_HI::Inst_DS__DS_WRITE_B8_D16_HI(InFmt_DS *iFmt)
- : Inst_DS(iFmt, "ds_write_b8_d16_hi")
- {
- setFlag(MemoryRef);
- setFlag(Store);
- } // Inst_DS__DS_WRITE_B8_D16_HI
-
- Inst_DS__DS_WRITE_B8_D16_HI::~Inst_DS__DS_WRITE_B8_D16_HI()
- {
- } // ~Inst_DS__DS_WRITE_B8_D16_HI
-
- // --- description from .arch file ---
- // MEM[ADDR] = DATA[23:16].
- // Byte write in to high word.
- void
- Inst_DS__DS_WRITE_B8_D16_HI::execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *wf = gpuDynInst->wavefront();
-
- if (gpuDynInst->exec_mask.none()) {
- wf->decLGKMInstsIssued();
- return;
- }
-
- gpuDynInst->execUnitId = wf->execUnitId;
- gpuDynInst->latency.init(gpuDynInst->computeUnit());
- gpuDynInst->latency.set(
- gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
- ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
- ConstVecOperandU8 data(gpuDynInst, extData.DATA0);
-
- addr.read();
- data.read();
-
- calcAddr(gpuDynInst, addr);
-
- for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
- if (gpuDynInst->exec_mask[lane]) {
- (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
- = bits(data[lane], 23, 16);
- }
- }
-
- gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
- } // execute
-
- void
- Inst_DS__DS_WRITE_B8_D16_HI::initiateAcc(GPUDynInstPtr gpuDynInst)
- {
- Addr offset0 = instData.OFFSET0;
- Addr offset1 = instData.OFFSET1;
- Addr offset = (offset1 << 8) | offset0;
-
- initMemWrite(gpuDynInst, offset);
- } // initiateAcc
-
- void
- Inst_DS__DS_WRITE_B8_D16_HI::completeAcc(GPUDynInstPtr gpuDynInst)
- {
- } // completeAcc
- // --- Inst_DS__DS_WRITE_B16 class methods ---
-
- Inst_DS__DS_WRITE_B16::Inst_DS__DS_WRITE_B16(InFmt_DS *iFmt)
- : Inst_DS(iFmt, "ds_write_b16")
- {
- setFlag(MemoryRef);
- setFlag(Store);
- } // Inst_DS__DS_WRITE_B16
-
- Inst_DS__DS_WRITE_B16::~Inst_DS__DS_WRITE_B16()
- {
- } // ~Inst_DS__DS_WRITE_B16
-
- // --- description from .arch file ---
- // MEM[ADDR] = DATA[15:0]
- // Short write.
- void
- Inst_DS__DS_WRITE_B16::execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *wf = gpuDynInst->wavefront();
-
- if (gpuDynInst->exec_mask.none()) {
- wf->decLGKMInstsIssued();
- return;
- }
-
- gpuDynInst->execUnitId = wf->execUnitId;
- gpuDynInst->latency.init(gpuDynInst->computeUnit());
- gpuDynInst->latency.set(
- gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
- ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
- ConstVecOperandU16 data(gpuDynInst, extData.DATA0);
-
- addr.read();
- data.read();
-
- calcAddr(gpuDynInst, addr);
-
- for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
- if (gpuDynInst->exec_mask[lane]) {
- (reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane]
- = data[lane];
- }
- }
-
- gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
- } // execute
-
- void
- Inst_DS__DS_WRITE_B16::initiateAcc(GPUDynInstPtr gpuDynInst)
- {
- Addr offset0 = instData.OFFSET0;
- Addr offset1 = instData.OFFSET1;
- Addr offset = (offset1 << 8) | offset0;
-
- initMemWrite(gpuDynInst, offset);
- } // initiateAcc
-
- void
- Inst_DS__DS_WRITE_B16::completeAcc(GPUDynInstPtr gpuDynInst)
- {
- } // completeAcc
- // --- Inst_DS__DS_ADD_RTN_U32 class methods ---
-
- Inst_DS__DS_ADD_RTN_U32::Inst_DS__DS_ADD_RTN_U32(InFmt_DS *iFmt)
- : Inst_DS(iFmt, "ds_add_rtn_u32")
- {
- } // Inst_DS__DS_ADD_RTN_U32
-
- Inst_DS__DS_ADD_RTN_U32::~Inst_DS__DS_ADD_RTN_U32()
- {
- } // ~Inst_DS__DS_ADD_RTN_U32
-
- // --- description from .arch file ---
- // 32b:
- // tmp = MEM[ADDR];
- // MEM[ADDR] += DATA;
- // RETURN_DATA = tmp.
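// Illustrative sketch only (not part of this change): the returning variant
// could mirror ds_add_u32 above but with AtomicReturn instead of
// AtomicNoReturn, plus a completeAcc() that copies the returned pre-op value
// into the destination VGPR (the VDST field name is assumed here):
//
//     void
//     Inst_DS__DS_ADD_RTN_U32::completeAcc(GPUDynInstPtr gpuDynInst)
//     {
//         VecOperandU32 vdst(gpuDynInst, extData.VDST);
//
//         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
//             if (gpuDynInst->exec_mask[lane]) {
//                 vdst[lane] = (reinterpret_cast<VecElemU32*>(
//                     gpuDynInst->d_data))[lane];
//             }
//         }
//
//         vdst.write();
//     }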
- void - Inst_DS__DS_ADD_RTN_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_SUB_RTN_U32 class methods --- - - Inst_DS__DS_SUB_RTN_U32::Inst_DS__DS_SUB_RTN_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_sub_rtn_u32") - { - } // Inst_DS__DS_SUB_RTN_U32 - - Inst_DS__DS_SUB_RTN_U32::~Inst_DS__DS_SUB_RTN_U32() - { - } // ~Inst_DS__DS_SUB_RTN_U32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA; - // RETURN_DATA = tmp. - void - Inst_DS__DS_SUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_RSUB_RTN_U32 class methods --- - - Inst_DS__DS_RSUB_RTN_U32::Inst_DS__DS_RSUB_RTN_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_rsub_rtn_u32") - { - } // Inst_DS__DS_RSUB_RTN_U32 - - Inst_DS__DS_RSUB_RTN_U32::~Inst_DS__DS_RSUB_RTN_U32() - { - } // ~Inst_DS__DS_RSUB_RTN_U32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA - MEM[ADDR]; - // RETURN_DATA = tmp. - // Subtraction with reversed operands. - void - Inst_DS__DS_RSUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_INC_RTN_U32 class methods --- - - Inst_DS__DS_INC_RTN_U32::Inst_DS__DS_INC_RTN_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_inc_rtn_u32") - { - } // Inst_DS__DS_INC_RTN_U32 - - Inst_DS__DS_INC_RTN_U32::~Inst_DS__DS_INC_RTN_U32() - { - } // ~Inst_DS__DS_INC_RTN_U32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); - // RETURN_DATA = tmp. - void - Inst_DS__DS_INC_RTN_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_DEC_RTN_U32 class methods --- - - Inst_DS__DS_DEC_RTN_U32::Inst_DS__DS_DEC_RTN_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_dec_rtn_u32") - { - } // Inst_DS__DS_DEC_RTN_U32 - - Inst_DS__DS_DEC_RTN_U32::~Inst_DS__DS_DEC_RTN_U32() - { - } // ~Inst_DS__DS_DEC_RTN_U32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 - // (unsigned compare); RETURN_DATA = tmp. - void - Inst_DS__DS_DEC_RTN_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_RTN_I32 class methods --- - - Inst_DS__DS_MIN_RTN_I32::Inst_DS__DS_MIN_RTN_I32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_rtn_i32") - { - } // Inst_DS__DS_MIN_RTN_I32 - - Inst_DS__DS_MIN_RTN_I32::~Inst_DS__DS_MIN_RTN_I32() - { - } // ~Inst_DS__DS_MIN_RTN_I32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); - // RETURN_DATA = tmp. - void - Inst_DS__DS_MIN_RTN_I32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_RTN_I32 class methods --- - - Inst_DS__DS_MAX_RTN_I32::Inst_DS__DS_MAX_RTN_I32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_rtn_i32") - { - } // Inst_DS__DS_MAX_RTN_I32 - - Inst_DS__DS_MAX_RTN_I32::~Inst_DS__DS_MAX_RTN_I32() - { - } // ~Inst_DS__DS_MAX_RTN_I32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); - // RETURN_DATA = tmp. 
- void - Inst_DS__DS_MAX_RTN_I32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_RTN_U32 class methods --- - - Inst_DS__DS_MIN_RTN_U32::Inst_DS__DS_MIN_RTN_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_rtn_u32") - { - } // Inst_DS__DS_MIN_RTN_U32 - - Inst_DS__DS_MIN_RTN_U32::~Inst_DS__DS_MIN_RTN_U32() - { - } // ~Inst_DS__DS_MIN_RTN_U32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); - // RETURN_DATA = tmp. - void - Inst_DS__DS_MIN_RTN_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_RTN_U32 class methods --- - - Inst_DS__DS_MAX_RTN_U32::Inst_DS__DS_MAX_RTN_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_rtn_u32") - { - } // Inst_DS__DS_MAX_RTN_U32 - - Inst_DS__DS_MAX_RTN_U32::~Inst_DS__DS_MAX_RTN_U32() - { - } // ~Inst_DS__DS_MAX_RTN_U32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); - // RETURN_DATA = tmp. - void - Inst_DS__DS_MAX_RTN_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_AND_RTN_B32 class methods --- - - Inst_DS__DS_AND_RTN_B32::Inst_DS__DS_AND_RTN_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_and_rtn_b32") - { - } // Inst_DS__DS_AND_RTN_B32 - - Inst_DS__DS_AND_RTN_B32::~Inst_DS__DS_AND_RTN_B32() - { - } // ~Inst_DS__DS_AND_RTN_B32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] &= DATA; - // RETURN_DATA = tmp. - void - Inst_DS__DS_AND_RTN_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_OR_RTN_B32 class methods --- - - Inst_DS__DS_OR_RTN_B32::Inst_DS__DS_OR_RTN_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_or_rtn_b32") - { - } // Inst_DS__DS_OR_RTN_B32 - - Inst_DS__DS_OR_RTN_B32::~Inst_DS__DS_OR_RTN_B32() - { - } // ~Inst_DS__DS_OR_RTN_B32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] |= DATA; - // RETURN_DATA = tmp. - void - Inst_DS__DS_OR_RTN_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_XOR_RTN_B32 class methods --- - - Inst_DS__DS_XOR_RTN_B32::Inst_DS__DS_XOR_RTN_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_xor_rtn_b32") - { - } // Inst_DS__DS_XOR_RTN_B32 - - Inst_DS__DS_XOR_RTN_B32::~Inst_DS__DS_XOR_RTN_B32() - { - } // ~Inst_DS__DS_XOR_RTN_B32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] ^= DATA; - // RETURN_DATA = tmp. - void - Inst_DS__DS_XOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MSKOR_RTN_B32 class methods --- - - Inst_DS__DS_MSKOR_RTN_B32::Inst_DS__DS_MSKOR_RTN_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_mskor_rtn_b32") - { - } // Inst_DS__DS_MSKOR_RTN_B32 - - Inst_DS__DS_MSKOR_RTN_B32::~Inst_DS__DS_MSKOR_RTN_B32() - { - } // ~Inst_DS__DS_MSKOR_RTN_B32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2; - // RETURN_DATA = tmp. - // Masked dword OR, D0 contains the mask and D1 contains the new value. 
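The mskor pseudocode above is easy to misread: DATA is the clear mask and DATA2 supplies the bits to OR in. A worked example with illustrative values:

    // (MEM & ~DATA) | DATA2, with MEM = 0xFF00, DATA = 0x00F0, DATA2 = 0x00A0:
    // (0xFF00 & 0xFFFFFF0F) | 0x00A0 = 0xFF00 | 0x00A0 = 0xFFA0; 0xFF00 is returned.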
- void - Inst_DS__DS_MSKOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_WRXCHG_RTN_B32 class methods --- - - Inst_DS__DS_WRXCHG_RTN_B32::Inst_DS__DS_WRXCHG_RTN_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_wrxchg_rtn_b32") - { - } // Inst_DS__DS_WRXCHG_RTN_B32 - - Inst_DS__DS_WRXCHG_RTN_B32::~Inst_DS__DS_WRXCHG_RTN_B32() - { - } // ~Inst_DS__DS_WRXCHG_RTN_B32 - - // --- description from .arch file --- - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA; - // RETURN_DATA = tmp. - // Write-exchange operation. - void - Inst_DS__DS_WRXCHG_RTN_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_WRXCHG2_RTN_B32 class methods --- - - Inst_DS__DS_WRXCHG2_RTN_B32::Inst_DS__DS_WRXCHG2_RTN_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_wrxchg2_rtn_b32") - { - } // Inst_DS__DS_WRXCHG2_RTN_B32 - - Inst_DS__DS_WRXCHG2_RTN_B32::~Inst_DS__DS_WRXCHG2_RTN_B32() - { - } // ~Inst_DS__DS_WRXCHG2_RTN_B32 - - // --- description from .arch file --- - // Write-exchange 2 separate dwords. - void - Inst_DS__DS_WRXCHG2_RTN_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_WRXCHG2ST64_RTN_B32 class methods --- - - Inst_DS__DS_WRXCHG2ST64_RTN_B32::Inst_DS__DS_WRXCHG2ST64_RTN_B32( - InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b32") - { - } // Inst_DS__DS_WRXCHG2ST64_RTN_B32 - - Inst_DS__DS_WRXCHG2ST64_RTN_B32::~Inst_DS__DS_WRXCHG2ST64_RTN_B32() - { - } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B32 - - // --- description from .arch file --- - // Write-exchange 2 separate dwords with a stride of 64 dwords. - void - Inst_DS__DS_WRXCHG2ST64_RTN_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_CMPST_RTN_B32 class methods --- - - Inst_DS__DS_CMPST_RTN_B32::Inst_DS__DS_CMPST_RTN_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_cmpst_rtn_b32") - { - } // Inst_DS__DS_CMPST_RTN_B32 - - Inst_DS__DS_CMPST_RTN_B32::~Inst_DS__DS_CMPST_RTN_B32() - { - } // ~Inst_DS__DS_CMPST_RTN_B32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // src = DATA2; - // cmp = DATA; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. - // Compare and store. - // Caution, the order of src and cmp are the *opposite* of the - // --- BUFFER_ATOMIC_CMPSWAP opcode. - void - Inst_DS__DS_CMPST_RTN_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_CMPST_RTN_F32 class methods --- - - Inst_DS__DS_CMPST_RTN_F32::Inst_DS__DS_CMPST_RTN_F32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_cmpst_rtn_f32") - { - setFlag(F32); - } // Inst_DS__DS_CMPST_RTN_F32 - - Inst_DS__DS_CMPST_RTN_F32::~Inst_DS__DS_CMPST_RTN_F32() - { - } // ~Inst_DS__DS_CMPST_RTN_F32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // src = DATA2; - // cmp = DATA; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. - // Floating point compare and store that handles NaN/INF/denormal values. - // Caution, the order of src and cmp are the *opposite* of the - // --- BUFFER_ATOMIC_FCMPSWAP opcode. 
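Restated in C-like form (a sketch of the .arch pseudocode, not gem5 code; data0/data1 are illustrative names for the DATA and DATA2 operands), the compare value comes from DATA and the store value from DATA2, which is the reverse of the buffer atomic cmpswap ordering that the comment above warns about:

    uint32_t tmp = mem[addr];
    uint32_t cmp = data0;                  // DATA
    uint32_t src = data1;                  // DATA2
    mem[addr] = (tmp == cmp) ? src : tmp;
    return_data = tmp;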
- void - Inst_DS__DS_CMPST_RTN_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_RTN_F32 class methods --- - - Inst_DS__DS_MIN_RTN_F32::Inst_DS__DS_MIN_RTN_F32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_rtn_f32") - { - setFlag(F32); - } // Inst_DS__DS_MIN_RTN_F32 - - Inst_DS__DS_MIN_RTN_F32::~Inst_DS__DS_MIN_RTN_F32() - { - } // ~Inst_DS__DS_MIN_RTN_F32 - - // --- description from .arch file --- - // 32b. - // tmp = MEM[ADDR]; - // src = DATA; - // cmp = DATA2; - // MEM[ADDR] = (cmp < tmp) ? src : tmp. - // Floating point minimum that handles NaN/INF/denormal values. - // Note that this opcode is slightly more general-purpose than - // --- BUFFER_ATOMIC_FMIN. - void - Inst_DS__DS_MIN_RTN_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_RTN_F32 class methods --- - - Inst_DS__DS_MAX_RTN_F32::Inst_DS__DS_MAX_RTN_F32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_rtn_f32") - { - setFlag(F32); - } // Inst_DS__DS_MAX_RTN_F32 - - Inst_DS__DS_MAX_RTN_F32::~Inst_DS__DS_MAX_RTN_F32() - { - } // ~Inst_DS__DS_MAX_RTN_F32 - - // --- description from .arch file --- - // 32b. - // tmp = MEM[ADDR]; - // src = DATA; - // cmp = DATA2; - // MEM[ADDR] = (tmp > cmp) ? src : tmp. - // Floating point maximum that handles NaN/INF/denormal values. - // Note that this opcode is slightly more general-purpose than - // --- BUFFER_ATOMIC_FMAX. - void - Inst_DS__DS_MAX_RTN_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_WRAP_RTN_B32 class methods --- - - Inst_DS__DS_WRAP_RTN_B32::Inst_DS__DS_WRAP_RTN_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_wrap_rtn_b32") - { - } // Inst_DS__DS_WRAP_RTN_B32 - - Inst_DS__DS_WRAP_RTN_B32::~Inst_DS__DS_WRAP_RTN_B32() - { - } // ~Inst_DS__DS_WRAP_RTN_B32 - - // --- description from .arch file --- - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA) ? tmp - DATA : tmp + DATA2; - // RETURN_DATA = tmp. - void - Inst_DS__DS_WRAP_RTN_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_ADD_RTN_F32 class methods --- - - Inst_DS__DS_ADD_RTN_F32::Inst_DS__DS_ADD_RTN_F32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_add_rtn_f32") - { - setFlag(F32); - } // Inst_DS__DS_ADD_RTN_F32 - - Inst_DS__DS_ADD_RTN_F32::~Inst_DS__DS_ADD_RTN_F32() - { - } // ~Inst_DS__DS_ADD_RTN_F32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] += DATA; - // RETURN_DATA = tmp. - // Floating point add that handles NaN/INF/denormal values. - void - Inst_DS__DS_ADD_RTN_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_READ_B32 class methods --- - - Inst_DS__DS_READ_B32::Inst_DS__DS_READ_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_b32") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_B32 - - Inst_DS__DS_READ_B32::~Inst_DS__DS_READ_B32() - { - } // ~Inst_DS__DS_READ_B32 - - // --- description from .arch file --- - // RETURN_DATA = MEM[ADDR]. - // Dword read. 
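The dword read below is the template the remaining implemented DS loads follow: execute() reads the per-lane byte addresses, resolves them through calcAddr() and hands the request to the local memory pipeline; initiateAcc() starts the LDS access at the 16-bit immediate offset (OFFSET1 << 8) | OFFSET0; completeAcc() copies each active lane's result out of d_data into the VDST operand.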
- void - Inst_DS__DS_READ_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ_B32::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemRead(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_READ_B32::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } // completeAcc - // --- Inst_DS__DS_READ2_B32 class methods --- - - Inst_DS__DS_READ2_B32::Inst_DS__DS_READ2_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read2_b32") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ2_B32 - - Inst_DS__DS_READ2_B32::~Inst_DS__DS_READ2_B32() - { - } // ~Inst_DS__DS_READ2_B32 - - // --- description from .arch file --- - // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4]; - // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4]. - // Read 2 dwords. - void - Inst_DS__DS_READ2_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ2_B32::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0 * 4; - Addr offset1 = instData.OFFSET1 * 4; - - initDualMemRead(gpuDynInst, offset0, offset1); - } // initiateAcc - - void - Inst_DS__DS_READ2_B32::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDST); - VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1]; - } - } - - vdst0.write(); - vdst1.write(); - } // completeAcc - // --- Inst_DS__DS_READ2ST64_B32 class methods --- - - Inst_DS__DS_READ2ST64_B32::Inst_DS__DS_READ2ST64_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read2st64_b32") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ2ST64_B32 - - Inst_DS__DS_READ2ST64_B32::~Inst_DS__DS_READ2ST64_B32() - { - } // ~Inst_DS__DS_READ2ST64_B32 - - // --- description from .arch file --- - // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4 * 64]; - // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4 * 64]. - // Read 2 dwords. 
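In the ST64 form below each offset field is scaled by the 64-element stride as well as the 4-byte element size, so with illustrative values OFFSET0 = 0 and OFFSET1 = 1 the two dwords are read from ADDR_BASE + 0 and ADDR_BASE + 256 bytes:

    Addr offset0 = instData.OFFSET0 * 4 * 64;   // 0 * 256 = 0
    Addr offset1 = instData.OFFSET1 * 4 * 64;   // 1 * 256 = 256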
- void - Inst_DS__DS_READ2ST64_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = (instData.OFFSET0 * 4 * 64); - Addr offset1 = (instData.OFFSET1 * 4 * 64); - - initDualMemRead(gpuDynInst, offset0, offset1); - } - - void - Inst_DS__DS_READ2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDST); - VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1]; - } - } - - vdst0.write(); - vdst1.write(); - } - // --- Inst_DS__DS_READ_I8 class methods --- - - Inst_DS__DS_READ_I8::Inst_DS__DS_READ_I8(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_i8") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_I8 - - Inst_DS__DS_READ_I8::~Inst_DS__DS_READ_I8() - { - } // ~Inst_DS__DS_READ_I8 - - // --- description from .arch file --- - // RETURN_DATA = signext(MEM[ADDR][7:0]). - // Signed byte read. - void - Inst_DS__DS_READ_I8::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ_I8::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemRead(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_READ_I8::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (VecElemU32)sext<8>((reinterpret_cast( - gpuDynInst->d_data))[lane]); - } - } - - vdst.write(); - } // completeAcc - // --- Inst_DS__DS_READ_U8 class methods --- - - Inst_DS__DS_READ_U8::Inst_DS__DS_READ_U8(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_u8") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_U8 - - Inst_DS__DS_READ_U8::~Inst_DS__DS_READ_U8() - { - } // ~Inst_DS__DS_READ_U8 - - // --- description from .arch file --- - // RETURN_DATA = {24'h0,MEM[ADDR][7:0]}. - // Unsigned byte read. 
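Unlike ds_read_i8 above, which sign-extends the loaded byte with sext<8>, the unsigned byte read below zero-extends it: completeAcc() simply casts the 8-bit result to VecElemU32, so a loaded value of 0x80 becomes 0x00000080 here but 0xFFFFFF80 in the signed form.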
- void - Inst_DS__DS_READ_U8::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ_U8::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemRead(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_READ_U8::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (VecElemU32)(reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } // completeAcc - // --- Inst_DS__DS_READ_I16 class methods --- - - Inst_DS__DS_READ_I16::Inst_DS__DS_READ_I16(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_i16") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_I16 - - Inst_DS__DS_READ_I16::~Inst_DS__DS_READ_I16() - { - } // ~Inst_DS__DS_READ_I16 - - // --- description from .arch file --- - // RETURN_DATA = signext(MEM[ADDR][15:0]). - // Signed short read. - void - Inst_DS__DS_READ_I16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_READ_U16 class methods --- - - Inst_DS__DS_READ_U16::Inst_DS__DS_READ_U16(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_u16") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_U16 - - Inst_DS__DS_READ_U16::~Inst_DS__DS_READ_U16() - { - } // ~Inst_DS__DS_READ_U16 - - // --- description from .arch file --- - // RETURN_DATA = {16'h0,MEM[ADDR][15:0]}. - // Unsigned short read. 
- void - Inst_DS__DS_READ_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - void - Inst_DS__DS_READ_U16::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemRead(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_READ_U16::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (VecElemU32)(reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } // completeAcc - // --- Inst_DS__DS_SWIZZLE_B32 class methods --- - - Inst_DS__DS_SWIZZLE_B32::Inst_DS__DS_SWIZZLE_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_swizzle_b32") - { - /** - * While this operation doesn't actually use DS storage we classify - * it as a load here because it does a writeback to a VGPR, which - * fits in better with the LDS pipeline logic. - */ - setFlag(Load); - } // Inst_DS__DS_SWIZZLE_B32 - - Inst_DS__DS_SWIZZLE_B32::~Inst_DS__DS_SWIZZLE_B32() - { - } // ~Inst_DS__DS_SWIZZLE_B32 - - // --- description from .arch file --- - // RETURN_DATA = swizzle(vgpr_data, offset1:offset0). - // Dword swizzle, no data is written to LDS memory; See ds_opcodes.docx for - // --- details. - void - Inst_DS__DS_SWIZZLE_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - wf->decLGKMInstsIssued(); - - if (gpuDynInst->exec_mask.none()) { - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit() - ->cyclesToTicks(Cycles(24))); - - ConstVecOperandU32 data(gpuDynInst, extData.DATA0); - VecOperandU32 vdst(gpuDynInst, extData.VDST); - /** - * The "DS pattern" is comprised of both offset fields. That is, the - * swizzle pattern between lanes. Bit 15 of the DS pattern dictates - * which swizzle mode to use. There are two different swizzle - * patterns: 1) QDMode and 2) Bit-masks mode. If bit 15 is set use - * QDMode else use Bit-masks mode. The remaining bits dictate how to - * swizzle the lanes. - * - * QDMode: Chunks the lanes into 4s and swizzles among them. - * Bits 7:6 dictate where lane 3 (of the current chunk) - * gets its date, 5:4 lane 2, etc. - * - * Bit-mask: This mode breaks bits 14:0 into 3 equal-sized chunks. - * 14:10 is the xor_mask, 9:5 is the or_mask, and 4:0 - * is the and_mask. Each lane is swizzled by performing - * the appropriate operation using these masks. - */ - VecElemU16 ds_pattern = ((instData.OFFSET1 << 8) | instData.OFFSET0); - - data.read(); - - if (bits(ds_pattern, 15)) { - // QDMode - for (int lane = 0; lane < NumVecElemPerVecReg; lane += 4) { - /** - * This operation allows data sharing between groups - * of four consecutive threads. Note the increment by - * 4 in the for loop. 
- */ - if (gpuDynInst->exec_mask[lane]) { - int index0 = lane + bits(ds_pattern, 1, 0); - panic_if(index0 >= NumVecElemPerVecReg, "%s: index0 (%d) " - "is out of bounds.\n", gpuDynInst->disassemble(), - index0); - vdst[lane] - = gpuDynInst->exec_mask[index0] ? data[index0]: 0; - } - if (gpuDynInst->exec_mask[lane + 1]) { - int index1 = lane + bits(ds_pattern, 3, 2); - panic_if(index1 >= NumVecElemPerVecReg, "%s: index1 (%d) " - "is out of bounds.\n", gpuDynInst->disassemble(), - index1); - vdst[lane + 1] - = gpuDynInst->exec_mask[index1] ? data[index1]: 0; - } - if (gpuDynInst->exec_mask[lane + 2]) { - int index2 = lane + bits(ds_pattern, 5, 4); - panic_if(index2 >= NumVecElemPerVecReg, "%s: index2 (%d) " - "is out of bounds.\n", gpuDynInst->disassemble(), - index2); - vdst[lane + 2] - = gpuDynInst->exec_mask[index2] ? data[index2]: 0; - } - if (gpuDynInst->exec_mask[lane + 3]) { - int index3 = lane + bits(ds_pattern, 7, 6); - panic_if(index3 >= NumVecElemPerVecReg, "%s: index3 (%d) " - "is out of bounds.\n", gpuDynInst->disassemble(), - index3); - vdst[lane + 3] - = gpuDynInst->exec_mask[index3] ? data[index3]: 0; - } - } - } else { - // Bit Mode - int and_mask = bits(ds_pattern, 4, 0); - int or_mask = bits(ds_pattern, 9, 5); - int xor_mask = bits(ds_pattern, 14, 10); - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - int index = (((lane & and_mask) | or_mask) ^ xor_mask); - // Adjust for the next 32 lanes. - if (lane > 31) { - index += 32; - } - panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is " - "out of bounds.\n", gpuDynInst->disassemble(), - index); - vdst[lane] - = gpuDynInst->exec_mask[index] ? data[index] : 0; - } - } - } - - vdst.write(); - - /** - * This is needed because we treat this instruction as a load - * but it's not an actual memory request. - * Without this, the destination register never gets marked as - * free, leading to a possible deadlock - */ - wf->computeUnit->vrf[wf->simdId]-> - scheduleWriteOperandsFromLoad(wf, gpuDynInst); - /** - * Similarly, this counter could build up over time, even across - * multiple wavefronts, and cause a deadlock. - */ - wf->rdLmReqsInPipe--; - } // execute - // --- Inst_DS__DS_PERMUTE_B32 class methods --- - - Inst_DS__DS_PERMUTE_B32::Inst_DS__DS_PERMUTE_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_permute_b32") - { - setFlag(MemoryRef); - /** - * While this operation doesn't actually use DS storage we classify - * it as a load here because it does a writeback to a VGPR, which - * fits in better with the LDS pipeline logic. - */ - setFlag(Load); - } // Inst_DS__DS_PERMUTE_B32 - - Inst_DS__DS_PERMUTE_B32::~Inst_DS__DS_PERMUTE_B32() - { - } // ~Inst_DS__DS_PERMUTE_B32 - - // --- description from .arch file --- - // Forward permute. - void - Inst_DS__DS_PERMUTE_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - wf->decLGKMInstsIssued(); - - if (gpuDynInst->exec_mask.none()) { - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit() - ->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data(gpuDynInst, extData.DATA0); - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - addr.read(); - data.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - /** - * One of the offset fields can be used for the index. 
- * It is assumed OFFSET0 would be used, as OFFSET1 is - * typically only used for DS ops that operate on two - * disparate pieces of data. - */ - assert(!instData.OFFSET1); - /** - * The address provided is a byte address, but VGPRs are - * 4 bytes, so we must divide by 4 to get the actual VGPR - * index. Additionally, the index is calculated modulo the - * WF size, 64 in this case, so we simply extract bits 7-2. - */ - int index = bits(addr[lane] + instData.OFFSET0, 7, 2); - panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out " - "of bounds.\n", gpuDynInst->disassemble(), index); - /** - * If the shuffled index corresponds to a lane that is - * inactive then this instruction writes a 0 to the active - * lane in VDST. - */ - if (wf->execMask(index)) { - vdst[index] = data[lane]; - } else { - vdst[index] = 0; - } - } - } - - vdst.write(); - - /** - * This is needed because we treat this instruction as a load - * but it's not an actual memory request. - * Without this, the destination register never gets marked as - * free, leading to a possible deadlock - */ - wf->computeUnit->vrf[wf->simdId]-> - scheduleWriteOperandsFromLoad(wf, gpuDynInst); - /** - * Similarly, this counter could build up over time, even across - * multiple wavefronts, and cause a deadlock. - */ - wf->rdLmReqsInPipe--; - } // execute - // --- Inst_DS__DS_BPERMUTE_B32 class methods --- - - Inst_DS__DS_BPERMUTE_B32::Inst_DS__DS_BPERMUTE_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_bpermute_b32") - { - setFlag(MemoryRef); - /** - * While this operation doesn't actually use DS storage we classify - * it as a load here because it does a writeback to a VGPR, which - * fits in better with the LDS pipeline logic. - */ - setFlag(Load); - } // Inst_DS__DS_BPERMUTE_B32 - - Inst_DS__DS_BPERMUTE_B32::~Inst_DS__DS_BPERMUTE_B32() - { - } // ~Inst_DS__DS_BPERMUTE_B32 - - // --- description from .arch file --- - // Backward permute. - void - Inst_DS__DS_BPERMUTE_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - wf->decLGKMInstsIssued(); - - if (gpuDynInst->exec_mask.none()) { - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit() - ->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data(gpuDynInst, extData.DATA0); - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - addr.read(); - data.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - /** - * One of the offset fields can be used for the index. - * It is assumed OFFSET0 would be used, as OFFSET1 is - * typically only used for DS ops that operate on two - * disparate pieces of data. - */ - assert(!instData.OFFSET1); - /** - * The address provided is a byte address, but VGPRs are - * 4 bytes, so we must divide by 4 to get the actual VGPR - * index. Additionally, the index is calculated modulo the - * WF size, 64 in this case, so we simply extract bits 7-2. - */ - int index = bits(addr[lane] + instData.OFFSET0, 7, 2); - panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out " - "of bounds.\n", gpuDynInst->disassemble(), index); - /** - * If the shuffled index corresponds to a lane that is - * inactive then this instruction writes a 0 to the active - * lane in VDST. 
- */ - if (wf->execMask(index)) { - vdst[lane] = data[index]; - } else { - vdst[lane] = 0; - } - } - } - - vdst.write(); - - /** - * This is needed because we treat this instruction as a load - * but it's not an actual memory request. - * Without this, the destination register never gets marked as - * free, leading to a possible deadlock - */ - wf->computeUnit->vrf[wf->simdId]-> - scheduleWriteOperandsFromLoad(wf, gpuDynInst); - /** - * Similarly, this counter could build up over time, even across - * multiple wavefronts, and cause a deadlock. - */ - wf->rdLmReqsInPipe--; - } // execute - - // --- Inst_DS__DS_ADD_U64 class methods --- - - Inst_DS__DS_ADD_U64::Inst_DS__DS_ADD_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_add_u64") - { - setFlag(MemoryRef); - setFlag(GroupSegment); - setFlag(AtomicAdd); - setFlag(AtomicNoReturn); - } // Inst_DS__DS_ADD_U64 - - Inst_DS__DS_ADD_U64::~Inst_DS__DS_ADD_U64() - { - } // ~Inst_DS__DS_ADD_U64 - - // --- description from .arch file --- - // 64b: - // MEM[ADDR] += DATA[0:1]; - void - Inst_DS__DS_ADD_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU64 data(gpuDynInst, extData.DATA0); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_ADD_U64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initAtomicAccess(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_ADD_U64::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_DS__DS_SUB_U64 class methods --- - - Inst_DS__DS_SUB_U64::Inst_DS__DS_SUB_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_sub_u64") - { - } // Inst_DS__DS_SUB_U64 - - Inst_DS__DS_SUB_U64::~Inst_DS__DS_SUB_U64() - { - } // ~Inst_DS__DS_SUB_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_SUB_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_RSUB_U64 class methods --- - - Inst_DS__DS_RSUB_U64::Inst_DS__DS_RSUB_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_rsub_u64") - { - } // Inst_DS__DS_RSUB_U64 - - Inst_DS__DS_RSUB_U64::~Inst_DS__DS_RSUB_U64() - { - } // ~Inst_DS__DS_RSUB_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA - MEM[ADDR]; - // RETURN_DATA = tmp. - // Subtraction with reversed operands. 
- void - Inst_DS__DS_RSUB_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_INC_U64 class methods --- - - Inst_DS__DS_INC_U64::Inst_DS__DS_INC_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_inc_u64") - { - } // Inst_DS__DS_INC_U64 - - Inst_DS__DS_INC_U64::~Inst_DS__DS_INC_U64() - { - } // ~Inst_DS__DS_INC_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_INC_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_DEC_U64 class methods --- - - Inst_DS__DS_DEC_U64::Inst_DS__DS_DEC_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_dec_u64") - { - } // Inst_DS__DS_DEC_U64 - - Inst_DS__DS_DEC_U64::~Inst_DS__DS_DEC_U64() - { - } // ~Inst_DS__DS_DEC_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 - // (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_DEC_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_I64 class methods --- - - Inst_DS__DS_MIN_I64::Inst_DS__DS_MIN_I64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_i64") - { - } // Inst_DS__DS_MIN_I64 - - Inst_DS__DS_MIN_I64::~Inst_DS__DS_MIN_I64() - { - } // ~Inst_DS__DS_MIN_I64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_MIN_I64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_I64 class methods --- - - Inst_DS__DS_MAX_I64::Inst_DS__DS_MAX_I64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_i64") - { - } // Inst_DS__DS_MAX_I64 - - Inst_DS__DS_MAX_I64::~Inst_DS__DS_MAX_I64() - { - } // ~Inst_DS__DS_MAX_I64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_MAX_I64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_U64 class methods --- - - Inst_DS__DS_MIN_U64::Inst_DS__DS_MIN_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_u64") - { - } // Inst_DS__DS_MIN_U64 - - Inst_DS__DS_MIN_U64::~Inst_DS__DS_MIN_U64() - { - } // ~Inst_DS__DS_MIN_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_MIN_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_U64 class methods --- - - Inst_DS__DS_MAX_U64::Inst_DS__DS_MAX_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_u64") - { - } // Inst_DS__DS_MAX_U64 - - Inst_DS__DS_MAX_U64::~Inst_DS__DS_MAX_U64() - { - } // ~Inst_DS__DS_MAX_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. 
- void - Inst_DS__DS_MAX_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_AND_B64 class methods --- - - Inst_DS__DS_AND_B64::Inst_DS__DS_AND_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_and_b64") - { - } // Inst_DS__DS_AND_B64 - - Inst_DS__DS_AND_B64::~Inst_DS__DS_AND_B64() - { - } // ~Inst_DS__DS_AND_B64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] &= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_AND_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_OR_B64 class methods --- - - Inst_DS__DS_OR_B64::Inst_DS__DS_OR_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_or_b64") - { - } // Inst_DS__DS_OR_B64 - - Inst_DS__DS_OR_B64::~Inst_DS__DS_OR_B64() - { - } // ~Inst_DS__DS_OR_B64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] |= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_OR_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_XOR_B64 class methods --- - - Inst_DS__DS_XOR_B64::Inst_DS__DS_XOR_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_xor_b64") - { - } // Inst_DS__DS_XOR_B64 - - Inst_DS__DS_XOR_B64::~Inst_DS__DS_XOR_B64() - { - } // ~Inst_DS__DS_XOR_B64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] ^= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_XOR_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MSKOR_B64 class methods --- - - Inst_DS__DS_MSKOR_B64::Inst_DS__DS_MSKOR_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_mskor_b64") - { - } // Inst_DS__DS_MSKOR_B64 - - Inst_DS__DS_MSKOR_B64::~Inst_DS__DS_MSKOR_B64() - { - } // ~Inst_DS__DS_MSKOR_B64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2; - // RETURN_DATA = tmp. - // Masked dword OR, D0 contains the mask and D1 contains the new value. - void - Inst_DS__DS_MSKOR_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_WRITE_B64 class methods --- - - Inst_DS__DS_WRITE_B64::Inst_DS__DS_WRITE_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write_b64") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE_B64 - - Inst_DS__DS_WRITE_B64::~Inst_DS__DS_WRITE_B64() - { - } // ~Inst_DS__DS_WRITE_B64 - - // --- description from .arch file --- - // 64b: - // MEM[ADDR] = DATA. - // Write qword. 
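The qword store below follows the same pattern as the 32-bit DS writes earlier in this file, except that the data operand is read as ConstVecOperandU64 and each active lane stages a 64-bit element in d_data; the immediate byte offset is still (OFFSET1 << 8) | OFFSET0.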
- void - Inst_DS__DS_WRITE_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU64 data(gpuDynInst, extData.DATA0); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_WRITE_B64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemWrite(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_WRITE_B64::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_DS__DS_WRITE2_B64 class methods --- - - Inst_DS__DS_WRITE2_B64::Inst_DS__DS_WRITE2_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write2_b64") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE2_B64 - - Inst_DS__DS_WRITE2_B64::~Inst_DS__DS_WRITE2_B64() - { - } // ~Inst_DS__DS_WRITE2_B64 - - // --- description from .arch file --- - // 64b: - // MEM[ADDR_BASE + OFFSET0 * 8] = DATA; - // MEM[ADDR_BASE + OFFSET1 * 8] = DATA2. - // Write 2 qwords. - void - Inst_DS__DS_WRITE2_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU64 data0(gpuDynInst, extData.DATA0); - ConstVecOperandU64 data1(gpuDynInst, extData.DATA1); - - addr.read(); - data0.read(); - data1.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast( - gpuDynInst->d_data))[lane * 2] = data0[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1] = data1[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_WRITE2_B64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0 * 8; - Addr offset1 = instData.OFFSET1 * 8; - - initDualMemWrite(gpuDynInst, offset0, offset1); - } - - void - Inst_DS__DS_WRITE2_B64::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - // --- Inst_DS__DS_WRITE2ST64_B64 class methods --- - - Inst_DS__DS_WRITE2ST64_B64::Inst_DS__DS_WRITE2ST64_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write2st64_b64") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE2ST64_B64 - - Inst_DS__DS_WRITE2ST64_B64::~Inst_DS__DS_WRITE2ST64_B64() - { - } // ~Inst_DS__DS_WRITE2ST64_B64 - - // --- description from .arch file --- - // 64b: - // MEM[ADDR_BASE + OFFSET0 * 8 * 64] = DATA; - // MEM[ADDR_BASE + OFFSET1 * 8 * 64] = DATA2; - // Write 2 qwords. 
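As with the ST64 reads, the qword write2 variant below scales each offset by the 8-byte element size and the 64-element stride, so OFFSET0 = 0 and OFFSET1 = 1 (illustrative values) place the two stores at ADDR_BASE + 0 and ADDR_BASE + 512 bytes:

    Addr offset0 = instData.OFFSET0 * 8 * 64;   // 0 * 512 = 0
    Addr offset1 = instData.OFFSET1 * 8 * 64;   // 1 * 512 = 512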
- void - Inst_DS__DS_WRITE2ST64_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU64 data0(gpuDynInst, extData.DATA0); - ConstVecOperandU64 data1(gpuDynInst, extData.DATA1); - - addr.read(); - data0.read(); - data1.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast( - gpuDynInst->d_data))[lane * 2] = data0[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1] = data1[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_WRITE2ST64_B64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0 * 8 * 64; - Addr offset1 = instData.OFFSET1 * 8 * 64; - - initDualMemWrite(gpuDynInst, offset0, offset1); - } - - void - Inst_DS__DS_WRITE2ST64_B64::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - // --- Inst_DS__DS_CMPST_B64 class methods --- - - Inst_DS__DS_CMPST_B64::Inst_DS__DS_CMPST_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_cmpst_b64") - { - } // Inst_DS__DS_CMPST_B64 - - Inst_DS__DS_CMPST_B64::~Inst_DS__DS_CMPST_B64() - { - } // ~Inst_DS__DS_CMPST_B64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // src = DATA2; - // cmp = DATA; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. - // Compare and store. - // Caution, the order of src and cmp are the *opposite* of the - // --- BUFFER_ATOMIC_CMPSWAP_X2 opcode. - void - Inst_DS__DS_CMPST_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_CMPST_F64 class methods --- - - Inst_DS__DS_CMPST_F64::Inst_DS__DS_CMPST_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_cmpst_f64") - { - setFlag(F64); - } // Inst_DS__DS_CMPST_F64 - - Inst_DS__DS_CMPST_F64::~Inst_DS__DS_CMPST_F64() - { - } // ~Inst_DS__DS_CMPST_F64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // src = DATA2; - // cmp = DATA; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. - // Floating point compare and store that handles NaN/INF/denormal values. - // Caution, the order of src and cmp are the *opposite* of the - // --- BUFFER_ATOMIC_FCMPSWAP_X2 opcode. - void - Inst_DS__DS_CMPST_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_F64 class methods --- - - Inst_DS__DS_MIN_F64::Inst_DS__DS_MIN_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_f64") - { - setFlag(F64); - } // Inst_DS__DS_MIN_F64 - - Inst_DS__DS_MIN_F64::~Inst_DS__DS_MIN_F64() - { - } // ~Inst_DS__DS_MIN_F64 - - // --- description from .arch file --- - // 64b. - // tmp = MEM[ADDR]; - // src = DATA; - // cmp = DATA2; - // MEM[ADDR] = (cmp < tmp) ? src : tmp. - // Floating point minimum that handles NaN/INF/denormal values. - // Note that this opcode is slightly more general-purpose than - // --- BUFFER_ATOMIC_FMIN_X2. 
- void - Inst_DS__DS_MIN_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_F64 class methods --- - - Inst_DS__DS_MAX_F64::Inst_DS__DS_MAX_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_f64") - { - setFlag(F64); - } // Inst_DS__DS_MAX_F64 - - Inst_DS__DS_MAX_F64::~Inst_DS__DS_MAX_F64() - { - } // ~Inst_DS__DS_MAX_F64 - - // --- description from .arch file --- - // 64b. - // tmp = MEM[ADDR]; - // src = DATA; - // cmp = DATA2; - // MEM[ADDR] = (tmp > cmp) ? src : tmp. - // Floating point maximum that handles NaN/INF/denormal values. - // Note that this opcode is slightly more general-purpose than - // --- BUFFER_ATOMIC_FMAX_X2. - void - Inst_DS__DS_MAX_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_ADD_RTN_U64 class methods --- - - Inst_DS__DS_ADD_RTN_U64::Inst_DS__DS_ADD_RTN_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_add_rtn_u64") - { - } // Inst_DS__DS_ADD_RTN_U64 - - Inst_DS__DS_ADD_RTN_U64::~Inst_DS__DS_ADD_RTN_U64() - { - } // ~Inst_DS__DS_ADD_RTN_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] += DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_ADD_RTN_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_SUB_RTN_U64 class methods --- - - Inst_DS__DS_SUB_RTN_U64::Inst_DS__DS_SUB_RTN_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_sub_rtn_u64") - { - } // Inst_DS__DS_SUB_RTN_U64 - - Inst_DS__DS_SUB_RTN_U64::~Inst_DS__DS_SUB_RTN_U64() - { - } // ~Inst_DS__DS_SUB_RTN_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_SUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_RSUB_RTN_U64 class methods --- - - Inst_DS__DS_RSUB_RTN_U64::Inst_DS__DS_RSUB_RTN_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_rsub_rtn_u64") - { - } // Inst_DS__DS_RSUB_RTN_U64 - - Inst_DS__DS_RSUB_RTN_U64::~Inst_DS__DS_RSUB_RTN_U64() - { - } // ~Inst_DS__DS_RSUB_RTN_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA - MEM[ADDR]; - // RETURN_DATA = tmp. - // Subtraction with reversed operands. - void - Inst_DS__DS_RSUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_INC_RTN_U64 class methods --- - - Inst_DS__DS_INC_RTN_U64::Inst_DS__DS_INC_RTN_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_inc_rtn_u64") - { - } // Inst_DS__DS_INC_RTN_U64 - - Inst_DS__DS_INC_RTN_U64::~Inst_DS__DS_INC_RTN_U64() - { - } // ~Inst_DS__DS_INC_RTN_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_INC_RTN_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_DEC_RTN_U64 class methods --- - - Inst_DS__DS_DEC_RTN_U64::Inst_DS__DS_DEC_RTN_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_dec_rtn_u64") - { - } // Inst_DS__DS_DEC_RTN_U64 - - Inst_DS__DS_DEC_RTN_U64::~Inst_DS__DS_DEC_RTN_U64() - { - } // ~Inst_DS__DS_DEC_RTN_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 - // (unsigned compare); - // RETURN_DATA[0:1] = tmp. 
- void - Inst_DS__DS_DEC_RTN_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_RTN_I64 class methods --- - - Inst_DS__DS_MIN_RTN_I64::Inst_DS__DS_MIN_RTN_I64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_rtn_i64") - { - } // Inst_DS__DS_MIN_RTN_I64 - - Inst_DS__DS_MIN_RTN_I64::~Inst_DS__DS_MIN_RTN_I64() - { - } // ~Inst_DS__DS_MIN_RTN_I64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_MIN_RTN_I64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_RTN_I64 class methods --- - - Inst_DS__DS_MAX_RTN_I64::Inst_DS__DS_MAX_RTN_I64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_rtn_i64") - { - } // Inst_DS__DS_MAX_RTN_I64 - - Inst_DS__DS_MAX_RTN_I64::~Inst_DS__DS_MAX_RTN_I64() - { - } // ~Inst_DS__DS_MAX_RTN_I64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_MAX_RTN_I64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_RTN_U64 class methods --- - - Inst_DS__DS_MIN_RTN_U64::Inst_DS__DS_MIN_RTN_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_rtn_u64") - { - } // Inst_DS__DS_MIN_RTN_U64 - - Inst_DS__DS_MIN_RTN_U64::~Inst_DS__DS_MIN_RTN_U64() - { - } // ~Inst_DS__DS_MIN_RTN_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_MIN_RTN_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_RTN_U64 class methods --- - - Inst_DS__DS_MAX_RTN_U64::Inst_DS__DS_MAX_RTN_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_rtn_u64") - { - } // Inst_DS__DS_MAX_RTN_U64 - - Inst_DS__DS_MAX_RTN_U64::~Inst_DS__DS_MAX_RTN_U64() - { - } // ~Inst_DS__DS_MAX_RTN_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_MAX_RTN_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_AND_RTN_B64 class methods --- - - Inst_DS__DS_AND_RTN_B64::Inst_DS__DS_AND_RTN_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_and_rtn_b64") - { - } // Inst_DS__DS_AND_RTN_B64 - - Inst_DS__DS_AND_RTN_B64::~Inst_DS__DS_AND_RTN_B64() - { - } // ~Inst_DS__DS_AND_RTN_B64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] &= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_AND_RTN_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_OR_RTN_B64 class methods --- - - Inst_DS__DS_OR_RTN_B64::Inst_DS__DS_OR_RTN_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_or_rtn_b64") - { - } // Inst_DS__DS_OR_RTN_B64 - - Inst_DS__DS_OR_RTN_B64::~Inst_DS__DS_OR_RTN_B64() - { - } // ~Inst_DS__DS_OR_RTN_B64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] |= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. 
- void - Inst_DS__DS_OR_RTN_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_XOR_RTN_B64 class methods --- - - Inst_DS__DS_XOR_RTN_B64::Inst_DS__DS_XOR_RTN_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_xor_rtn_b64") - { - } // Inst_DS__DS_XOR_RTN_B64 - - Inst_DS__DS_XOR_RTN_B64::~Inst_DS__DS_XOR_RTN_B64() - { - } // ~Inst_DS__DS_XOR_RTN_B64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] ^= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_XOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MSKOR_RTN_B64 class methods --- - - Inst_DS__DS_MSKOR_RTN_B64::Inst_DS__DS_MSKOR_RTN_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_mskor_rtn_b64") - { - } // Inst_DS__DS_MSKOR_RTN_B64 - - Inst_DS__DS_MSKOR_RTN_B64::~Inst_DS__DS_MSKOR_RTN_B64() - { - } // ~Inst_DS__DS_MSKOR_RTN_B64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2; - // RETURN_DATA = tmp. - // Masked dword OR, D0 contains the mask and D1 contains the new value. - void - Inst_DS__DS_MSKOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_WRXCHG_RTN_B64 class methods --- - - Inst_DS__DS_WRXCHG_RTN_B64::Inst_DS__DS_WRXCHG_RTN_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_wrxchg_rtn_b64") - { - } // Inst_DS__DS_WRXCHG_RTN_B64 - - Inst_DS__DS_WRXCHG_RTN_B64::~Inst_DS__DS_WRXCHG_RTN_B64() - { - } // ~Inst_DS__DS_WRXCHG_RTN_B64 - - // --- description from .arch file --- - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA; - // RETURN_DATA = tmp. - // Write-exchange operation. - void - Inst_DS__DS_WRXCHG_RTN_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_WRXCHG2_RTN_B64 class methods --- - - Inst_DS__DS_WRXCHG2_RTN_B64::Inst_DS__DS_WRXCHG2_RTN_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_wrxchg2_rtn_b64") - { - } // Inst_DS__DS_WRXCHG2_RTN_B64 - - Inst_DS__DS_WRXCHG2_RTN_B64::~Inst_DS__DS_WRXCHG2_RTN_B64() - { - } // ~Inst_DS__DS_WRXCHG2_RTN_B64 - - // --- description from .arch file --- - // Write-exchange 2 separate qwords. - void - Inst_DS__DS_WRXCHG2_RTN_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_WRXCHG2ST64_RTN_B64 class methods --- - - Inst_DS__DS_WRXCHG2ST64_RTN_B64::Inst_DS__DS_WRXCHG2ST64_RTN_B64( - InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b64") - { - } // Inst_DS__DS_WRXCHG2ST64_RTN_B64 - - Inst_DS__DS_WRXCHG2ST64_RTN_B64::~Inst_DS__DS_WRXCHG2ST64_RTN_B64() - { - } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B64 - - // --- description from .arch file --- - // Write-exchange 2 qwords with a stride of 64 qwords. - void - Inst_DS__DS_WRXCHG2ST64_RTN_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_CMPST_RTN_B64 class methods --- - - Inst_DS__DS_CMPST_RTN_B64::Inst_DS__DS_CMPST_RTN_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_cmpst_rtn_b64") - { - } // Inst_DS__DS_CMPST_RTN_B64 - - Inst_DS__DS_CMPST_RTN_B64::~Inst_DS__DS_CMPST_RTN_B64() - { - } // ~Inst_DS__DS_CMPST_RTN_B64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // src = DATA2; - // cmp = DATA; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. - // Compare and store. - // Caution, the order of src and cmp are the *opposite* of the - // --- BUFFER_ATOMIC_CMPSWAP_X2 opcode. 
- void - Inst_DS__DS_CMPST_RTN_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_CMPST_RTN_F64 class methods --- - - Inst_DS__DS_CMPST_RTN_F64::Inst_DS__DS_CMPST_RTN_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_cmpst_rtn_f64") - { - setFlag(F64); - } // Inst_DS__DS_CMPST_RTN_F64 - - Inst_DS__DS_CMPST_RTN_F64::~Inst_DS__DS_CMPST_RTN_F64() - { - } // ~Inst_DS__DS_CMPST_RTN_F64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // src = DATA2; - // cmp = DATA; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. - // Floating point compare and store that handles NaN/INF/denormal values. - // Caution, the order of src and cmp are the *opposite* of the - // --- BUFFER_ATOMIC_FCMPSWAP_X2 opcode. - void - Inst_DS__DS_CMPST_RTN_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_RTN_F64 class methods --- - - Inst_DS__DS_MIN_RTN_F64::Inst_DS__DS_MIN_RTN_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_rtn_f64") - { - setFlag(F64); - } // Inst_DS__DS_MIN_RTN_F64 - - Inst_DS__DS_MIN_RTN_F64::~Inst_DS__DS_MIN_RTN_F64() - { - } // ~Inst_DS__DS_MIN_RTN_F64 - - // --- description from .arch file --- - // 64b. - // tmp = MEM[ADDR]; - // src = DATA; - // cmp = DATA2; - // MEM[ADDR] = (cmp < tmp) ? src : tmp. - // Floating point minimum that handles NaN/INF/denormal values. - // Note that this opcode is slightly more general-purpose than - // --- BUFFER_ATOMIC_FMIN_X2. - void - Inst_DS__DS_MIN_RTN_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_RTN_F64 class methods --- - - Inst_DS__DS_MAX_RTN_F64::Inst_DS__DS_MAX_RTN_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_rtn_f64") - { - setFlag(F64); - } // Inst_DS__DS_MAX_RTN_F64 - - Inst_DS__DS_MAX_RTN_F64::~Inst_DS__DS_MAX_RTN_F64() - { - } // ~Inst_DS__DS_MAX_RTN_F64 - - // --- description from .arch file --- - // 64b. - // tmp = MEM[ADDR]; - // src = DATA; - // cmp = DATA2; - // MEM[ADDR] = (tmp > cmp) ? src : tmp. - // Floating point maximum that handles NaN/INF/denormal values. - // Note that this opcode is slightly more general-purpose than - // --- BUFFER_ATOMIC_FMAX_X2. - void - Inst_DS__DS_MAX_RTN_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_READ_B64 class methods --- - - Inst_DS__DS_READ_B64::Inst_DS__DS_READ_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_b64") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_B64 - - Inst_DS__DS_READ_B64::~Inst_DS__DS_READ_B64() - { - } // ~Inst_DS__DS_READ_B64 - - // --- description from .arch file --- - // RETURN_DATA = MEM[ADDR]. - // Read 1 qword. 
- void - Inst_DS__DS_READ_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ_B64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemRead(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_READ_B64::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU64 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } // completeAcc - // --- Inst_DS__DS_READ2_B64 class methods --- - - Inst_DS__DS_READ2_B64::Inst_DS__DS_READ2_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read2_b64") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ2_B64 - - Inst_DS__DS_READ2_B64::~Inst_DS__DS_READ2_B64() - { - } // ~Inst_DS__DS_READ2_B64 - - // --- description from .arch file --- - // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8]; - // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8]. - // Read 2 qwords. - void - Inst_DS__DS_READ2_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ2_B64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0 * 8; - Addr offset1 = instData.OFFSET1 * 8; - - initDualMemRead(gpuDynInst, offset0, offset1); - } // initiateAcc - - void - Inst_DS__DS_READ2_B64::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU64 vdst0(gpuDynInst, extData.VDST); - VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1]; - } - } - - vdst0.write(); - vdst1.write(); - } // completeAcc - // --- Inst_DS__DS_READ2ST64_B64 class methods --- - - Inst_DS__DS_READ2ST64_B64::Inst_DS__DS_READ2ST64_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read2st64_b64") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ2ST64_B64 - - Inst_DS__DS_READ2ST64_B64::~Inst_DS__DS_READ2ST64_B64() - { - } // ~Inst_DS__DS_READ2ST64_B64 - - // --- description from .arch file --- - // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8 * 64]; - // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8 * 64]. - // Read 2 qwords. 
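For the dual-read forms (ds_read2_b64 / ds_read2st64_b64), OFFSET0 and OFFSET1 are element indices rather than byte offsets, and each 64-bit result occupies a VGPR pair, which is why the second result starts at VDST + 2. A small sketch of the offset scaling, matching the initiateAcc() implementations above (ds_read2_b64) and below (ds_read2st64_b64); the helper name is illustrative:

    // Byte offsets for the two qword reads; the st64 form adds a 64-element stride.
    std::pair<Addr, Addr>
    read2ByteOffsets(Addr offset0, Addr offset1, bool st64)
    {
        Addr stride = st64 ? 8 * 64 : 8;        // qword size, optionally x64
        return {offset0 * stride, offset1 * stride};
    }
    // qword 0 -> VGPR pair {VDST, VDST+1}, qword 1 -> {VDST+2, VDST+3}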
- void - Inst_DS__DS_READ2ST64_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ2ST64_B64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = (instData.OFFSET0 * 8 * 64); - Addr offset1 = (instData.OFFSET1 * 8 * 64); - - initDualMemRead(gpuDynInst, offset0, offset1); - } - - void - Inst_DS__DS_READ2ST64_B64::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU64 vdst0(gpuDynInst, extData.VDST); - VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1]; - } - } - - vdst0.write(); - vdst1.write(); - } - // --- Inst_DS__DS_CONDXCHG32_RTN_B64 class methods --- - - Inst_DS__DS_CONDXCHG32_RTN_B64::Inst_DS__DS_CONDXCHG32_RTN_B64( - InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_condxchg32_rtn_b64") - { - } // Inst_DS__DS_CONDXCHG32_RTN_B64 - - Inst_DS__DS_CONDXCHG32_RTN_B64::~Inst_DS__DS_CONDXCHG32_RTN_B64() - { - } // ~Inst_DS__DS_CONDXCHG32_RTN_B64 - - // --- description from .arch file --- - // Conditional write exchange. - void - Inst_DS__DS_CONDXCHG32_RTN_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_ADD_SRC2_U32 class methods --- - - Inst_DS__DS_ADD_SRC2_U32::Inst_DS__DS_ADD_SRC2_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_add_src2_u32") - { - } // Inst_DS__DS_ADD_SRC2_U32 - - Inst_DS__DS_ADD_SRC2_U32::~Inst_DS__DS_ADD_SRC2_U32() - { - } // ~Inst_DS__DS_ADD_SRC2_U32 - - // --- description from .arch file --- - // 32b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[A] + MEM[B]. - void - Inst_DS__DS_ADD_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_SUB_SRC2_U32 class methods --- - - Inst_DS__DS_SUB_SRC2_U32::Inst_DS__DS_SUB_SRC2_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_sub_src2_u32") - { - } // Inst_DS__DS_SUB_SRC2_U32 - - Inst_DS__DS_SUB_SRC2_U32::~Inst_DS__DS_SUB_SRC2_U32() - { - } // ~Inst_DS__DS_SUB_SRC2_U32 - - // --- description from .arch file --- - // 32b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[A] - MEM[B]. - void - Inst_DS__DS_SUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_RSUB_SRC2_U32 class methods --- - - Inst_DS__DS_RSUB_SRC2_U32::Inst_DS__DS_RSUB_SRC2_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_rsub_src2_u32") - { - } // Inst_DS__DS_RSUB_SRC2_U32 - - Inst_DS__DS_RSUB_SRC2_U32::~Inst_DS__DS_RSUB_SRC2_U32() - { - } // ~Inst_DS__DS_RSUB_SRC2_U32 - - // --- description from .arch file --- - // 32b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[B] - MEM[A]. 
- void - Inst_DS__DS_RSUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_INC_SRC2_U32 class methods --- - - Inst_DS__DS_INC_SRC2_U32::Inst_DS__DS_INC_SRC2_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_inc_src2_u32") - { - } // Inst_DS__DS_INC_SRC2_U32 - - Inst_DS__DS_INC_SRC2_U32::~Inst_DS__DS_INC_SRC2_U32() - { - } // ~Inst_DS__DS_INC_SRC2_U32 - - // --- description from .arch file --- - // 32b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1). - void - Inst_DS__DS_INC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_DEC_SRC2_U32 class methods --- - - Inst_DS__DS_DEC_SRC2_U32::Inst_DS__DS_DEC_SRC2_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_dec_src2_u32") - { - } // Inst_DS__DS_DEC_SRC2_U32 - - Inst_DS__DS_DEC_SRC2_U32::~Inst_DS__DS_DEC_SRC2_U32() - { - } // ~Inst_DS__DS_DEC_SRC2_U32 - - // --- description from .arch file --- - // 32b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1). - // Uint decrement. - void - Inst_DS__DS_DEC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_SRC2_I32 class methods --- - - Inst_DS__DS_MIN_SRC2_I32::Inst_DS__DS_MIN_SRC2_I32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_src2_i32") - { - } // Inst_DS__DS_MIN_SRC2_I32 - - Inst_DS__DS_MIN_SRC2_I32::~Inst_DS__DS_MIN_SRC2_I32() - { - } // ~Inst_DS__DS_MIN_SRC2_I32 - - // --- description from .arch file --- - // 32b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = min(MEM[A], MEM[B]). - void - Inst_DS__DS_MIN_SRC2_I32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_SRC2_I32 class methods --- - - Inst_DS__DS_MAX_SRC2_I32::Inst_DS__DS_MAX_SRC2_I32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_src2_i32") - { - } // Inst_DS__DS_MAX_SRC2_I32 - - Inst_DS__DS_MAX_SRC2_I32::~Inst_DS__DS_MAX_SRC2_I32() - { - } // ~Inst_DS__DS_MAX_SRC2_I32 - - // --- description from .arch file --- - // 32b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = max(MEM[A], MEM[B]). - void - Inst_DS__DS_MAX_SRC2_I32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_SRC2_U32 class methods --- - - Inst_DS__DS_MIN_SRC2_U32::Inst_DS__DS_MIN_SRC2_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_src2_u32") - { - } // Inst_DS__DS_MIN_SRC2_U32 - - Inst_DS__DS_MIN_SRC2_U32::~Inst_DS__DS_MIN_SRC2_U32() - { - } // ~Inst_DS__DS_MIN_SRC2_U32 - - // --- description from .arch file --- - // 32b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = min(MEM[A], MEM[B]). 
- void - Inst_DS__DS_MIN_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_SRC2_U32 class methods --- - - Inst_DS__DS_MAX_SRC2_U32::Inst_DS__DS_MAX_SRC2_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_src2_u32") - { - } // Inst_DS__DS_MAX_SRC2_U32 - - Inst_DS__DS_MAX_SRC2_U32::~Inst_DS__DS_MAX_SRC2_U32() - { - } // ~Inst_DS__DS_MAX_SRC2_U32 - - // --- description from .arch file --- - // 32b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = max(MEM[A], MEM[B]). - void - Inst_DS__DS_MAX_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_AND_SRC2_B32 class methods --- - - Inst_DS__DS_AND_SRC2_B32::Inst_DS__DS_AND_SRC2_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_and_src2_b32") - { - } // Inst_DS__DS_AND_SRC2_B32 - - Inst_DS__DS_AND_SRC2_B32::~Inst_DS__DS_AND_SRC2_B32() - { - } // ~Inst_DS__DS_AND_SRC2_B32 - - // --- description from .arch file --- - // 32b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[A] & MEM[B]. - void - Inst_DS__DS_AND_SRC2_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_OR_SRC2_B32 class methods --- - - Inst_DS__DS_OR_SRC2_B32::Inst_DS__DS_OR_SRC2_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_or_src2_b32") - { - } // Inst_DS__DS_OR_SRC2_B32 - - Inst_DS__DS_OR_SRC2_B32::~Inst_DS__DS_OR_SRC2_B32() - { - } // ~Inst_DS__DS_OR_SRC2_B32 - - // --- description from .arch file --- - // 32b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[A] | MEM[B]. - void - Inst_DS__DS_OR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_XOR_SRC2_B32 class methods --- - - Inst_DS__DS_XOR_SRC2_B32::Inst_DS__DS_XOR_SRC2_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_xor_src2_b32") - { - } // Inst_DS__DS_XOR_SRC2_B32 - - Inst_DS__DS_XOR_SRC2_B32::~Inst_DS__DS_XOR_SRC2_B32() - { - } // ~Inst_DS__DS_XOR_SRC2_B32 - - // --- description from .arch file --- - // 32b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[A] ^ MEM[B]. - void - Inst_DS__DS_XOR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_WRITE_SRC2_B32 class methods --- - - Inst_DS__DS_WRITE_SRC2_B32::Inst_DS__DS_WRITE_SRC2_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write_src2_b32") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE_SRC2_B32 - - Inst_DS__DS_WRITE_SRC2_B32::~Inst_DS__DS_WRITE_SRC2_B32() - { - } // ~Inst_DS__DS_WRITE_SRC2_B32 - - // --- description from .arch file --- - // 32b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[B]. - // Write dword. 
- void - Inst_DS__DS_WRITE_SRC2_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_SRC2_F32 class methods --- - - Inst_DS__DS_MIN_SRC2_F32::Inst_DS__DS_MIN_SRC2_F32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_src2_f32") - { - setFlag(F32); - } // Inst_DS__DS_MIN_SRC2_F32 - - Inst_DS__DS_MIN_SRC2_F32::~Inst_DS__DS_MIN_SRC2_F32() - { - } // ~Inst_DS__DS_MIN_SRC2_F32 - - // --- description from .arch file --- - // 32b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A]. - // Float, handles NaN/INF/denorm. - void - Inst_DS__DS_MIN_SRC2_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_SRC2_F32 class methods --- - - Inst_DS__DS_MAX_SRC2_F32::Inst_DS__DS_MAX_SRC2_F32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_src2_f32") - { - setFlag(F32); - } // Inst_DS__DS_MAX_SRC2_F32 - - Inst_DS__DS_MAX_SRC2_F32::~Inst_DS__DS_MAX_SRC2_F32() - { - } // ~Inst_DS__DS_MAX_SRC2_F32 - - // --- description from .arch file --- - // 32b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A]. - // Float, handles NaN/INF/denorm. - void - Inst_DS__DS_MAX_SRC2_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_ADD_SRC2_F32 class methods --- - - Inst_DS__DS_ADD_SRC2_F32::Inst_DS__DS_ADD_SRC2_F32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_add_src2_f32") - { - setFlag(F32); - } // Inst_DS__DS_ADD_SRC2_F32 - - Inst_DS__DS_ADD_SRC2_F32::~Inst_DS__DS_ADD_SRC2_F32() - { - } // ~Inst_DS__DS_ADD_SRC2_F32 - - // --- description from .arch file --- - // 32b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[B] + MEM[A]. - // Float, handles NaN/INF/denorm. - void - Inst_DS__DS_ADD_SRC2_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_GWS_SEMA_RELEASE_ALL class methods --- - - Inst_DS__DS_GWS_SEMA_RELEASE_ALL::Inst_DS__DS_GWS_SEMA_RELEASE_ALL( - InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_gws_sema_release_all") - { - } // Inst_DS__DS_GWS_SEMA_RELEASE_ALL - - Inst_DS__DS_GWS_SEMA_RELEASE_ALL::~Inst_DS__DS_GWS_SEMA_RELEASE_ALL() - { - } // ~Inst_DS__DS_GWS_SEMA_RELEASE_ALL - - // --- description from .arch file --- - // GDS Only: The GWS resource (rid) indicated will process this opcode by - // updating the counter and labeling the specified resource as a semaphore. - // //Determine the GWS resource to work on - // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0]; - // //Incr the state counter of the resource - // state.counter[rid] = state.wave_in_queue; - // state.type = SEMAPHORE; - // return rd_done; //release calling wave - // This action will release ALL queued waves; it Will have no effect if no - // --- waves are present. - void - Inst_DS__DS_GWS_SEMA_RELEASE_ALL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_GWS_INIT class methods --- - - Inst_DS__DS_GWS_INIT::Inst_DS__DS_GWS_INIT(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_gws_init") - { - } // Inst_DS__DS_GWS_INIT - - Inst_DS__DS_GWS_INIT::~Inst_DS__DS_GWS_INIT() - { - } // ~Inst_DS__DS_GWS_INIT - - // --- description from .arch file --- - // GDS Only: Initialize a barrier or semaphore resource. 
- // //Determine the GWS resource to work on - // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0]; - // //Get the value to use in init - // index = find_first_valid(vector mask) - // value = DATA[thread: index] - // //Set the state of the resource - // state.counter[rid] = lsb(value); //limit #waves - // state.flag[rid] = 0; - // return rd_done; //release calling wave - void - Inst_DS__DS_GWS_INIT::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_GWS_SEMA_V class methods --- - - Inst_DS__DS_GWS_SEMA_V::Inst_DS__DS_GWS_SEMA_V(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_gws_sema_v") - { - } // Inst_DS__DS_GWS_SEMA_V - - Inst_DS__DS_GWS_SEMA_V::~Inst_DS__DS_GWS_SEMA_V() - { - } // ~Inst_DS__DS_GWS_SEMA_V - - // --- description from .arch file --- - // GDS Only: The GWS resource indicated will process this opcode by - // updating the counter and labeling the resource as a semaphore. - // //Determine the GWS resource to work on - // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0]; - // //Incr the state counter of the resource - // state.counter[rid]++; - // state.type = SEMAPHORE; - // return rd_done; //release calling wave - // This action will release one waved if any are queued in this resource. - void - Inst_DS__DS_GWS_SEMA_V::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_GWS_SEMA_BR class methods --- - - Inst_DS__DS_GWS_SEMA_BR::Inst_DS__DS_GWS_SEMA_BR(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_gws_sema_br") - { - } // Inst_DS__DS_GWS_SEMA_BR - - Inst_DS__DS_GWS_SEMA_BR::~Inst_DS__DS_GWS_SEMA_BR() - { - } // ~Inst_DS__DS_GWS_SEMA_BR - - // --- description from .arch file --- - // GDS Only: The GWS resource indicated will process this opcode by - // updating the counter by the bulk release delivered count and labeling - // the resource as a semaphore. - // //Determine the GWS resource to work on - // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0]; - // index = find first valid (vector mask) - // count = DATA[thread: index]; - // //Add count to the resource state counter - // state.counter[rid] += count; - // state.type = SEMAPHORE; - // return rd_done; //release calling wave - // This action will release count number of waves, immediately if queued, - // or as they arrive from the noted resource. - void - Inst_DS__DS_GWS_SEMA_BR::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_GWS_SEMA_P class methods --- - - Inst_DS__DS_GWS_SEMA_P::Inst_DS__DS_GWS_SEMA_P(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_gws_sema_p") - { - } // Inst_DS__DS_GWS_SEMA_P - - Inst_DS__DS_GWS_SEMA_P::~Inst_DS__DS_GWS_SEMA_P() - { - } // ~Inst_DS__DS_GWS_SEMA_P - - // --- description from .arch file --- - // GDS Only: The GWS resource indicated will process this opcode by - // queueing it until counter enables a release and then decrementing the - // counter of the resource as a semaphore. 
- // //Determine the GWS resource to work on - // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0]; - // state.type = SEMAPHORE; - // ENQUEUE until(state[rid].counter > 0) - // state[rid].counter--; - // return rd_done - void - Inst_DS__DS_GWS_SEMA_P::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_GWS_BARRIER class methods --- - - Inst_DS__DS_GWS_BARRIER::Inst_DS__DS_GWS_BARRIER(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_gws_barrier") - { - } // Inst_DS__DS_GWS_BARRIER - - Inst_DS__DS_GWS_BARRIER::~Inst_DS__DS_GWS_BARRIER() - { - } // ~Inst_DS__DS_GWS_BARRIER - - // --- description from .arch file --- - // GDS Only: The GWS resource indicated will process this opcode by - // queueing it until barrier is satisfied. The number of waves needed is - // passed in as DATA of first valid thread. - // //Determine the GWS resource to work on - // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + OFFSET0[5:0]; - // index = find first valid (vector mask); - // value = DATA[thread: index]; - // // Input Decision Machine - // state.type[rid] = BARRIER; - // if(state[rid].counter <= 0) { - // thread[rid].flag = state[rid].flag; - // ENQUEUE; - // state[rid].flag = !state.flag; - // state[rid].counter = value; - // return rd_done; - // } else { - // state[rid].counter--; - // thread.flag = state[rid].flag; - // ENQUEUE; - // } - // Since the waves deliver the count for the next barrier, this function - // can have a different size barrier for each occurrence. - // // Release Machine - // if(state.type == BARRIER) { - // if(state.flag != thread.flag) { - // return rd_done; - // } - // } - void - Inst_DS__DS_GWS_BARRIER::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_CONSUME class methods --- - - Inst_DS__DS_CONSUME::Inst_DS__DS_CONSUME(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_consume") - { - } // Inst_DS__DS_CONSUME - - Inst_DS__DS_CONSUME::~Inst_DS__DS_CONSUME() - { - } // ~Inst_DS__DS_CONSUME - - // --- description from .arch file --- - // LDS & GDS. Subtract (count_bits(exec_mask)) from the value stored in DS - // memory at (M0.base + instr_offset). Return the pre-operation value to - // VGPRs. - void - Inst_DS__DS_CONSUME::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_APPEND class methods --- - - Inst_DS__DS_APPEND::Inst_DS__DS_APPEND(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_append") - { - } // Inst_DS__DS_APPEND - - Inst_DS__DS_APPEND::~Inst_DS__DS_APPEND() - { - } // ~Inst_DS__DS_APPEND - - // --- description from .arch file --- - // LDS & GDS. Add (count_bits(exec_mask)) to the value stored in DS memory - // at (M0.base + instr_offset). Return the pre-operation value to VGPRs. - void - Inst_DS__DS_APPEND::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_ORDERED_COUNT class methods --- - - Inst_DS__DS_ORDERED_COUNT::Inst_DS__DS_ORDERED_COUNT(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_ordered_count") - { - } // Inst_DS__DS_ORDERED_COUNT - - Inst_DS__DS_ORDERED_COUNT::~Inst_DS__DS_ORDERED_COUNT() - { - } // ~Inst_DS__DS_ORDERED_COUNT - - // --- description from .arch file --- - // GDS-only. Add (count_bits(exec_mask)) to one of 4 dedicated - // ordered-count counters (aka 'packers'). Additional bits of instr.offset - // field are overloaded to hold packer-id, 'last'. 
- void - Inst_DS__DS_ORDERED_COUNT::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_ADD_SRC2_U64 class methods --- - - Inst_DS__DS_ADD_SRC2_U64::Inst_DS__DS_ADD_SRC2_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_add_src2_u64") - { - } // Inst_DS__DS_ADD_SRC2_U64 - - Inst_DS__DS_ADD_SRC2_U64::~Inst_DS__DS_ADD_SRC2_U64() - { - } // ~Inst_DS__DS_ADD_SRC2_U64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[A] + MEM[B]. - void - Inst_DS__DS_ADD_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_SUB_SRC2_U64 class methods --- - - Inst_DS__DS_SUB_SRC2_U64::Inst_DS__DS_SUB_SRC2_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_sub_src2_u64") - { - } // Inst_DS__DS_SUB_SRC2_U64 - - Inst_DS__DS_SUB_SRC2_U64::~Inst_DS__DS_SUB_SRC2_U64() - { - } // ~Inst_DS__DS_SUB_SRC2_U64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[A] - MEM[B]. - void - Inst_DS__DS_SUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_RSUB_SRC2_U64 class methods --- - - Inst_DS__DS_RSUB_SRC2_U64::Inst_DS__DS_RSUB_SRC2_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_rsub_src2_u64") - { - } // Inst_DS__DS_RSUB_SRC2_U64 - - Inst_DS__DS_RSUB_SRC2_U64::~Inst_DS__DS_RSUB_SRC2_U64() - { - } // ~Inst_DS__DS_RSUB_SRC2_U64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[B] - MEM[A]. - void - Inst_DS__DS_RSUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_INC_SRC2_U64 class methods --- - - Inst_DS__DS_INC_SRC2_U64::Inst_DS__DS_INC_SRC2_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_inc_src2_u64") - { - } // Inst_DS__DS_INC_SRC2_U64 - - Inst_DS__DS_INC_SRC2_U64::~Inst_DS__DS_INC_SRC2_U64() - { - } // ~Inst_DS__DS_INC_SRC2_U64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1). - void - Inst_DS__DS_INC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_DEC_SRC2_U64 class methods --- - - Inst_DS__DS_DEC_SRC2_U64::Inst_DS__DS_DEC_SRC2_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_dec_src2_u64") - { - } // Inst_DS__DS_DEC_SRC2_U64 - - Inst_DS__DS_DEC_SRC2_U64::~Inst_DS__DS_DEC_SRC2_U64() - { - } // ~Inst_DS__DS_DEC_SRC2_U64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1). - // Uint decrement. 
- void - Inst_DS__DS_DEC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_SRC2_I64 class methods --- - - Inst_DS__DS_MIN_SRC2_I64::Inst_DS__DS_MIN_SRC2_I64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_src2_i64") - { - } // Inst_DS__DS_MIN_SRC2_I64 - - Inst_DS__DS_MIN_SRC2_I64::~Inst_DS__DS_MIN_SRC2_I64() - { - } // ~Inst_DS__DS_MIN_SRC2_I64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = min(MEM[A], MEM[B]). - void - Inst_DS__DS_MIN_SRC2_I64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_SRC2_I64 class methods --- - - Inst_DS__DS_MAX_SRC2_I64::Inst_DS__DS_MAX_SRC2_I64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_src2_i64") - { - } // Inst_DS__DS_MAX_SRC2_I64 - - Inst_DS__DS_MAX_SRC2_I64::~Inst_DS__DS_MAX_SRC2_I64() - { - } // ~Inst_DS__DS_MAX_SRC2_I64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = max(MEM[A], MEM[B]). - void - Inst_DS__DS_MAX_SRC2_I64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_SRC2_U64 class methods --- - - Inst_DS__DS_MIN_SRC2_U64::Inst_DS__DS_MIN_SRC2_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_src2_u64") - { - } // Inst_DS__DS_MIN_SRC2_U64 - - Inst_DS__DS_MIN_SRC2_U64::~Inst_DS__DS_MIN_SRC2_U64() - { - } // ~Inst_DS__DS_MIN_SRC2_U64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = min(MEM[A], MEM[B]). - void - Inst_DS__DS_MIN_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_SRC2_U64 class methods --- - - Inst_DS__DS_MAX_SRC2_U64::Inst_DS__DS_MAX_SRC2_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_src2_u64") - { - } // Inst_DS__DS_MAX_SRC2_U64 - - Inst_DS__DS_MAX_SRC2_U64::~Inst_DS__DS_MAX_SRC2_U64() - { - } // ~Inst_DS__DS_MAX_SRC2_U64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = max(MEM[A], MEM[B]). - void - Inst_DS__DS_MAX_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_AND_SRC2_B64 class methods --- - - Inst_DS__DS_AND_SRC2_B64::Inst_DS__DS_AND_SRC2_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_and_src2_b64") - { - } // Inst_DS__DS_AND_SRC2_B64 - - Inst_DS__DS_AND_SRC2_B64::~Inst_DS__DS_AND_SRC2_B64() - { - } // ~Inst_DS__DS_AND_SRC2_B64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[A] & MEM[B]. - void - Inst_DS__DS_AND_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_OR_SRC2_B64 class methods --- - - Inst_DS__DS_OR_SRC2_B64::Inst_DS__DS_OR_SRC2_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_or_src2_b64") - { - } // Inst_DS__DS_OR_SRC2_B64 - - Inst_DS__DS_OR_SRC2_B64::~Inst_DS__DS_OR_SRC2_B64() - { - } // ~Inst_DS__DS_OR_SRC2_B64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? 
{A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[A] | MEM[B]. - void - Inst_DS__DS_OR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_XOR_SRC2_B64 class methods --- - - Inst_DS__DS_XOR_SRC2_B64::Inst_DS__DS_XOR_SRC2_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_xor_src2_b64") - { - } // Inst_DS__DS_XOR_SRC2_B64 - - Inst_DS__DS_XOR_SRC2_B64::~Inst_DS__DS_XOR_SRC2_B64() - { - } // ~Inst_DS__DS_XOR_SRC2_B64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[A] ^ MEM[B]. - void - Inst_DS__DS_XOR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_WRITE_SRC2_B64 class methods --- - - Inst_DS__DS_WRITE_SRC2_B64::Inst_DS__DS_WRITE_SRC2_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write_src2_b64") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE_SRC2_B64 - - Inst_DS__DS_WRITE_SRC2_B64::~Inst_DS__DS_WRITE_SRC2_B64() - { - } // ~Inst_DS__DS_WRITE_SRC2_B64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[B]. - // Write qword. - void - Inst_DS__DS_WRITE_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_SRC2_F64 class methods --- - - Inst_DS__DS_MIN_SRC2_F64::Inst_DS__DS_MIN_SRC2_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_src2_f64") - { - setFlag(F64); - } // Inst_DS__DS_MIN_SRC2_F64 - - Inst_DS__DS_MIN_SRC2_F64::~Inst_DS__DS_MIN_SRC2_F64() - { - } // ~Inst_DS__DS_MIN_SRC2_F64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A]. - // Float, handles NaN/INF/denorm. - void - Inst_DS__DS_MIN_SRC2_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_SRC2_F64 class methods --- - - Inst_DS__DS_MAX_SRC2_F64::Inst_DS__DS_MAX_SRC2_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_src2_f64") - { - setFlag(F64); - } // Inst_DS__DS_MAX_SRC2_F64 - - Inst_DS__DS_MAX_SRC2_F64::~Inst_DS__DS_MAX_SRC2_F64() - { - } // ~Inst_DS__DS_MAX_SRC2_F64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A]. - // Float, handles NaN/INF/denorm. - void - Inst_DS__DS_MAX_SRC2_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_WRITE_B96 class methods --- - - Inst_DS__DS_WRITE_B96::Inst_DS__DS_WRITE_B96(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write_b96") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE_B96 - - Inst_DS__DS_WRITE_B96::~Inst_DS__DS_WRITE_B96() - { - } // ~Inst_DS__DS_WRITE_B96 - - // --- description from .arch file --- - // {MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[95:0]. - // Tri-dword write. 
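The tri-dword write below stages each lane's data with a stride of four dwords in d_data (slots lane*4 through lane*4 + 2, the fourth slot left unused), which appears to let ds_write_b96 and ds_write_b128 share the same per-lane staging layout ahead of initMemWrite<3>/<4>. Condensed from the execute() below:

    // Per-lane packing into the instruction's staging buffer.
    VecElemU32 *buf = reinterpret_cast<VecElemU32*>(gpuDynInst->d_data);
    buf[lane * 4 + 0] = data0[lane];
    buf[lane * 4 + 1] = data1[lane];
    buf[lane * 4 + 2] = data2[lane];    // b128 also fills lane * 4 + 3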
- void - Inst_DS__DS_WRITE_B96::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data0(gpuDynInst, extData.DATA0); - ConstVecOperandU32 data1(gpuDynInst, extData.DATA0 + 1); - ConstVecOperandU32 data2(gpuDynInst, extData.DATA0 + 2); - - addr.read(); - data0.read(); - data1.read(); - data2.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4] = data0[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 1] = data1[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2] = data2[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_WRITE_B96::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemWrite<3>(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_WRITE_B96::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_DS__DS_WRITE_B128 class methods --- - - Inst_DS__DS_WRITE_B128::Inst_DS__DS_WRITE_B128(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write_b128") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE_B128 - - Inst_DS__DS_WRITE_B128::~Inst_DS__DS_WRITE_B128() - { - } // ~Inst_DS__DS_WRITE_B128 - - // --- description from .arch file --- - // {MEM[ADDR + 12], MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[127:0]. - // Qword write. 
- void - Inst_DS__DS_WRITE_B128::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data0(gpuDynInst, extData.DATA0); - ConstVecOperandU32 data1(gpuDynInst, extData.DATA0 + 1); - ConstVecOperandU32 data2(gpuDynInst, extData.DATA0 + 2); - ConstVecOperandU32 data3(gpuDynInst, extData.DATA0 + 3); - - addr.read(); - data0.read(); - data1.read(); - data2.read(); - data3.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4] = data0[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 1] = data1[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2] = data2[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 3] = data3[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_WRITE_B128::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemWrite<4>(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_WRITE_B128::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_DS__DS_READ_B96 class methods --- - - Inst_DS__DS_READ_B96::Inst_DS__DS_READ_B96(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_b96") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_B96 - - Inst_DS__DS_READ_B96::~Inst_DS__DS_READ_B96() - { - } // ~Inst_DS__DS_READ_B96 - - // --- description from .arch file --- - // Tri-dword read. 
- void - Inst_DS__DS_READ_B96::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ_B96::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemRead<3>(gpuDynInst, offset); - } - - void - Inst_DS__DS_READ_B96::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDST); - VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); - VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 1]; - vdst2[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2]; - } - } - - vdst0.write(); - vdst1.write(); - vdst2.write(); - } - // --- Inst_DS__DS_READ_B128 class methods --- - - Inst_DS__DS_READ_B128::Inst_DS__DS_READ_B128(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_b128") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_B128 - - Inst_DS__DS_READ_B128::~Inst_DS__DS_READ_B128() - { - } // ~Inst_DS__DS_READ_B128 - - // --- description from .arch file --- - // Qword read. - void - Inst_DS__DS_READ_B128::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ_B128::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemRead<4>(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_READ_B128::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDST); - VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); - VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2); - VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 1]; - vdst2[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2]; - vdst3[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 3]; - } - } - - vdst0.write(); - vdst1.write(); - vdst2.write(); - vdst3.write(); - } // completeAcc - // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_X class methods --- - - Inst_MUBUF__BUFFER_LOAD_FORMAT_X - ::Inst_MUBUF__BUFFER_LOAD_FORMAT_X(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_format_x") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_LOAD_FORMAT_X - - 
Inst_MUBUF__BUFFER_LOAD_FORMAT_X::~Inst_MUBUF__BUFFER_LOAD_FORMAT_X() - { - } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_X - - // --- description from .arch file --- - // Untyped buffer load 1 dword with format conversion. - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XY class methods --- - - Inst_MUBUF__BUFFER_LOAD_FORMAT_XY - ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XY(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_format_xy") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XY - - Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY() - { - } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY - - // --- description from .arch file --- - // Untyped buffer load 2 dwords with format conversion. - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ class methods --- - - Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ - ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_format_xyz") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ - - Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ() - { - } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ - - // --- description from .arch file --- - // Untyped buffer load 3 dwords with format conversion. - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW class methods --- - - Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW - ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_format_xyzw") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW - - Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW() - { - } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW - - // --- description from .arch file --- - // Untyped buffer load 4 dwords with format conversion. 
- void - Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_STORE_FORMAT_X class methods --- - - Inst_MUBUF__BUFFER_STORE_FORMAT_X - ::Inst_MUBUF__BUFFER_STORE_FORMAT_X(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_format_x") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_STORE_FORMAT_X - - Inst_MUBUF__BUFFER_STORE_FORMAT_X::~Inst_MUBUF__BUFFER_STORE_FORMAT_X() - { - } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_X - - // --- description from .arch file --- - // Untyped buffer store 1 dword with format conversion. - void - Inst_MUBUF__BUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XY class methods --- - - Inst_MUBUF__BUFFER_STORE_FORMAT_XY - ::Inst_MUBUF__BUFFER_STORE_FORMAT_XY(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_format_xy") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_STORE_FORMAT_XY - - Inst_MUBUF__BUFFER_STORE_FORMAT_XY::~Inst_MUBUF__BUFFER_STORE_FORMAT_XY() - { - } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XY - - // --- description from .arch file --- - // Untyped buffer store 2 dwords with format conversion. - void - Inst_MUBUF__BUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ class methods --- - - Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ - ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_format_xyz") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ - - Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ() - { - } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ - - // --- description from .arch file --- - // Untyped buffer store 3 dwords with format conversion. - void - Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW class methods --- - - Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW - ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_format_xyzw") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW - - Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW - ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW() - { - } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW - - // --- description from .arch file --- - // Untyped buffer store 4 dwords with format conversion. 
- void - Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X class methods --- - - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X - ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_format_d16_x") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X - - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X - ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X() - { - } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X - - // --- description from .arch file --- - // Untyped buffer load 1 dword with format conversion. - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY class methods --- - - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY - ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_format_d16_xy") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY - - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY - ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY() - { - } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY - - // --- description from .arch file --- - // Untyped buffer load 2 dwords with format conversion. - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ class methods --- - - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ - ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyz") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ - - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ - ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ() - { - } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ - - // --- description from .arch file --- - // Untyped buffer load 3 dwords with format conversion. 
- void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW class methods --- - - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW - ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyzw") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW - - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW - ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW() - { - } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW - - // --- description from .arch file --- - // Untyped buffer load 4 dwords with format conversion. - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X class methods --- - - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X - ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_format_d16_x") - { - setFlag(Store); - } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X - - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X - ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X() - { - } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X - - // --- description from .arch file --- - // Untyped buffer store 1 dword with format conversion. - void - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY class methods --- - - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY - ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_format_d16_xy") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY - - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY - ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY() - { - } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY - - // --- description from .arch file --- - // Untyped buffer store 2 dwords with format conversion. 
- void - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ class methods --- - - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ - ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyz") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ - - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ - ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ() - { - } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ - - // --- description from .arch file --- - // Untyped buffer store 3 dwords with format conversion. - void - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW class methods --- - - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW - ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyzw") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW - - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW - ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW() - { - } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW - - // --- description from .arch file --- - // Untyped buffer store 4 dwords with format conversion. - void - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_LOAD_UBYTE class methods --- - - Inst_MUBUF__BUFFER_LOAD_UBYTE - ::Inst_MUBUF__BUFFER_LOAD_UBYTE(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_ubyte") - { - setFlag(MemoryRef); - setFlag(Load); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_LOAD_UBYTE - - Inst_MUBUF__BUFFER_LOAD_UBYTE::~Inst_MUBUF__BUFFER_LOAD_UBYTE() - { - } // ~Inst_MUBUF__BUFFER_LOAD_UBYTE - - // --- description from .arch file --- - // Untyped buffer load unsigned byte (zero extend to VGPR destination). 
- void - Inst_MUBUF__BUFFER_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - - rsrcDesc.read(); - offset.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDATA); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - if (!oobMask[lane]) { - vdst[lane] = (VecElemU32)((reinterpret_cast( - gpuDynInst->d_data))[lane]); - } else { - vdst[lane] = 0; - } - } - } - - vdst.write(); - } // execute - - // --- Inst_MUBUF__BUFFER_LOAD_SBYTE class methods --- - - Inst_MUBUF__BUFFER_LOAD_SBYTE - ::Inst_MUBUF__BUFFER_LOAD_SBYTE(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_sbyte") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_LOAD_SBYTE - - Inst_MUBUF__BUFFER_LOAD_SBYTE::~Inst_MUBUF__BUFFER_LOAD_SBYTE() - { - } // ~Inst_MUBUF__BUFFER_LOAD_SBYTE - - // --- description from .arch file --- - // Untyped buffer load signed byte (sign extend to VGPR destination). - void - Inst_MUBUF__BUFFER_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_LOAD_USHORT class methods --- - - Inst_MUBUF__BUFFER_LOAD_USHORT - ::Inst_MUBUF__BUFFER_LOAD_USHORT(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_ushort") - { - setFlag(MemoryRef); - setFlag(Load); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_LOAD_USHORT - - Inst_MUBUF__BUFFER_LOAD_USHORT::~Inst_MUBUF__BUFFER_LOAD_USHORT() - { - } // ~Inst_MUBUF__BUFFER_LOAD_USHORT - - // --- description from .arch file --- - // Untyped buffer load unsigned short (zero extend to VGPR destination). 
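The implemented buffer loads in this section all select their VGPR address operands the same way: with OFFEN the first VADDR register supplies the byte offset, with IDXEN it supplies the buffer index, and with both set the index comes first and the offset second; calcAddr is then called with the offset operand ahead of the index operand, as in the execute() above. A condensed reading of that selection logic (a summary of the existing code, not new behavior):

    // IDXEN OFFEN   VADDR        VADDR+1      operands passed to calcAddr
    //   0     0     (unused)     (unused)     addr0, addr1  (neither read)
    //   0     1     voffset      -            addr0, addr1
    //   1     0     vindex       -            addr1, addr0
    //   1     1     vindex       voffset      addr1, addr0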
- void - Inst_MUBUF__BUFFER_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - - rsrcDesc.read(); - offset.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDATA); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - if (!oobMask[lane]) { - vdst[lane] = (VecElemU32)((reinterpret_cast( - gpuDynInst->d_data))[lane]); - } else { - vdst[lane] = 0; - } - } - } - - vdst.write(); - } // execute - - // --- Inst_MUBUF__BUFFER_LOAD_SSHORT class methods --- - - Inst_MUBUF__BUFFER_LOAD_SSHORT - ::Inst_MUBUF__BUFFER_LOAD_SSHORT(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_sshort") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_LOAD_SSHORT - - Inst_MUBUF__BUFFER_LOAD_SSHORT::~Inst_MUBUF__BUFFER_LOAD_SSHORT() - { - } // ~Inst_MUBUF__BUFFER_LOAD_SSHORT - - // --- description from .arch file --- - // Untyped buffer load signed short (sign extend to VGPR destination). - void - Inst_MUBUF__BUFFER_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_LOAD_DWORD class methods --- - - Inst_MUBUF__BUFFER_LOAD_DWORD - ::Inst_MUBUF__BUFFER_LOAD_DWORD(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_dword") - { - setFlag(MemoryRef); - setFlag(Load); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_LOAD_DWORD - - Inst_MUBUF__BUFFER_LOAD_DWORD::~Inst_MUBUF__BUFFER_LOAD_DWORD() - { - } // ~Inst_MUBUF__BUFFER_LOAD_DWORD - - // --- description from .arch file --- - // Untyped buffer load dword. 
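buffer_load_ubyte and buffer_load_ushort above both widen the loaded element to a full dword before the VGPR writeback, a plain zero-extension of the byte or short returned by the memory pipeline. A minimal stand-alone sketch of that widening step (simplified types, not the gem5 operand classes):

    #include <cstdint>
    #include <iostream>

    // Zero-extend a sub-dword load into the 32-bit VGPR element, as the
    // ubyte/ushort completeAcc() bodies above do with their casts.
    inline uint32_t zextByte(uint8_t loaded)   { return loaded; }
    inline uint32_t zextShort(uint16_t loaded) { return loaded; }

    int main()
    {
        std::cout << zextByte(0xff) << ' ' << zextShort(0xffff) << '\n'; // 255 65535
    }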
- void - Inst_MUBUF__BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - - rsrcDesc.read(); - offset.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDATA); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - if (!oobMask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } else { - vdst[lane] = 0; - } - } - } - - vdst.write(); - } // completeAcc - // --- Inst_MUBUF__BUFFER_LOAD_DWORDX2 class methods --- - - Inst_MUBUF__BUFFER_LOAD_DWORDX2 - ::Inst_MUBUF__BUFFER_LOAD_DWORDX2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_dwordx2") - { - setFlag(MemoryRef); - setFlag(Load); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_LOAD_DWORDX2 - - Inst_MUBUF__BUFFER_LOAD_DWORDX2::~Inst_MUBUF__BUFFER_LOAD_DWORDX2() - { - } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX2 - - // --- description from .arch file --- - // Untyped buffer load 2 dwords. 
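The same four-way IDXEN/OFFEN selection recurs in every MUBUF execute() above and below: with only OFFEN set VADDR supplies a byte offset, with only IDXEN set it supplies a buffer index, and with both set VADDR is the index and VADDR+1 the offset, which is why the operand order handed to calcAddr flips for the IDXEN cases. A condensed, self-contained sketch of just that selection (stand-in types; the real calcAddr also folds in the resource descriptor, SOFFSET and the immediate offset):

    #include <cstdint>
    #include <iostream>
    #include <utility>

    // Which of the two VGPR address operands ends up as the buffer index and
    // which as the byte offset, as a function of the IDXEN/OFFEN bits.
    std::pair<uint32_t, uint32_t>
    selectIdxOff(bool idxen, bool offen, uint32_t vaddr0, uint32_t vaddr1)
    {
        if (!idxen && !offen) return {0, 0};       // no VGPR contribution
        if (!idxen &&  offen) return {0, vaddr0};  // VADDR is the byte offset
        if ( idxen && !offen) return {vaddr0, 0};  // VADDR is the index
        return {vaddr0, vaddr1};                   // VADDR is the index, VADDR+1 the offset
    }

    int main()
    {
        auto [idx, off] = selectIdxOff(true, true, 2, 16);
        std::cout << "index=" << idx << " offset=" << off << '\n'; // index=2 offset=16
    }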
- void - Inst_MUBUF__BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - - rsrcDesc.read(); - offset.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<2>(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDATA); - VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - if (!oobMask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1]; - } else { - vdst0[lane] = 0; - vdst1[lane] = 0; - } - } - } - - vdst0.write(); - vdst1.write(); - } // completeAcc - // --- Inst_MUBUF__BUFFER_LOAD_DWORDX3 class methods --- - - Inst_MUBUF__BUFFER_LOAD_DWORDX3 - ::Inst_MUBUF__BUFFER_LOAD_DWORDX3(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_dwordx3") - { - setFlag(MemoryRef); - setFlag(Load); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_LOAD_DWORDX3 - - Inst_MUBUF__BUFFER_LOAD_DWORDX3::~Inst_MUBUF__BUFFER_LOAD_DWORDX3() - { - } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX3 - - // --- description from .arch file --- - // Untyped buffer load 3 dwords. 
- void - Inst_MUBUF__BUFFER_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - - rsrcDesc.read(); - offset.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<3>(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDATA); - VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1); - VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - if (!oobMask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 3]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 3 + 1]; - vdst2[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 3 + 2]; - } else { - vdst0[lane] = 0; - vdst1[lane] = 0; - vdst2[lane] = 0; - } - } - } - - vdst0.write(); - vdst1.write(); - vdst2.write(); - } // completeAcc - // --- Inst_MUBUF__BUFFER_LOAD_DWORDX4 class methods --- - - Inst_MUBUF__BUFFER_LOAD_DWORDX4 - ::Inst_MUBUF__BUFFER_LOAD_DWORDX4(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_dwordx4") - { - setFlag(MemoryRef); - setFlag(Load); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_LOAD_DWORDX4 - - Inst_MUBUF__BUFFER_LOAD_DWORDX4::~Inst_MUBUF__BUFFER_LOAD_DWORDX4() - { - } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX4 - - // --- description from .arch file --- - // Untyped buffer load 4 dwords. 
- void - Inst_MUBUF__BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - - rsrcDesc.read(); - offset.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<4>(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDATA); - VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1); - VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2); - VecOperandU32 vdst3(gpuDynInst, extData.VDATA + 3); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - if (!oobMask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 1]; - vdst2[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2]; - vdst3[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 3]; - } else { - vdst0[lane] = 0; - vdst1[lane] = 0; - vdst2[lane] = 0; - vdst3[lane] = 0; - } - } - } - - vdst0.write(); - vdst1.write(); - vdst2.write(); - vdst3.write(); - } // completeAcc - // --- Inst_MUBUF__BUFFER_STORE_BYTE class methods --- - - Inst_MUBUF__BUFFER_STORE_BYTE - ::Inst_MUBUF__BUFFER_STORE_BYTE(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_byte") - { - setFlag(MemoryRef); - setFlag(Store); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_STORE_BYTE - - Inst_MUBUF__BUFFER_STORE_BYTE::~Inst_MUBUF__BUFFER_STORE_BYTE() - { - } // ~Inst_MUBUF__BUFFER_STORE_BYTE - - // --- description from .arch file --- - // Untyped buffer store byte. 
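Each load's completeAcc() above consults oobMask, which the address calculation sets for lanes whose request fell outside the range described by the buffer resource descriptor; those lanes are written back as zero rather than with whatever the memory pipeline returned. A minimal sketch of that per-lane selection under simplified, fixed-width types:

    #include <array>
    #include <bitset>
    #include <cstdint>

    constexpr int NumLanes = 64;

    // Write loaded dwords back to the destination register, forcing lanes whose
    // address was out of the buffer's range to zero, as the completeAcc() bodies
    // above do for each returned dword.
    void writebackDword(const std::array<uint32_t, NumLanes> &loaded,
                        const std::bitset<NumLanes> &execMask,
                        const std::bitset<NumLanes> &oobMask,
                        std::array<uint32_t, NumLanes> &vdst)
    {
        for (int lane = 0; lane < NumLanes; ++lane) {
            if (!execMask[lane])
                continue;                          // inactive lanes keep their old value
            vdst[lane] = oobMask[lane] ? 0u : loaded[lane];
        }
    }

    int main()
    {
        std::array<uint32_t, NumLanes> loaded{}, vdst{};
        loaded[0] = 42;
        std::bitset<NumLanes> exec; exec.set(0);
        std::bitset<NumLanes> oob;                 // lane 0 is in range
        writebackDword(loaded, exec, oob, vdst);
        return vdst[0] == 42 ? 0 : 1;
    }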
- void - Inst_MUBUF__BUFFER_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - ConstVecOperandI8 data(gpuDynInst, extData.VDATA); - - rsrcDesc.read(); - offset.read(); - data.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - } // execute - - void - Inst_MUBUF__BUFFER_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_STORE_SHORT class methods --- - - Inst_MUBUF__BUFFER_STORE_SHORT - ::Inst_MUBUF__BUFFER_STORE_SHORT(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_short") - { - setFlag(MemoryRef); - setFlag(Store); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_STORE_SHORT - - Inst_MUBUF__BUFFER_STORE_SHORT::~Inst_MUBUF__BUFFER_STORE_SHORT() - { - } // ~Inst_MUBUF__BUFFER_STORE_SHORT - - // --- description from .arch file --- - // Untyped buffer store short. 
- void - Inst_MUBUF__BUFFER_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - ConstVecOperandI16 data(gpuDynInst, extData.VDATA); - - rsrcDesc.read(); - offset.read(); - data.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - } // execute - - void - Inst_MUBUF__BUFFER_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_STORE_DWORD class methods --- - - Inst_MUBUF__BUFFER_STORE_DWORD:: - Inst_MUBUF__BUFFER_STORE_DWORD(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_dword") - { - setFlag(MemoryRef); - setFlag(Store); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_STORE_DWORD - - Inst_MUBUF__BUFFER_STORE_DWORD::~Inst_MUBUF__BUFFER_STORE_DWORD() - { - } // ~Inst_MUBUF__BUFFER_STORE_DWORD - - // --- description from .arch file --- - // Untyped buffer store dword. 
- void - Inst_MUBUF__BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - ConstVecOperandU32 data(gpuDynInst, extData.VDATA); - - rsrcDesc.read(); - offset.read(); - data.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - } // execute - - void - Inst_MUBUF__BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_MUBUF__BUFFER_STORE_DWORDX2 class methods --- - - Inst_MUBUF__BUFFER_STORE_DWORDX2 - ::Inst_MUBUF__BUFFER_STORE_DWORDX2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_dwordx2") - { - setFlag(MemoryRef); - setFlag(Store); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_STORE_DWORDX2 - - Inst_MUBUF__BUFFER_STORE_DWORDX2::~Inst_MUBUF__BUFFER_STORE_DWORDX2() - { - } // ~Inst_MUBUF__BUFFER_STORE_DWORDX2 - - // --- description from .arch file --- - // Untyped buffer store 2 dwords. 
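The store execute() bodies above copy each active lane's source VGPR into gpuDynInst->d_data after issuing the request, and the wider variants below pack their additional dwords per lane the same way; initMemWrite() later streams that staging buffer out when the access is performed. A small sketch of the single-dword staging step with stand-in types:

    #include <array>
    #include <bitset>
    #include <cstdint>

    constexpr int NumLanes = 64;

    // Stage one dword per active lane into the request's data buffer, mirroring
    // the per-lane copy at the end of the store execute() bodies above.
    void stageStoreDword(const std::array<uint32_t, NumLanes> &data,
                         const std::bitset<NumLanes> &execMask,
                         std::array<uint32_t, NumLanes> &d_data)
    {
        for (int lane = 0; lane < NumLanes; ++lane) {
            if (execMask[lane])
                d_data[lane] = data[lane];
        }
    }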
- void - Inst_MUBUF__BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - ConstVecOperandU32 data0(gpuDynInst, extData.VDATA); - ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1); - - rsrcDesc.read(); - offset.read(); - data0.read(); - data1.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane * 4] - = data0[lane]; - (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 1] - = data1[lane]; - } - } - } // execute - - void - Inst_MUBUF__BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<2>(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_MUBUF__BUFFER_STORE_DWORDX3 class methods --- - - Inst_MUBUF__BUFFER_STORE_DWORDX3 - ::Inst_MUBUF__BUFFER_STORE_DWORDX3(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_dwordx3") - { - setFlag(MemoryRef); - setFlag(Store); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_STORE_DWORDX3 - - Inst_MUBUF__BUFFER_STORE_DWORDX3::~Inst_MUBUF__BUFFER_STORE_DWORDX3() - { - } // ~Inst_MUBUF__BUFFER_STORE_DWORDX3 - - // --- description from .arch file --- - // Untyped buffer store 3 dwords. 
- void - Inst_MUBUF__BUFFER_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - ConstVecOperandU32 data0(gpuDynInst, extData.VDATA); - ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1); - ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2); - - rsrcDesc.read(); - offset.read(); - data0.read(); - data1.read(); - data2.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane * 4] - = data0[lane]; - (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 1] - = data1[lane]; - (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 2] - = data2[lane]; - } - } - } // execute - - void - Inst_MUBUF__BUFFER_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<3>(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_MUBUF__BUFFER_STORE_DWORDX4 class methods --- - - Inst_MUBUF__BUFFER_STORE_DWORDX4 - ::Inst_MUBUF__BUFFER_STORE_DWORDX4(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_dwordx4") - { - setFlag(MemoryRef); - setFlag(Store); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_STORE_DWORDX4 - - Inst_MUBUF__BUFFER_STORE_DWORDX4::~Inst_MUBUF__BUFFER_STORE_DWORDX4() - { - } // ~Inst_MUBUF__BUFFER_STORE_DWORDX4 - - // --- description from .arch file --- - // Untyped buffer store 4 dwords. 
- void - Inst_MUBUF__BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - ConstVecOperandU32 data0(gpuDynInst, extData.VDATA); - ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1); - ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2); - ConstVecOperandU32 data3(gpuDynInst, extData.VDATA + 3); - - rsrcDesc.read(); - offset.read(); - data0.read(); - data1.read(); - data2.read(); - data3.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane * 4] - = data0[lane]; - (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 1] - = data1[lane]; - (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 2] - = data2[lane]; - (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 3] - = data3[lane]; - } - } - } // execute - - void - Inst_MUBUF__BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<4>(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_MUBUF__BUFFER_STORE_LDS_DWORD class methods --- - - Inst_MUBUF__BUFFER_STORE_LDS_DWORD - ::Inst_MUBUF__BUFFER_STORE_LDS_DWORD(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_lds_dword") - { - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_STORE_LDS_DWORD - - Inst_MUBUF__BUFFER_STORE_LDS_DWORD::~Inst_MUBUF__BUFFER_STORE_LDS_DWORD() - { - } // ~Inst_MUBUF__BUFFER_STORE_LDS_DWORD - - // --- description from .arch file --- - // Store one DWORD from LDS memory to system memory without utilizing - // VGPRs. - void - Inst_MUBUF__BUFFER_STORE_LDS_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_WBINVL1 class methods --- - - Inst_MUBUF__BUFFER_WBINVL1::Inst_MUBUF__BUFFER_WBINVL1(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_wbinvl1") - { - setFlag(MemoryRef); - setFlag(GPUStaticInst::MemSync); - setFlag(GlobalSegment); - setFlag(MemSync); - } // Inst_MUBUF__BUFFER_WBINVL1 - - Inst_MUBUF__BUFFER_WBINVL1::~Inst_MUBUF__BUFFER_WBINVL1() - { - } // ~Inst_MUBUF__BUFFER_WBINVL1 - - // --- description from .arch file --- - // Write back and invalidate the shader L1. - // Always returns ACK to shader. 
- void - Inst_MUBUF__BUFFER_WBINVL1::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe. - issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } // execute - - void - Inst_MUBUF__BUFFER_WBINVL1::initiateAcc(GPUDynInstPtr gpuDynInst) - { - // TODO: Fix it for gfx10. Once we have the new gfx10 cache model, we - // need to precisely communicate the writeback-invalidate operation to - // the new gfx10 coalescer rather than sending AcquireRelease markers. - // The SICoalescer would need to be updated appropriately as well. - injectGlobalMemFence(gpuDynInst); - } // initiateAcc - void - Inst_MUBUF__BUFFER_WBINVL1::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_MUBUF__BUFFER_WBINVL1_VOL class methods --- - - Inst_MUBUF__BUFFER_WBINVL1_VOL - ::Inst_MUBUF__BUFFER_WBINVL1_VOL(InFmt_MUBUF*iFmt) - : Inst_MUBUF(iFmt, "buffer_wbinvl1_vol") { - // This instruction is same as buffer_wbinvl1 instruction except this - // instruction only invalidate L1 shader line with MTYPE SC and GC. - // Since Hermes L1 (TCP) do not differentiate between its cache lines, - // this instruction currently behaves (and implemented ) exactly like - // buffer_wbinvl1 instruction. - setFlag(MemoryRef); - setFlag(GPUStaticInst::MemSync); - setFlag(GlobalSegment); - setFlag(MemSync); - } // Inst_MUBUF__BUFFER_WBINVL1_VOL - - Inst_MUBUF__BUFFER_WBINVL1_VOL::~Inst_MUBUF__BUFFER_WBINVL1_VOL() - { - } // ~Inst_MUBUF__BUFFER_WBINVL1_VOL - - // --- description from .arch file --- - // Write back and invalidate the shader L1 only for lines that are marked - // --- volatile. - // Always returns ACK to shader. - void - Inst_MUBUF__BUFFER_WBINVL1_VOL::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe. - issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } // execute - void - Inst_MUBUF__BUFFER_WBINVL1_VOL::initiateAcc(GPUDynInstPtr gpuDynInst) - { - injectGlobalMemFence(gpuDynInst); - } // initiateAcc - void - Inst_MUBUF__BUFFER_WBINVL1_VOL::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_MUBUF__BUFFER_ATOMIC_SWAP class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_SWAP - ::Inst_MUBUF__BUFFER_ATOMIC_SWAP(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_swap") - { - setFlag(AtomicExch); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SWAP - - Inst_MUBUF__BUFFER_ATOMIC_SWAP::~Inst_MUBUF__BUFFER_ATOMIC_SWAP() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA; - // RETURN_DATA = tmp. 
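Every MUBUF atomic from here on derives its return behaviour from the GLC bit of the encoding: with GLC set the old memory value is returned to VDATA (AtomicReturn), otherwise the operation is issued fire-and-forget (AtomicNoReturn). A tiny sketch of that flag selection, using stand-in declarations rather than the gem5 ones:

    #include <cstdint>

    // Stand-ins for the encoding field and the two return-behaviour flags.
    struct InFmtMubufBits { uint32_t GLC : 1; };
    enum class AtomicKind { Return, NoReturn };

    // GLC=1: the pre-op value comes back to VDATA; GLC=0: no return data,
    // matching the if (instData.GLC) blocks in every atomic constructor below.
    AtomicKind atomicKindFromGlc(const InFmtMubufBits &bits)
    {
        return bits.GLC ? AtomicKind::Return : AtomicKind::NoReturn;
    }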
- void - Inst_MUBUF__BUFFER_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP - ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap") - { - setFlag(AtomicCAS); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP - - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // src = DATA[0]; - // cmp = DATA[1]; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - ConstVecOperandU32 src(gpuDynInst, extData.VDATA); - ConstVecOperandU32 cmp(gpuDynInst, extData.VDATA + 1); - - rsrcDesc.read(); - offset.read(); - src.read(); - cmp.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->x_data))[lane] - = src[lane]; - (reinterpret_cast(gpuDynInst->a_data))[lane] - = cmp[lane]; - } - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU32 vdst(gpuDynInst, extData.VDATA); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc - // --- Inst_MUBUF__BUFFER_ATOMIC_ADD class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_ADD - ::Inst_MUBUF__BUFFER_ATOMIC_ADD(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_add") - { - setFlag(AtomicAdd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_ADD - - Inst_MUBUF__BUFFER_ATOMIC_ADD::~Inst_MUBUF__BUFFER_ATOMIC_ADD() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD - - // --- description from .arch file --- - // 32b: - 
// tmp = MEM[ADDR]; - // MEM[ADDR] += DATA; - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_SUB class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_SUB - ::Inst_MUBUF__BUFFER_ATOMIC_SUB(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_sub") - { - setFlag(AtomicSub); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SUB - - Inst_MUBUF__BUFFER_ATOMIC_SUB::~Inst_MUBUF__BUFFER_ATOMIC_SUB() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA; - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_SMIN class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_SMIN - ::Inst_MUBUF__BUFFER_ATOMIC_SMIN(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_smin") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SMIN - - Inst_MUBUF__BUFFER_ATOMIC_SMIN::~Inst_MUBUF__BUFFER_ATOMIC_SMIN() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_UMIN class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_UMIN - ::Inst_MUBUF__BUFFER_ATOMIC_UMIN(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_umin") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_UMIN - - Inst_MUBUF__BUFFER_ATOMIC_UMIN::~Inst_MUBUF__BUFFER_ATOMIC_UMIN() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_SMAX class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_SMAX - ::Inst_MUBUF__BUFFER_ATOMIC_SMAX(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_smax") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SMAX - - Inst_MUBUF__BUFFER_ATOMIC_SMAX::~Inst_MUBUF__BUFFER_ATOMIC_SMAX() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); - // RETURN_DATA = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_UMAX class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_UMAX - ::Inst_MUBUF__BUFFER_ATOMIC_UMAX(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_umax") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_UMAX - - Inst_MUBUF__BUFFER_ATOMIC_UMAX::~Inst_MUBUF__BUFFER_ATOMIC_UMAX() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_AND class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_AND - ::Inst_MUBUF__BUFFER_ATOMIC_AND(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_and") - { - setFlag(AtomicAnd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_AND - - Inst_MUBUF__BUFFER_ATOMIC_AND::~Inst_MUBUF__BUFFER_ATOMIC_AND() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_AND - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] &= DATA; - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_OR class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_OR - ::Inst_MUBUF__BUFFER_ATOMIC_OR(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_or") - { - setFlag(AtomicOr); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_OR - - Inst_MUBUF__BUFFER_ATOMIC_OR::~Inst_MUBUF__BUFFER_ATOMIC_OR() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_OR - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] |= DATA; - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_XOR class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_XOR - ::Inst_MUBUF__BUFFER_ATOMIC_XOR(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_xor") - { - setFlag(AtomicXor); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_XOR - - Inst_MUBUF__BUFFER_ATOMIC_XOR::~Inst_MUBUF__BUFFER_ATOMIC_XOR() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] ^= DATA; - // RETURN_DATA = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_INC class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_INC - ::Inst_MUBUF__BUFFER_ATOMIC_INC(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_inc") - { - setFlag(AtomicInc); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_INC - - Inst_MUBUF__BUFFER_ATOMIC_INC::~Inst_MUBUF__BUFFER_ATOMIC_INC() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_INC - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_DEC class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_DEC - ::Inst_MUBUF__BUFFER_ATOMIC_DEC(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_dec") - { - setFlag(AtomicDec); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_DEC - - Inst_MUBUF__BUFFER_ATOMIC_DEC::~Inst_MUBUF__BUFFER_ATOMIC_DEC() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 - // (unsigned compare); RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_swap_x2") - { - setFlag(AtomicExch); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 - - Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap_x2") - { - setFlag(AtomicCAS); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 - - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 - ::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // src = DATA[0:1]; - // cmp = DATA[2:3]; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0:1] = tmp. 
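The atomic inc/dec pseudocode above is the wrapping form: increment wraps to zero once the old value reaches DATA, and decrement reloads DATA when the old value is zero or already above it. A direct transcription of that 32-bit pseudocode into C++:

    #include <cstdint>

    // buffer_atomic_inc (32b): wrap to 0 once the old value reaches DATA (unsigned).
    uint32_t atomicIncWrap(uint32_t old_val, uint32_t data)
    {
        return (old_val >= data) ? 0u : old_val + 1u;
    }

    // buffer_atomic_dec (32b): reload DATA when the old value is 0 or above DATA.
    uint32_t atomicDecWrap(uint32_t old_val, uint32_t data)
    {
        return (old_val == 0u || old_val > data) ? data : old_val - 1u;
    }

    int main()
    {
        return (atomicIncWrap(4, 4) == 0 && atomicDecWrap(0, 7) == 7) ? 0 : 1;
    }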
- void - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_ADD_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_add_x2") - { - setFlag(AtomicAdd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 - - Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] += DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_SUB_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_sub_x2") - { - setFlag(AtomicSub); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 - - Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_smin_x2") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 - - Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_umin_x2") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 - - Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_smax_x2") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 - - Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_umax_x2") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 - - Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_AND_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_AND_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_AND_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_and_x2") - { - setFlag(AtomicAnd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_AND_X2 - - Inst_MUBUF__BUFFER_ATOMIC_AND_X2::~Inst_MUBUF__BUFFER_ATOMIC_AND_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_AND_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] &= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_OR_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_OR_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_OR_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_or_x2") - { - setFlag(AtomicOr); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - } // Inst_MUBUF__BUFFER_ATOMIC_OR_X2 - - Inst_MUBUF__BUFFER_ATOMIC_OR_X2::~Inst_MUBUF__BUFFER_ATOMIC_OR_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_OR_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] |= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_XOR_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_xor_x2") - { - setFlag(AtomicXor); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 - - Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] ^= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_INC_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_INC_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_INC_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_inc_x2") - { - setFlag(AtomicInc); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_INC_X2 - - Inst_MUBUF__BUFFER_ATOMIC_INC_X2::~Inst_MUBUF__BUFFER_ATOMIC_INC_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_INC_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_DEC_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_dec_x2") - { - setFlag(AtomicDec); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 - - Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 - // (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_X class methods --- - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_X - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_X(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_x") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_X - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X - - // --- description from .arch file --- - // Typed buffer load 1 dword with format conversion. 
- void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY class methods --- - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_xy") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY - - // --- description from .arch file --- - // Typed buffer load 2 dwords with format conversion. - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ class methods --- - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_xyz") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ - - // --- description from .arch file --- - // Typed buffer load 3 dwords with format conversion. - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW class methods --- - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_xyzw") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW - ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW - - // --- description from .arch file --- - // Typed buffer load 4 dwords with format conversion. - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_X class methods --- - - Inst_MTBUF__TBUFFER_STORE_FORMAT_X - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_X(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_x") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_X - - Inst_MTBUF__TBUFFER_STORE_FORMAT_X::~Inst_MTBUF__TBUFFER_STORE_FORMAT_X() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_X - - // --- description from .arch file --- - // Typed buffer store 1 dword with format conversion. 
- void - Inst_MTBUF__TBUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_XY class methods --- - - Inst_MTBUF__TBUFFER_STORE_FORMAT_XY - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XY(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_xy") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XY - - Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY - - // --- description from .arch file --- - // Typed buffer store 2 dwords with format conversion. - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ class methods --- - - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_xyz") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ - - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ - ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ - - // --- description from .arch file --- - // Typed buffer store 3 dwords with format conversion. - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW class methods --- - - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_xyzw") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW - - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW - ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW - - // --- description from .arch file --- - // Typed buffer store 4 dwords with format conversion. 
- void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X class methods --- - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_x") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X:: - ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X - - // --- description from .arch file --- - // Typed buffer load 1 dword with format conversion. - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY class methods --- - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xy") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY - ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY - - // --- description from .arch file --- - // Typed buffer load 2 dwords with format conversion. - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ class methods --- - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ( - InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyz") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ - ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ - - // --- description from .arch file --- - // Typed buffer load 3 dwords with format conversion. 
- void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW class methods --- - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW( - InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyzw") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW - ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW - - // --- description from .arch file --- - // Typed buffer load 4 dwords with format conversion. - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X class methods --- - - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_x") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X - - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X - ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X - - // --- description from .arch file --- - // Typed buffer store 1 dword with format conversion. - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY class methods --- - - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xy") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY - - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY - ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY - - // --- description from .arch file --- - // Typed buffer store 2 dwords with format conversion. 
- void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ class methods --- - - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyz") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ - - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ - ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ - - // --- description from .arch file --- - // Typed buffer store 3 dwords with format conversion. - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW class methods --- - - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyzw") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW - - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW - ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW - - // --- description from .arch file --- - // Typed buffer store 4 dwords with format conversion. - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::execute( - GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MIMG__IMAGE_LOAD class methods --- - - Inst_MIMG__IMAGE_LOAD::Inst_MIMG__IMAGE_LOAD(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_load") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_LOAD - - Inst_MIMG__IMAGE_LOAD::~Inst_MIMG__IMAGE_LOAD() - { - } // ~Inst_MIMG__IMAGE_LOAD - - // --- description from .arch file --- - // Image memory load with format conversion specified in T#. No sampler. - void - Inst_MIMG__IMAGE_LOAD::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MIMG__IMAGE_LOAD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MIMG__IMAGE_LOAD::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MIMG__IMAGE_LOAD_MIP class methods --- - - Inst_MIMG__IMAGE_LOAD_MIP::Inst_MIMG__IMAGE_LOAD_MIP(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_load_mip") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_LOAD_MIP - - Inst_MIMG__IMAGE_LOAD_MIP::~Inst_MIMG__IMAGE_LOAD_MIP() - { - } // ~Inst_MIMG__IMAGE_LOAD_MIP - - // --- description from .arch file --- - // Image memory load with user-supplied mip level. No sampler. 
- void - Inst_MIMG__IMAGE_LOAD_MIP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MIMG__IMAGE_LOAD_MIP::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MIMG__IMAGE_LOAD_MIP::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MIMG__IMAGE_LOAD_PCK class methods --- - - Inst_MIMG__IMAGE_LOAD_PCK::Inst_MIMG__IMAGE_LOAD_PCK(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_load_pck") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_LOAD_PCK - - Inst_MIMG__IMAGE_LOAD_PCK::~Inst_MIMG__IMAGE_LOAD_PCK() - { - } // ~Inst_MIMG__IMAGE_LOAD_PCK - - // --- description from .arch file --- - // Image memory load with no format conversion. No sampler. - void - Inst_MIMG__IMAGE_LOAD_PCK::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MIMG__IMAGE_LOAD_PCK::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MIMG__IMAGE_LOAD_PCK::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MIMG__IMAGE_LOAD_PCK_SGN class methods --- - - Inst_MIMG__IMAGE_LOAD_PCK_SGN::Inst_MIMG__IMAGE_LOAD_PCK_SGN( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_load_pck_sgn") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_LOAD_PCK_SGN - - Inst_MIMG__IMAGE_LOAD_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_PCK_SGN() - { - } // ~Inst_MIMG__IMAGE_LOAD_PCK_SGN - - // --- description from .arch file --- - // Image memory load with with no format conversion and sign extension. No - // --- sampler. - void - Inst_MIMG__IMAGE_LOAD_PCK_SGN::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MIMG__IMAGE_LOAD_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MIMG__IMAGE_LOAD_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MIMG__IMAGE_LOAD_MIP_PCK class methods --- - - Inst_MIMG__IMAGE_LOAD_MIP_PCK::Inst_MIMG__IMAGE_LOAD_MIP_PCK( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_load_mip_pck") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_LOAD_MIP_PCK - - Inst_MIMG__IMAGE_LOAD_MIP_PCK::~Inst_MIMG__IMAGE_LOAD_MIP_PCK() - { - } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK - - // --- description from .arch file --- - // Image memory load with user-supplied mip level, no format conversion. No - // --- sampler. - void - Inst_MIMG__IMAGE_LOAD_MIP_PCK::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MIMG__IMAGE_LOAD_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MIMG__IMAGE_LOAD_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN class methods --- - - Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_load_mip_pck_sgn") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN - - Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN() - { - } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN - - // --- description from .arch file --- - // Image memory load with user-supplied mip level, no format conversion and - // --- with sign extension. No sampler. 
- void - Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MIMG__IMAGE_STORE class methods --- - - Inst_MIMG__IMAGE_STORE::Inst_MIMG__IMAGE_STORE(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_store") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_STORE - - Inst_MIMG__IMAGE_STORE::~Inst_MIMG__IMAGE_STORE() - { - } // ~Inst_MIMG__IMAGE_STORE - - // --- description from .arch file --- - // Image memory store with format conversion specified in T#. No sampler. - void - Inst_MIMG__IMAGE_STORE::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MIMG__IMAGE_STORE::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MIMG__IMAGE_STORE::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MIMG__IMAGE_STORE_MIP class methods --- - - Inst_MIMG__IMAGE_STORE_MIP::Inst_MIMG__IMAGE_STORE_MIP(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_store_mip") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_STORE_MIP - - Inst_MIMG__IMAGE_STORE_MIP::~Inst_MIMG__IMAGE_STORE_MIP() - { - } // ~Inst_MIMG__IMAGE_STORE_MIP - - // --- description from .arch file --- - // Image memory store with format conversion specified in T# to user - // specified mip level. No sampler. - void - Inst_MIMG__IMAGE_STORE_MIP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MIMG__IMAGE_STORE_MIP::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MIMG__IMAGE_STORE_MIP::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MIMG__IMAGE_STORE_PCK class methods --- - - Inst_MIMG__IMAGE_STORE_PCK::Inst_MIMG__IMAGE_STORE_PCK(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_store_pck") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_STORE_PCK - - Inst_MIMG__IMAGE_STORE_PCK::~Inst_MIMG__IMAGE_STORE_PCK() - { - } // ~Inst_MIMG__IMAGE_STORE_PCK - - // --- description from .arch file --- - // Image memory store of packed data without format conversion. No sampler. - void - Inst_MIMG__IMAGE_STORE_PCK::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MIMG__IMAGE_STORE_PCK::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MIMG__IMAGE_STORE_PCK::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MIMG__IMAGE_STORE_MIP_PCK class methods --- - - Inst_MIMG__IMAGE_STORE_MIP_PCK::Inst_MIMG__IMAGE_STORE_MIP_PCK( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_store_mip_pck") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_STORE_MIP_PCK - - Inst_MIMG__IMAGE_STORE_MIP_PCK::~Inst_MIMG__IMAGE_STORE_MIP_PCK() - { - } // ~Inst_MIMG__IMAGE_STORE_MIP_PCK - - // --- description from .arch file --- - // Image memory store of packed data without format conversion to - // user-supplied mip level. No sampler. 
- void - Inst_MIMG__IMAGE_STORE_MIP_PCK::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MIMG__IMAGE_STORE_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MIMG__IMAGE_STORE_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MIMG__IMAGE_GET_RESINFO class methods --- - - Inst_MIMG__IMAGE_GET_RESINFO::Inst_MIMG__IMAGE_GET_RESINFO( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_get_resinfo") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GET_RESINFO - - Inst_MIMG__IMAGE_GET_RESINFO::~Inst_MIMG__IMAGE_GET_RESINFO() - { - } // ~Inst_MIMG__IMAGE_GET_RESINFO - - // --- description from .arch file --- - // return resource info for a given mip level specified in the address - // vgpr. No sampler. Returns 4 integer values into VGPRs 3-0: - // {num_mip_levels, depth, height, width}. - void - Inst_MIMG__IMAGE_GET_RESINFO::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_ATOMIC_SWAP class methods --- - - Inst_MIMG__IMAGE_ATOMIC_SWAP::Inst_MIMG__IMAGE_ATOMIC_SWAP( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_atomic_swap") - { - setFlag(AtomicExch); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_ATOMIC_SWAP - - Inst_MIMG__IMAGE_ATOMIC_SWAP::~Inst_MIMG__IMAGE_ATOMIC_SWAP() - { - } // ~Inst_MIMG__IMAGE_ATOMIC_SWAP - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA; - // RETURN_DATA = tmp. - void - Inst_MIMG__IMAGE_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_ATOMIC_CMPSWAP class methods --- - - Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::Inst_MIMG__IMAGE_ATOMIC_CMPSWAP( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_atomic_cmpswap") - { - setFlag(AtomicCAS); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_ATOMIC_CMPSWAP - - Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP() - { - } // ~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // src = DATA[0]; - // cmp = DATA[1]; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. - void - Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_ATOMIC_ADD class methods --- - - Inst_MIMG__IMAGE_ATOMIC_ADD::Inst_MIMG__IMAGE_ATOMIC_ADD(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_atomic_add") - { - setFlag(AtomicAdd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_ATOMIC_ADD - - Inst_MIMG__IMAGE_ATOMIC_ADD::~Inst_MIMG__IMAGE_ATOMIC_ADD() - { - } // ~Inst_MIMG__IMAGE_ATOMIC_ADD - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] += DATA; - // RETURN_DATA = tmp. 
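The image_atomic_swap and image_atomic_cmpswap descriptions above define the 32-bit swap and compare-and-swap behaviour. A small reference sketch of those semantics, with illustrative names that are not gem5 APIs (per the .arch text, DATA[0] is the swap source and DATA[1] the compare value):

#include <cstdint>

uint32_t
refAtomicSwap(uint32_t &mem, uint32_t data)
{
    uint32_t tmp = mem;   // RETURN_DATA = old value
    mem = data;
    return tmp;
}

uint32_t
refAtomicCmpSwap(uint32_t &mem, uint32_t src, uint32_t cmp)
{
    uint32_t tmp = mem;
    mem = (tmp == cmp) ? src : tmp;   // store src only when the compare hits
    return tmp;                       // old value is returned either way
}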
- void - Inst_MIMG__IMAGE_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_ATOMIC_SUB class methods --- - - Inst_MIMG__IMAGE_ATOMIC_SUB::Inst_MIMG__IMAGE_ATOMIC_SUB(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_atomic_sub") - { - setFlag(AtomicSub); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_ATOMIC_SUB - - Inst_MIMG__IMAGE_ATOMIC_SUB::~Inst_MIMG__IMAGE_ATOMIC_SUB() - { - } // ~Inst_MIMG__IMAGE_ATOMIC_SUB - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA; - // RETURN_DATA = tmp. - void - Inst_MIMG__IMAGE_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_ATOMIC_SMIN class methods --- - - Inst_MIMG__IMAGE_ATOMIC_SMIN::Inst_MIMG__IMAGE_ATOMIC_SMIN( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_atomic_smin") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_ATOMIC_SMIN - - Inst_MIMG__IMAGE_ATOMIC_SMIN::~Inst_MIMG__IMAGE_ATOMIC_SMIN() - { - } // ~Inst_MIMG__IMAGE_ATOMIC_SMIN - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); - // RETURN_DATA = tmp. - void - Inst_MIMG__IMAGE_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_ATOMIC_UMIN class methods --- - - Inst_MIMG__IMAGE_ATOMIC_UMIN::Inst_MIMG__IMAGE_ATOMIC_UMIN( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_atomic_umin") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_ATOMIC_UMIN - - Inst_MIMG__IMAGE_ATOMIC_UMIN::~Inst_MIMG__IMAGE_ATOMIC_UMIN() - { - } // ~Inst_MIMG__IMAGE_ATOMIC_UMIN - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); - // RETURN_DATA = tmp. - void - Inst_MIMG__IMAGE_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_ATOMIC_SMAX class methods --- - - Inst_MIMG__IMAGE_ATOMIC_SMAX::Inst_MIMG__IMAGE_ATOMIC_SMAX( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_atomic_smax") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_ATOMIC_SMAX - - Inst_MIMG__IMAGE_ATOMIC_SMAX::~Inst_MIMG__IMAGE_ATOMIC_SMAX() - { - } // ~Inst_MIMG__IMAGE_ATOMIC_SMAX - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); - // RETURN_DATA = tmp. 
- void - Inst_MIMG__IMAGE_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_ATOMIC_UMAX class methods --- - - Inst_MIMG__IMAGE_ATOMIC_UMAX::Inst_MIMG__IMAGE_ATOMIC_UMAX( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_atomic_umax") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_ATOMIC_UMAX - - Inst_MIMG__IMAGE_ATOMIC_UMAX::~Inst_MIMG__IMAGE_ATOMIC_UMAX() - { - } // ~Inst_MIMG__IMAGE_ATOMIC_UMAX - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); - // RETURN_DATA = tmp. - void - Inst_MIMG__IMAGE_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_ATOMIC_AND class methods --- - - Inst_MIMG__IMAGE_ATOMIC_AND::Inst_MIMG__IMAGE_ATOMIC_AND(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_atomic_and") - { - setFlag(AtomicAnd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_ATOMIC_AND - - Inst_MIMG__IMAGE_ATOMIC_AND::~Inst_MIMG__IMAGE_ATOMIC_AND() - { - } // ~Inst_MIMG__IMAGE_ATOMIC_AND - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] &= DATA; - // RETURN_DATA = tmp. - void - Inst_MIMG__IMAGE_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_ATOMIC_OR class methods --- - - Inst_MIMG__IMAGE_ATOMIC_OR::Inst_MIMG__IMAGE_ATOMIC_OR(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_atomic_or") - { - setFlag(AtomicOr); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_ATOMIC_OR - - Inst_MIMG__IMAGE_ATOMIC_OR::~Inst_MIMG__IMAGE_ATOMIC_OR() - { - } // ~Inst_MIMG__IMAGE_ATOMIC_OR - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] |= DATA; - // RETURN_DATA = tmp. - void - Inst_MIMG__IMAGE_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_ATOMIC_XOR class methods --- - - Inst_MIMG__IMAGE_ATOMIC_XOR::Inst_MIMG__IMAGE_ATOMIC_XOR(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_atomic_xor") - { - setFlag(AtomicXor); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_ATOMIC_XOR - - Inst_MIMG__IMAGE_ATOMIC_XOR::~Inst_MIMG__IMAGE_ATOMIC_XOR() - { - } // ~Inst_MIMG__IMAGE_ATOMIC_XOR - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] ^= DATA; - // RETURN_DATA = tmp. 
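The smin/umin/smax/umax image atomics above differ only in whether the 32-bit payload is compared as signed or unsigned. A brief sketch of that distinction (illustrative helpers, not gem5 APIs):

#include <cstdint>

uint32_t
refAtomicSMax(uint32_t &mem, uint32_t data)
{
    uint32_t tmp = mem;
    // signed compare: reinterpret both operands as int32_t
    mem = (static_cast<int32_t>(data) > static_cast<int32_t>(tmp)) ? data : tmp;
    return tmp;
}

uint32_t
refAtomicUMax(uint32_t &mem, uint32_t data)
{
    uint32_t tmp = mem;
    mem = (data > tmp) ? data : tmp;   // unsigned compare
    return tmp;
}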
- void - Inst_MIMG__IMAGE_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_ATOMIC_INC class methods --- - - Inst_MIMG__IMAGE_ATOMIC_INC::Inst_MIMG__IMAGE_ATOMIC_INC(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_atomic_inc") - { - setFlag(AtomicInc); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_ATOMIC_INC - - Inst_MIMG__IMAGE_ATOMIC_INC::~Inst_MIMG__IMAGE_ATOMIC_INC() - { - } // ~Inst_MIMG__IMAGE_ATOMIC_INC - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); - // RETURN_DATA = tmp. - void - Inst_MIMG__IMAGE_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_ATOMIC_DEC class methods --- - - Inst_MIMG__IMAGE_ATOMIC_DEC::Inst_MIMG__IMAGE_ATOMIC_DEC(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_atomic_dec") - { - setFlag(AtomicDec); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_ATOMIC_DEC - - Inst_MIMG__IMAGE_ATOMIC_DEC::~Inst_MIMG__IMAGE_ATOMIC_DEC() - { - } // ~Inst_MIMG__IMAGE_ATOMIC_DEC - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 - // (unsigned compare); RETURN_DATA = tmp. - void - Inst_MIMG__IMAGE_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE class methods --- - - Inst_MIMG__IMAGE_SAMPLE::Inst_MIMG__IMAGE_SAMPLE(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample") - { - } // Inst_MIMG__IMAGE_SAMPLE - - Inst_MIMG__IMAGE_SAMPLE::~Inst_MIMG__IMAGE_SAMPLE() - { - } // ~Inst_MIMG__IMAGE_SAMPLE - - // --- description from .arch file --- - // sample texture map. - void - Inst_MIMG__IMAGE_SAMPLE::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_CL class methods --- - - Inst_MIMG__IMAGE_SAMPLE_CL::Inst_MIMG__IMAGE_SAMPLE_CL(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_cl") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_CL - - Inst_MIMG__IMAGE_SAMPLE_CL::~Inst_MIMG__IMAGE_SAMPLE_CL() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_CL - - // --- description from .arch file --- - // sample texture map, with LOD clamp specified in shader. 
- void - Inst_MIMG__IMAGE_SAMPLE_CL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_D class methods --- - - Inst_MIMG__IMAGE_SAMPLE_D::Inst_MIMG__IMAGE_SAMPLE_D(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_d") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_D - - Inst_MIMG__IMAGE_SAMPLE_D::~Inst_MIMG__IMAGE_SAMPLE_D() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_D - - // --- description from .arch file --- - // sample texture map, with user derivatives - void - Inst_MIMG__IMAGE_SAMPLE_D::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_D_CL class methods --- - - Inst_MIMG__IMAGE_SAMPLE_D_CL::Inst_MIMG__IMAGE_SAMPLE_D_CL( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_d_cl") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_D_CL - - Inst_MIMG__IMAGE_SAMPLE_D_CL::~Inst_MIMG__IMAGE_SAMPLE_D_CL() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_D_CL - - // --- description from .arch file --- - // sample texture map, with LOD clamp specified in shader, with user - // --- derivatives. - void - Inst_MIMG__IMAGE_SAMPLE_D_CL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_L class methods --- - - Inst_MIMG__IMAGE_SAMPLE_L::Inst_MIMG__IMAGE_SAMPLE_L(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_l") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_L - - Inst_MIMG__IMAGE_SAMPLE_L::~Inst_MIMG__IMAGE_SAMPLE_L() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_L - - // --- description from .arch file --- - // sample texture map, with user LOD. - void - Inst_MIMG__IMAGE_SAMPLE_L::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_B class methods --- - - Inst_MIMG__IMAGE_SAMPLE_B::Inst_MIMG__IMAGE_SAMPLE_B(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_b") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_B - - Inst_MIMG__IMAGE_SAMPLE_B::~Inst_MIMG__IMAGE_SAMPLE_B() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_B - - // --- description from .arch file --- - // sample texture map, with lod bias. - void - Inst_MIMG__IMAGE_SAMPLE_B::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_B_CL class methods --- - - Inst_MIMG__IMAGE_SAMPLE_B_CL::Inst_MIMG__IMAGE_SAMPLE_B_CL( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_b_cl") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_B_CL - - Inst_MIMG__IMAGE_SAMPLE_B_CL::~Inst_MIMG__IMAGE_SAMPLE_B_CL() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_B_CL - - // --- description from .arch file --- - // sample texture map, with LOD clamp specified in shader, with lod bias. - void - Inst_MIMG__IMAGE_SAMPLE_B_CL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_LZ class methods --- - - Inst_MIMG__IMAGE_SAMPLE_LZ::Inst_MIMG__IMAGE_SAMPLE_LZ(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_lz") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_LZ - - Inst_MIMG__IMAGE_SAMPLE_LZ::~Inst_MIMG__IMAGE_SAMPLE_LZ() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_LZ - - // --- description from .arch file --- - // sample texture map, from level 0. 
- void - Inst_MIMG__IMAGE_SAMPLE_LZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C::Inst_MIMG__IMAGE_SAMPLE_C(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C - - Inst_MIMG__IMAGE_SAMPLE_C::~Inst_MIMG__IMAGE_SAMPLE_C() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C - - // --- description from .arch file --- - // sample texture map, with PCF. - void - Inst_MIMG__IMAGE_SAMPLE_C::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_CL class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_CL::Inst_MIMG__IMAGE_SAMPLE_C_CL( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_cl") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_CL - - Inst_MIMG__IMAGE_SAMPLE_C_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CL() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_CL - - // --- description from .arch file --- - // SAMPLE_C, with LOD clamp specified in shader. - void - Inst_MIMG__IMAGE_SAMPLE_C_CL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_D class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_D::Inst_MIMG__IMAGE_SAMPLE_C_D(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_d") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_D - - Inst_MIMG__IMAGE_SAMPLE_C_D::~Inst_MIMG__IMAGE_SAMPLE_C_D() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_D - - // --- description from .arch file --- - // SAMPLE_C, with user derivatives. - void - Inst_MIMG__IMAGE_SAMPLE_C_D::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_D_CL class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_D_CL::Inst_MIMG__IMAGE_SAMPLE_C_D_CL( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_d_cl") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL - - Inst_MIMG__IMAGE_SAMPLE_C_D_CL::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL - - // --- description from .arch file --- - // SAMPLE_C, with LOD clamp specified in shader, with user derivatives. - void - Inst_MIMG__IMAGE_SAMPLE_C_D_CL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_L class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_L::Inst_MIMG__IMAGE_SAMPLE_C_L(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_l") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_L - - Inst_MIMG__IMAGE_SAMPLE_C_L::~Inst_MIMG__IMAGE_SAMPLE_C_L() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_L - - // --- description from .arch file --- - // SAMPLE_C, with user LOD. - void - Inst_MIMG__IMAGE_SAMPLE_C_L::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_B class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_B::Inst_MIMG__IMAGE_SAMPLE_C_B(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_b") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_B - - Inst_MIMG__IMAGE_SAMPLE_C_B::~Inst_MIMG__IMAGE_SAMPLE_C_B() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_B - - // --- description from .arch file --- - // SAMPLE_C, with lod bias. 
- void - Inst_MIMG__IMAGE_SAMPLE_C_B::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_B_CL class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_B_CL::Inst_MIMG__IMAGE_SAMPLE_C_B_CL( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_b_cl") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL - - Inst_MIMG__IMAGE_SAMPLE_C_B_CL::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL - - // --- description from .arch file --- - // SAMPLE_C, with LOD clamp specified in shader, with lod bias. - void - Inst_MIMG__IMAGE_SAMPLE_C_B_CL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_LZ class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_LZ::Inst_MIMG__IMAGE_SAMPLE_C_LZ( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_lz") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_LZ - - Inst_MIMG__IMAGE_SAMPLE_C_LZ::~Inst_MIMG__IMAGE_SAMPLE_C_LZ() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ - - // --- description from .arch file --- - // SAMPLE_C, from level 0. - void - Inst_MIMG__IMAGE_SAMPLE_C_LZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_O::Inst_MIMG__IMAGE_SAMPLE_O(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_O - - Inst_MIMG__IMAGE_SAMPLE_O::~Inst_MIMG__IMAGE_SAMPLE_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_O - - // --- description from .arch file --- - // sample texture map, with user offsets. - void - Inst_MIMG__IMAGE_SAMPLE_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_CL_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_CL_O::Inst_MIMG__IMAGE_SAMPLE_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_CL_O - - Inst_MIMG__IMAGE_SAMPLE_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CL_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_CL_O - - // --- description from .arch file --- - // SAMPLE_O with LOD clamp specified in shader. - void - Inst_MIMG__IMAGE_SAMPLE_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_D_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_D_O::Inst_MIMG__IMAGE_SAMPLE_D_O(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_d_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_D_O - - Inst_MIMG__IMAGE_SAMPLE_D_O::~Inst_MIMG__IMAGE_SAMPLE_D_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_D_O - - // --- description from .arch file --- - // SAMPLE_O, with user derivatives. - void - Inst_MIMG__IMAGE_SAMPLE_D_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_D_CL_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_D_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_d_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_D_CL_O - - Inst_MIMG__IMAGE_SAMPLE_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_D_CL_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_D_CL_O - - // --- description from .arch file --- - // SAMPLE_O, with LOD clamp specified in shader, with user derivatives. 
- void - Inst_MIMG__IMAGE_SAMPLE_D_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_L_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_L_O::Inst_MIMG__IMAGE_SAMPLE_L_O(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_l_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_L_O - - Inst_MIMG__IMAGE_SAMPLE_L_O::~Inst_MIMG__IMAGE_SAMPLE_L_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_L_O - - // --- description from .arch file --- - // SAMPLE_O, with user LOD. - void - Inst_MIMG__IMAGE_SAMPLE_L_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_B_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_B_O::Inst_MIMG__IMAGE_SAMPLE_B_O(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_b_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_B_O - - Inst_MIMG__IMAGE_SAMPLE_B_O::~Inst_MIMG__IMAGE_SAMPLE_B_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_B_O - - // --- description from .arch file --- - // SAMPLE_O, with lod bias. - void - Inst_MIMG__IMAGE_SAMPLE_B_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_B_CL_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_B_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_b_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_B_CL_O - - Inst_MIMG__IMAGE_SAMPLE_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_B_CL_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_B_CL_O - - // --- description from .arch file --- - // SAMPLE_O, with LOD clamp specified in shader, with lod bias. - void - Inst_MIMG__IMAGE_SAMPLE_B_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_LZ_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_LZ_O::Inst_MIMG__IMAGE_SAMPLE_LZ_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_lz_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_LZ_O - - Inst_MIMG__IMAGE_SAMPLE_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_LZ_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_LZ_O - - // --- description from .arch file --- - // SAMPLE_O, from level 0. - void - Inst_MIMG__IMAGE_SAMPLE_LZ_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_O::Inst_MIMG__IMAGE_SAMPLE_C_O(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_O - - Inst_MIMG__IMAGE_SAMPLE_C_O::~Inst_MIMG__IMAGE_SAMPLE_C_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_O - - // --- description from .arch file --- - // SAMPLE_C with user specified offsets. - void - Inst_MIMG__IMAGE_SAMPLE_C_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_CL_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_CL_O - - Inst_MIMG__IMAGE_SAMPLE_C_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CL_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_CL_O - - // --- description from .arch file --- - // SAMPLE_C_O, with LOD clamp specified in shader. 
- void - Inst_MIMG__IMAGE_SAMPLE_C_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_D_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_D_O::Inst_MIMG__IMAGE_SAMPLE_C_D_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_d_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_D_O - - Inst_MIMG__IMAGE_SAMPLE_C_D_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_O - - // --- description from .arch file --- - // SAMPLE_C_O, with user derivatives. - void - Inst_MIMG__IMAGE_SAMPLE_C_D_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_d_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O - - Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O - - // --- description from .arch file --- - // SAMPLE_C_O, with LOD clamp specified in shader, with user derivatives. - void - Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_L_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_L_O::Inst_MIMG__IMAGE_SAMPLE_C_L_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_l_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_L_O - - Inst_MIMG__IMAGE_SAMPLE_C_L_O::~Inst_MIMG__IMAGE_SAMPLE_C_L_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_L_O - - // --- description from .arch file --- - // SAMPLE_C_O, with user LOD. - void - Inst_MIMG__IMAGE_SAMPLE_C_L_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_B_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_B_O::Inst_MIMG__IMAGE_SAMPLE_C_B_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_b_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_B_O - - Inst_MIMG__IMAGE_SAMPLE_C_B_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_O - - // --- description from .arch file --- - // SAMPLE_C_O, with lod bias. - void - Inst_MIMG__IMAGE_SAMPLE_C_B_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_b_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O - - Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O - - // --- description from .arch file --- - // SAMPLE_C_O, with LOD clamp specified in shader, with lod bias. - void - Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_LZ_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::Inst_MIMG__IMAGE_SAMPLE_C_LZ_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_lz_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_LZ_O - - Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O - - // --- description from .arch file --- - // SAMPLE_C_O, from level 0. 
- void - Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4 class methods --- - - Inst_MIMG__IMAGE_GATHER4::Inst_MIMG__IMAGE_GATHER4(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4 - - Inst_MIMG__IMAGE_GATHER4::~Inst_MIMG__IMAGE_GATHER4() - { - } // ~Inst_MIMG__IMAGE_GATHER4 - - // --- description from .arch file --- - // gather 4 single component elements (2x2). - void - Inst_MIMG__IMAGE_GATHER4::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_CL class methods --- - - Inst_MIMG__IMAGE_GATHER4_CL::Inst_MIMG__IMAGE_GATHER4_CL(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_cl") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_CL - - Inst_MIMG__IMAGE_GATHER4_CL::~Inst_MIMG__IMAGE_GATHER4_CL() - { - } // ~Inst_MIMG__IMAGE_GATHER4_CL - - // --- description from .arch file --- - // gather 4 single component elements (2x2) with user LOD clamp. - void - Inst_MIMG__IMAGE_GATHER4_CL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_L class methods --- - - Inst_MIMG__IMAGE_GATHER4_L::Inst_MIMG__IMAGE_GATHER4_L(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_l") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_L - - Inst_MIMG__IMAGE_GATHER4_L::~Inst_MIMG__IMAGE_GATHER4_L() - { - } // ~Inst_MIMG__IMAGE_GATHER4_L - - // --- description from .arch file --- - // gather 4 single component elements (2x2) with user LOD. - void - Inst_MIMG__IMAGE_GATHER4_L::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_B class methods --- - - Inst_MIMG__IMAGE_GATHER4_B::Inst_MIMG__IMAGE_GATHER4_B(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_b") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_B - - Inst_MIMG__IMAGE_GATHER4_B::~Inst_MIMG__IMAGE_GATHER4_B() - { - } // ~Inst_MIMG__IMAGE_GATHER4_B - - // --- description from .arch file --- - // gather 4 single component elements (2x2) with user bias. - void - Inst_MIMG__IMAGE_GATHER4_B::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_B_CL class methods --- - - Inst_MIMG__IMAGE_GATHER4_B_CL::Inst_MIMG__IMAGE_GATHER4_B_CL( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_b_cl") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_B_CL - - Inst_MIMG__IMAGE_GATHER4_B_CL::~Inst_MIMG__IMAGE_GATHER4_B_CL() - { - } // ~Inst_MIMG__IMAGE_GATHER4_B_CL - - // --- description from .arch file --- - // gather 4 single component elements (2x2) with user bias and clamp. - void - Inst_MIMG__IMAGE_GATHER4_B_CL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_LZ class methods --- - - Inst_MIMG__IMAGE_GATHER4_LZ::Inst_MIMG__IMAGE_GATHER4_LZ(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_lz") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_LZ - - Inst_MIMG__IMAGE_GATHER4_LZ::~Inst_MIMG__IMAGE_GATHER4_LZ() - { - } // ~Inst_MIMG__IMAGE_GATHER4_LZ - - // --- description from .arch file --- - // gather 4 single component elements (2x2) at level 0. 
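image_gather4 above fetches one component from each texel of a 2x2 footprint and returns four values, one per destination channel. A rough sketch of that access pattern, assuming a caller-supplied texel accessor; the channel ordering of the result is hardware-defined and not modelled here:

#include <array>

template <typename TexelFn>   // TexelFn: float(int x, int y)
std::array<float, 4>
refGather4(TexelFn texelComponent, int x, int y)
{
    // collect the selected component from the 2x2 quad anchored at (x, y)
    return { texelComponent(x,     y    ),
             texelComponent(x + 1, y    ),
             texelComponent(x,     y + 1),
             texelComponent(x + 1, y + 1) };
}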
- void - Inst_MIMG__IMAGE_GATHER4_LZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_C class methods --- - - Inst_MIMG__IMAGE_GATHER4_C::Inst_MIMG__IMAGE_GATHER4_C(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C - - Inst_MIMG__IMAGE_GATHER4_C::~Inst_MIMG__IMAGE_GATHER4_C() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C - - // --- description from .arch file --- - // gather 4 single component elements (2x2) with PCF. - void - Inst_MIMG__IMAGE_GATHER4_C::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_C_CL class methods --- - - Inst_MIMG__IMAGE_GATHER4_C_CL::Inst_MIMG__IMAGE_GATHER4_C_CL( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_cl") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_CL - - Inst_MIMG__IMAGE_GATHER4_C_CL::~Inst_MIMG__IMAGE_GATHER4_C_CL() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_CL - - // --- description from .arch file --- - // gather 4 single component elements (2x2) with user LOD clamp and PCF. - void - Inst_MIMG__IMAGE_GATHER4_C_CL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_C_L class methods --- - - Inst_MIMG__IMAGE_GATHER4_C_L::Inst_MIMG__IMAGE_GATHER4_C_L( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_l") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_L - - Inst_MIMG__IMAGE_GATHER4_C_L::~Inst_MIMG__IMAGE_GATHER4_C_L() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_L - - // --- description from .arch file --- - // gather 4 single component elements (2x2) with user LOD and PCF. - void - Inst_MIMG__IMAGE_GATHER4_C_L::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_C_B class methods --- - - Inst_MIMG__IMAGE_GATHER4_C_B::Inst_MIMG__IMAGE_GATHER4_C_B( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_b") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_B - - Inst_MIMG__IMAGE_GATHER4_C_B::~Inst_MIMG__IMAGE_GATHER4_C_B() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_B - - // --- description from .arch file --- - // gather 4 single component elements (2x2) with user bias and PCF. - void - Inst_MIMG__IMAGE_GATHER4_C_B::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_C_B_CL class methods --- - - Inst_MIMG__IMAGE_GATHER4_C_B_CL::Inst_MIMG__IMAGE_GATHER4_C_B_CL( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_b_cl") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_B_CL - - Inst_MIMG__IMAGE_GATHER4_C_B_CL::~Inst_MIMG__IMAGE_GATHER4_C_B_CL() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL - - // --- description from .arch file --- - // gather 4 single component elements (2x2) with user bias, clamp and PCF. - void - Inst_MIMG__IMAGE_GATHER4_C_B_CL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_C_LZ class methods --- - - Inst_MIMG__IMAGE_GATHER4_C_LZ::Inst_MIMG__IMAGE_GATHER4_C_LZ( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_lz") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_LZ - - Inst_MIMG__IMAGE_GATHER4_C_LZ::~Inst_MIMG__IMAGE_GATHER4_C_LZ() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_LZ - - // --- description from .arch file --- - // gather 4 single component elements (2x2) at level 0, with PCF. 
- void - Inst_MIMG__IMAGE_GATHER4_C_LZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_O class methods --- - - Inst_MIMG__IMAGE_GATHER4_O::Inst_MIMG__IMAGE_GATHER4_O(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_O - - Inst_MIMG__IMAGE_GATHER4_O::~Inst_MIMG__IMAGE_GATHER4_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_O - - // --- description from .arch file --- - // GATHER4, with user offsets. - void - Inst_MIMG__IMAGE_GATHER4_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_CL_O class methods --- - - Inst_MIMG__IMAGE_GATHER4_CL_O::Inst_MIMG__IMAGE_GATHER4_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_CL_O - - Inst_MIMG__IMAGE_GATHER4_CL_O::~Inst_MIMG__IMAGE_GATHER4_CL_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_CL_O - - // --- description from .arch file --- - // GATHER4_CL, with user offsets. - void - Inst_MIMG__IMAGE_GATHER4_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_L_O class methods --- - - Inst_MIMG__IMAGE_GATHER4_L_O::Inst_MIMG__IMAGE_GATHER4_L_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_l_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_L_O - - Inst_MIMG__IMAGE_GATHER4_L_O::~Inst_MIMG__IMAGE_GATHER4_L_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_L_O - - // --- description from .arch file --- - // GATHER4_L, with user offsets. - void - Inst_MIMG__IMAGE_GATHER4_L_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_B_O class methods --- - - Inst_MIMG__IMAGE_GATHER4_B_O::Inst_MIMG__IMAGE_GATHER4_B_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_b_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_B_O - - Inst_MIMG__IMAGE_GATHER4_B_O::~Inst_MIMG__IMAGE_GATHER4_B_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_B_O - - // --- description from .arch file --- - // GATHER4_B, with user offsets. - void - Inst_MIMG__IMAGE_GATHER4_B_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_B_CL_O class methods --- - - Inst_MIMG__IMAGE_GATHER4_B_CL_O::Inst_MIMG__IMAGE_GATHER4_B_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_b_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_B_CL_O - - Inst_MIMG__IMAGE_GATHER4_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_B_CL_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_B_CL_O - - // --- description from .arch file --- - // GATHER4_B_CL, with user offsets. - void - Inst_MIMG__IMAGE_GATHER4_B_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_LZ_O class methods --- - - Inst_MIMG__IMAGE_GATHER4_LZ_O::Inst_MIMG__IMAGE_GATHER4_LZ_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_lz_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_LZ_O - - Inst_MIMG__IMAGE_GATHER4_LZ_O::~Inst_MIMG__IMAGE_GATHER4_LZ_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_LZ_O - - // --- description from .arch file --- - // GATHER4_LZ, with user offsets. 
- void - Inst_MIMG__IMAGE_GATHER4_LZ_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_C_O class methods --- - - Inst_MIMG__IMAGE_GATHER4_C_O::Inst_MIMG__IMAGE_GATHER4_C_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_O - - Inst_MIMG__IMAGE_GATHER4_C_O::~Inst_MIMG__IMAGE_GATHER4_C_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_O - - // --- description from .arch file --- - // GATHER4_C, with user offsets. - void - Inst_MIMG__IMAGE_GATHER4_C_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_C_CL_O class methods --- - - Inst_MIMG__IMAGE_GATHER4_C_CL_O::Inst_MIMG__IMAGE_GATHER4_C_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_CL_O - - Inst_MIMG__IMAGE_GATHER4_C_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_CL_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_CL_O - - // --- description from .arch file --- - // GATHER4_C_CL, with user offsets. - void - Inst_MIMG__IMAGE_GATHER4_C_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_C_L_O class methods --- - - Inst_MIMG__IMAGE_GATHER4_C_L_O::Inst_MIMG__IMAGE_GATHER4_C_L_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_l_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_L_O - - Inst_MIMG__IMAGE_GATHER4_C_L_O::~Inst_MIMG__IMAGE_GATHER4_C_L_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_L_O - - // --- description from .arch file --- - // GATHER4_C_L, with user offsets. - void - Inst_MIMG__IMAGE_GATHER4_C_L_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_C_B_O class methods --- - - Inst_MIMG__IMAGE_GATHER4_C_B_O::Inst_MIMG__IMAGE_GATHER4_C_B_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_b_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_B_O - - Inst_MIMG__IMAGE_GATHER4_C_B_O::~Inst_MIMG__IMAGE_GATHER4_C_B_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_B_O - - // --- description from .arch file --- - // GATHER4_B, with user offsets. - void - Inst_MIMG__IMAGE_GATHER4_C_B_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_C_B_CL_O class methods --- - - Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::Inst_MIMG__IMAGE_GATHER4_C_B_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_b_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_B_CL_O - - Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O - - // --- description from .arch file --- - // GATHER4_B_CL, with user offsets. - void - Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GATHER4_C_LZ_O class methods --- - - Inst_MIMG__IMAGE_GATHER4_C_LZ_O::Inst_MIMG__IMAGE_GATHER4_C_LZ_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_lz_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_LZ_O - - Inst_MIMG__IMAGE_GATHER4_C_LZ_O::~Inst_MIMG__IMAGE_GATHER4_C_LZ_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_LZ_O - - // --- description from .arch file --- - // GATHER4_C_LZ, with user offsets. 
- void - Inst_MIMG__IMAGE_GATHER4_C_LZ_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_GET_LOD class methods --- - - Inst_MIMG__IMAGE_GET_LOD::Inst_MIMG__IMAGE_GET_LOD(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_get_lod") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GET_LOD - - Inst_MIMG__IMAGE_GET_LOD::~Inst_MIMG__IMAGE_GET_LOD() - { - } // ~Inst_MIMG__IMAGE_GET_LOD - - // --- description from .arch file --- - // Return calculated LOD. Vdata gets 2 32bit integer values: { rawLOD, - // --- clampedLOD }. - void - Inst_MIMG__IMAGE_GET_LOD::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_CD class methods --- - - Inst_MIMG__IMAGE_SAMPLE_CD::Inst_MIMG__IMAGE_SAMPLE_CD(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_cd") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_CD - - Inst_MIMG__IMAGE_SAMPLE_CD::~Inst_MIMG__IMAGE_SAMPLE_CD() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_CD - - // --- description from .arch file --- - // sample texture map, with user derivatives (LOD per quad) - void - Inst_MIMG__IMAGE_SAMPLE_CD::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_CD_CL class methods --- - - Inst_MIMG__IMAGE_SAMPLE_CD_CL::Inst_MIMG__IMAGE_SAMPLE_CD_CL( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_cd_cl") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_CD_CL - - Inst_MIMG__IMAGE_SAMPLE_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_CD_CL() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL - - // --- description from .arch file --- - // sample texture map, with LOD clamp specified in shader, with user - // --- derivatives (LOD per quad). - void - Inst_MIMG__IMAGE_SAMPLE_CD_CL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_CD class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_CD::Inst_MIMG__IMAGE_SAMPLE_C_CD( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_cd") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_CD - - Inst_MIMG__IMAGE_SAMPLE_C_CD::~Inst_MIMG__IMAGE_SAMPLE_C_CD() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD - - // --- description from .arch file --- - // SAMPLE_C, with user derivatives (LOD per quad). - void - Inst_MIMG__IMAGE_SAMPLE_C_CD::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_CD_CL class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_cd_cl") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL - - Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL - - // --- description from .arch file --- - // SAMPLE_C, with LOD clamp specified in shader, with user derivatives - // (LOD per quad). - void - Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_CD_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_CD_O::Inst_MIMG__IMAGE_SAMPLE_CD_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_cd_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_CD_O - - Inst_MIMG__IMAGE_SAMPLE_CD_O::~Inst_MIMG__IMAGE_SAMPLE_CD_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_CD_O - - // --- description from .arch file --- - // SAMPLE_O, with user derivatives (LOD per quad). 
- void - Inst_MIMG__IMAGE_SAMPLE_CD_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_CD_CL_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_CD_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_cd_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_CD_CL_O - - Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O - - // --- description from .arch file --- - // SAMPLE_O, with LOD clamp specified in shader, with user derivatives - // (LOD per quad). - void - Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_CD_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_CD_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_cd_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_CD_O - - Inst_MIMG__IMAGE_SAMPLE_C_CD_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_O - - // --- description from .arch file --- - // SAMPLE_C_O, with user derivatives (LOD per quad). - void - Inst_MIMG__IMAGE_SAMPLE_C_CD_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_cd_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O - - Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O - - // --- description from .arch file --- - // SAMPLE_C_O, with LOD clamp specified in shader, with user derivatives - // (LOD per quad). - void - Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_EXP__EXP class methods --- - - Inst_EXP__EXP::Inst_EXP__EXP(InFmt_EXP *iFmt) - : Inst_EXP(iFmt, "exp") - { - } // Inst_EXP__EXP - - Inst_EXP__EXP::~Inst_EXP__EXP() - { - } // ~Inst_EXP__EXP - - // --- description from .arch file --- - // Export through SX. - void - Inst_EXP__EXP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_FLAT__FLAT_LOAD_UBYTE class methods --- - - Inst_FLAT__FLAT_LOAD_UBYTE::Inst_FLAT__FLAT_LOAD_UBYTE(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_load_ubyte") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_FLAT__FLAT_LOAD_UBYTE - - Inst_FLAT__FLAT_LOAD_UBYTE::~Inst_FLAT__FLAT_LOAD_UBYTE() - { - } // ~Inst_FLAT__FLAT_LOAD_UBYTE - - // --- description from .arch file --- - // Untyped buffer load unsigned byte (zero extend to VGPR destination). 
-    void
-    Inst_FLAT__FLAT_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst)
-    {
-        Wavefront *wf = gpuDynInst->wavefront();
-
-        if (gpuDynInst->exec_mask.none()) {
-            wf->decVMemInstsIssued();
-            if (isFlat()) {
-                wf->decLGKMInstsIssued();
-            }
-            return;
-        }
-
-        gpuDynInst->execUnitId = wf->execUnitId;
-        gpuDynInst->latency.init(gpuDynInst->computeUnit());
-        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
-
-        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
-
-        issueRequestHelper(gpuDynInst);
-    } // execute
-
-    void
-    Inst_FLAT__FLAT_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
-    {
-        initMemRead<VecElemU8>(gpuDynInst);
-    } // initiateAcc
-
-    void
-    Inst_FLAT__FLAT_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
-    {
-        VecOperandU32 vdst(gpuDynInst, extData.VDST);
-
-        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-            if (gpuDynInst->exec_mask[lane]) {
-                vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>(
-                    gpuDynInst->d_data))[lane]);
-            }
-        }
-        vdst.write();
-    } // completeAcc
-    // --- Inst_FLAT__FLAT_LOAD_SBYTE class methods ---
-
-    Inst_FLAT__FLAT_LOAD_SBYTE::Inst_FLAT__FLAT_LOAD_SBYTE(InFmt_FLAT *iFmt)
-        : Inst_FLAT(iFmt, "flat_load_sbyte")
-    {
-        setFlag(MemoryRef);
-        setFlag(Load);
-    } // Inst_FLAT__FLAT_LOAD_SBYTE
-
-    Inst_FLAT__FLAT_LOAD_SBYTE::~Inst_FLAT__FLAT_LOAD_SBYTE()
-    {
-    } // ~Inst_FLAT__FLAT_LOAD_SBYTE
-
-    // --- description from .arch file ---
-    // Untyped buffer load signed byte (sign extend to VGPR destination).
-    void
-    Inst_FLAT__FLAT_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst)
-    {
-        panicUnimplemented();
-    } // execute
-
-    void
-    Inst_FLAT__FLAT_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
-    {
-    } // initiateAcc
-
-    void
-    Inst_FLAT__FLAT_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
-    {
-    } // completeAcc
-    // --- Inst_FLAT__FLAT_LOAD_USHORT class methods ---
-
-    Inst_FLAT__FLAT_LOAD_USHORT::Inst_FLAT__FLAT_LOAD_USHORT(InFmt_FLAT *iFmt)
-        : Inst_FLAT(iFmt, "flat_load_ushort")
-    {
-        setFlag(MemoryRef);
-        setFlag(Load);
-    } // Inst_FLAT__FLAT_LOAD_USHORT
-
-    Inst_FLAT__FLAT_LOAD_USHORT::~Inst_FLAT__FLAT_LOAD_USHORT()
-    {
-    } // ~Inst_FLAT__FLAT_LOAD_USHORT
-
-    // --- description from .arch file ---
-    // Untyped buffer load unsigned short (zero extend to VGPR destination).
-    void
-    Inst_FLAT__FLAT_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst)
-    {
-        Wavefront *wf = gpuDynInst->wavefront();
-
-        if (gpuDynInst->exec_mask.none()) {
-            wf->decVMemInstsIssued();
-            if (isFlat()) {
-                wf->decLGKMInstsIssued();
-            }
-            return;
-        }
-
-        gpuDynInst->execUnitId = wf->execUnitId;
-        gpuDynInst->latency.init(gpuDynInst->computeUnit());
-        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
-
-        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
-
-        issueRequestHelper(gpuDynInst);
-    } // execute
-
-    void
-    Inst_FLAT__FLAT_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
-    {
-        initMemRead<VecElemU16>(gpuDynInst);
-    } // initiateAcc
-
-    void
-    Inst_FLAT__FLAT_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst)
-    {
-        VecOperandU32 vdst(gpuDynInst, extData.VDST);
-
-        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-            if (gpuDynInst->exec_mask[lane]) {
-                vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU16*>(
-                    gpuDynInst->d_data))[lane]);
-            }
-        }
-        vdst.write();
-    } // completeAcc
-
-    // --- Inst_FLAT__FLAT_LOAD_SSHORT class methods ---
-
-    Inst_FLAT__FLAT_LOAD_SSHORT::Inst_FLAT__FLAT_LOAD_SSHORT(InFmt_FLAT *iFmt)
-        : Inst_FLAT(iFmt, "flat_load_sshort")
-    {
-        setFlag(MemoryRef);
-        setFlag(Load);
-    } // Inst_FLAT__FLAT_LOAD_SSHORT
-
-    Inst_FLAT__FLAT_LOAD_SSHORT::~Inst_FLAT__FLAT_LOAD_SSHORT()
-    {
-    } // ~Inst_FLAT__FLAT_LOAD_SSHORT
-
-    // --- description from .arch file ---
-    // Untyped buffer load signed short (sign extend to VGPR destination).
-    void
-    Inst_FLAT__FLAT_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst)
-    {
-        panicUnimplemented();
-    } // execute
-
-    void
-    Inst_FLAT__FLAT_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
-    {
-    } // initiateAcc
-
-    void
-    Inst_FLAT__FLAT_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst)
-    {
-    } // completeAcc
-    // --- Inst_FLAT__FLAT_LOAD_DWORD class methods ---
-
-    Inst_FLAT__FLAT_LOAD_DWORD::Inst_FLAT__FLAT_LOAD_DWORD(InFmt_FLAT *iFmt)
-        : Inst_FLAT(iFmt, "flat_load_dword")
-    {
-        setFlag(MemoryRef);
-        setFlag(Load);
-    } // Inst_FLAT__FLAT_LOAD_DWORD
-
-    Inst_FLAT__FLAT_LOAD_DWORD::~Inst_FLAT__FLAT_LOAD_DWORD()
-    {
-    } // ~Inst_FLAT__FLAT_LOAD_DWORD
-
-    // --- description from .arch file ---
-    // Untyped buffer load dword.
- void - Inst_FLAT__FLAT_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - vdst.write(); - } // completeAcc - // --- Inst_FLAT__FLAT_LOAD_DWORDX2 class methods --- - - Inst_FLAT__FLAT_LOAD_DWORDX2::Inst_FLAT__FLAT_LOAD_DWORDX2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_load_dwordx2") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_FLAT__FLAT_LOAD_DWORDX2 - - Inst_FLAT__FLAT_LOAD_DWORDX2::~Inst_FLAT__FLAT_LOAD_DWORDX2() - { - } // ~Inst_FLAT__FLAT_LOAD_DWORDX2 - - // --- description from .arch file --- - // Untyped buffer load 2 dwords. - void - Inst_FLAT__FLAT_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU64 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - vdst.write(); - } // completeAcc - // --- Inst_FLAT__FLAT_LOAD_DWORDX3 class methods --- - - Inst_FLAT__FLAT_LOAD_DWORDX3::Inst_FLAT__FLAT_LOAD_DWORDX3( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_load_dwordx3") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_FLAT__FLAT_LOAD_DWORDX3 - - Inst_FLAT__FLAT_LOAD_DWORDX3::~Inst_FLAT__FLAT_LOAD_DWORDX3() - { - } // ~Inst_FLAT__FLAT_LOAD_DWORDX3 - - // --- description from .arch file --- - // Untyped buffer load 3 dwords. 
- void - Inst_FLAT__FLAT_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<3>(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDST); - VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); - VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 3]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 3 + 1]; - vdst2[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 3 + 2]; - } - } - - vdst0.write(); - vdst1.write(); - vdst2.write(); - } // completeAcc - // --- Inst_FLAT__FLAT_LOAD_DWORDX4 class methods --- - - Inst_FLAT__FLAT_LOAD_DWORDX4::Inst_FLAT__FLAT_LOAD_DWORDX4( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_load_dwordx4") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_FLAT__FLAT_LOAD_DWORDX4 - - Inst_FLAT__FLAT_LOAD_DWORDX4::~Inst_FLAT__FLAT_LOAD_DWORDX4() - { - } // ~Inst_FLAT__FLAT_LOAD_DWORDX4 - - // --- description from .arch file --- - // Untyped buffer load 4 dwords. - void - Inst_FLAT__FLAT_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<4>(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDST); - VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); - VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2); - VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 1]; - vdst2[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2]; - vdst3[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 3]; - } - } - - vdst0.write(); - vdst1.write(); - vdst2.write(); - vdst3.write(); - } // completeAcc - // --- Inst_FLAT__FLAT_STORE_BYTE class methods --- - - Inst_FLAT__FLAT_STORE_BYTE::Inst_FLAT__FLAT_STORE_BYTE(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_store_byte") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_FLAT__FLAT_STORE_BYTE - - Inst_FLAT__FLAT_STORE_BYTE::~Inst_FLAT__FLAT_STORE_BYTE() - { - } // ~Inst_FLAT__FLAT_STORE_BYTE - - 
-    // --- description from .arch file ---
-    // Untyped buffer store byte.
-    void
-    Inst_FLAT__FLAT_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst)
-    {
-        Wavefront *wf = gpuDynInst->wavefront();
-
-        if (gpuDynInst->exec_mask.none()) {
-            wf->decVMemInstsIssued();
-            if (isFlat()) {
-                wf->decLGKMInstsIssued();
-            }
-            wf->decExpInstsIssued();
-            return;
-        }
-
-        gpuDynInst->execUnitId = wf->execUnitId;
-        gpuDynInst->latency.init(gpuDynInst->computeUnit());
-        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
-
-        ConstVecOperandU8 data(gpuDynInst, extData.DATA);
-
-        data.read();
-
-        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
-
-        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-            if (gpuDynInst->exec_mask[lane]) {
-                (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
-                    = data[lane];
-            }
-        }
-
-        issueRequestHelper(gpuDynInst);
-    } // execute
-
-    void
-    Inst_FLAT__FLAT_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
-    {
-        initMemWrite<VecElemU8>(gpuDynInst);
-    } // initiateAcc
-
-    void
-    Inst_FLAT__FLAT_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst)
-    {
-    } // completeAcc
-    // --- Inst_FLAT__FLAT_STORE_SHORT class methods ---
-
-    Inst_FLAT__FLAT_STORE_SHORT::Inst_FLAT__FLAT_STORE_SHORT(InFmt_FLAT *iFmt)
-        : Inst_FLAT(iFmt, "flat_store_short")
-    {
-        setFlag(MemoryRef);
-        setFlag(Store);
-    } // Inst_FLAT__FLAT_STORE_SHORT
-
-    Inst_FLAT__FLAT_STORE_SHORT::~Inst_FLAT__FLAT_STORE_SHORT()
-    {
-    } // ~Inst_FLAT__FLAT_STORE_SHORT
-
-    // --- description from .arch file ---
-    // Untyped buffer store short.
-    void
-    Inst_FLAT__FLAT_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst)
-    {
-        Wavefront *wf = gpuDynInst->wavefront();
-
-        if (gpuDynInst->exec_mask.none()) {
-            wf->decVMemInstsIssued();
-            if (isFlat()) {
-                wf->decLGKMInstsIssued();
-            }
-            wf->decExpInstsIssued();
-            return;
-        }
-
-        gpuDynInst->execUnitId = wf->execUnitId;
-        gpuDynInst->latency.init(gpuDynInst->computeUnit());
-        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
-
-        ConstVecOperandU16 data(gpuDynInst, extData.DATA);
-
-        data.read();
-
-        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
-
-        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-            if (gpuDynInst->exec_mask[lane]) {
-                (reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane]
-                    = data[lane];
-            }
-        }
-
-        issueRequestHelper(gpuDynInst);
-    } // execute
-
-    void
-    Inst_FLAT__FLAT_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
-    {
-        initMemWrite<VecElemU16>(gpuDynInst);
-    } // initiateAcc
-
-    void
-    Inst_FLAT__FLAT_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst)
-    {
-    } // completeAcc
-    // --- Inst_FLAT__FLAT_STORE_SHORT_D16_HI class methods ---
-
-    Inst_FLAT__FLAT_STORE_SHORT_D16_HI::
-        Inst_FLAT__FLAT_STORE_SHORT_D16_HI(InFmt_FLAT *iFmt)
-        : Inst_FLAT(iFmt, "flat_store_short_d16_hi")
-    {
-        setFlag(MemoryRef);
-        setFlag(Store);
-    } // Inst_FLAT__FLAT_STORE_SHORT_D16_HI
-
-    Inst_FLAT__FLAT_STORE_SHORT_D16_HI::~Inst_FLAT__FLAT_STORE_SHORT_D16_HI()
-    {
-    } // ~Inst_FLAT__FLAT_STORE_SHORT_D16_HI
-
-    // --- description from .arch file ---
-    // Untyped buffer store short.
- void - Inst_FLAT__FLAT_STORE_SHORT_D16_HI::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - - data.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = (data[lane] >> 16); - } - } - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_STORE_SHORT_D16_HI::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_STORE_SHORT_D16_HI::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_FLAT__FLAT_STORE_DWORD class methods --- - - Inst_FLAT__FLAT_STORE_DWORD::Inst_FLAT__FLAT_STORE_DWORD(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_store_dword") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_FLAT__FLAT_STORE_DWORD - - Inst_FLAT__FLAT_STORE_DWORD::~Inst_FLAT__FLAT_STORE_DWORD() - { - } // ~Inst_FLAT__FLAT_STORE_DWORD - - // --- description from .arch file --- - // Untyped buffer store dword. - void - Inst_FLAT__FLAT_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - - data.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_FLAT__FLAT_STORE_DWORDX2 class methods --- - - Inst_FLAT__FLAT_STORE_DWORDX2::Inst_FLAT__FLAT_STORE_DWORDX2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_store_dwordx2") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_FLAT__FLAT_STORE_DWORDX2 - - Inst_FLAT__FLAT_STORE_DWORDX2::~Inst_FLAT__FLAT_STORE_DWORDX2() - { - } // ~Inst_FLAT__FLAT_STORE_DWORDX2 - - // --- description from .arch file --- - // Untyped buffer store 2 dwords. 
- void - Inst_FLAT__FLAT_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 data(gpuDynInst, extData.DATA); - - data.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_FLAT__FLAT_STORE_DWORDX3 class methods --- - - Inst_FLAT__FLAT_STORE_DWORDX3::Inst_FLAT__FLAT_STORE_DWORDX3( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_store_dwordx3") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_FLAT__FLAT_STORE_DWORDX3 - - Inst_FLAT__FLAT_STORE_DWORDX3::~Inst_FLAT__FLAT_STORE_DWORDX3() - { - } // ~Inst_FLAT__FLAT_STORE_DWORDX3 - - // --- description from .arch file --- - // Untyped buffer store 3 dwords. - void - Inst_FLAT__FLAT_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 data0(gpuDynInst, extData.DATA); - ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1); - ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2); - - data0.read(); - data1.read(); - data2.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast( - gpuDynInst->d_data))[lane * 3] = data0[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 3 + 1] = data1[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 3 + 2] = data2[lane]; - } - } - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<3>(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_FLAT__FLAT_STORE_DWORDX4 class methods --- - - Inst_FLAT__FLAT_STORE_DWORDX4::Inst_FLAT__FLAT_STORE_DWORDX4( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_store_dwordx4") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_FLAT__FLAT_STORE_DWORDX4 - - Inst_FLAT__FLAT_STORE_DWORDX4::~Inst_FLAT__FLAT_STORE_DWORDX4() - { - } // ~Inst_FLAT__FLAT_STORE_DWORDX4 - - // --- description from .arch file --- - // Untyped buffer store 4 dwords. 
- void - Inst_FLAT__FLAT_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 data0(gpuDynInst, extData.DATA); - ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1); - ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2); - ConstVecOperandU32 data3(gpuDynInst, extData.DATA + 3); - - data0.read(); - data1.read(); - data2.read(); - data3.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4] = data0[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 1] = data1[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2] = data2[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 3] = data3[lane]; - } - } - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<4>(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_SWAP class methods --- - - Inst_FLAT__FLAT_ATOMIC_SWAP::Inst_FLAT__FLAT_ATOMIC_SWAP(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_swap") - { - setFlag(AtomicExch); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SWAP - - Inst_FLAT__FLAT_ATOMIC_SWAP::~Inst_FLAT__FLAT_ATOMIC_SWAP() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SWAP - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA; - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_SWAP::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_SWAP::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - - // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP class methods --- - - Inst_FLAT__FLAT_ATOMIC_CMPSWAP - ::Inst_FLAT__FLAT_ATOMIC_CMPSWAP(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_cmpswap") - { - setFlag(AtomicCAS); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP - - Inst_FLAT__FLAT_ATOMIC_CMPSWAP::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP() - { - } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // src = DATA[0]; - // cmp = DATA[1]; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_ADD class methods --- - - Inst_FLAT__FLAT_ATOMIC_ADD::Inst_FLAT__FLAT_ATOMIC_ADD(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_add") - { - setFlag(AtomicAdd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_ADD - - Inst_FLAT__FLAT_ATOMIC_ADD::~Inst_FLAT__FLAT_ATOMIC_ADD() - { - } // ~Inst_FLAT__FLAT_ATOMIC_ADD - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] += DATA; - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_ADD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_ADD::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_SUB class methods --- - - Inst_FLAT__FLAT_ATOMIC_SUB::Inst_FLAT__FLAT_ATOMIC_SUB(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_sub") - { - setFlag(AtomicSub); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SUB - - Inst_FLAT__FLAT_ATOMIC_SUB::~Inst_FLAT__FLAT_ATOMIC_SUB() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SUB - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA; - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_SUB::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_SUB::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_SMIN class methods --- - - Inst_FLAT__FLAT_ATOMIC_SMIN::Inst_FLAT__FLAT_ATOMIC_SMIN(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_smin") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SMIN - - Inst_FLAT__FLAT_ATOMIC_SMIN::~Inst_FLAT__FLAT_ATOMIC_SMIN() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SMIN - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); - // RETURN_DATA = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_SMIN::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_SMIN::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_UMIN class methods --- - - Inst_FLAT__FLAT_ATOMIC_UMIN::Inst_FLAT__FLAT_ATOMIC_UMIN(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_umin") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_UMIN - - Inst_FLAT__FLAT_ATOMIC_UMIN::~Inst_FLAT__FLAT_ATOMIC_UMIN() - { - } // ~Inst_FLAT__FLAT_ATOMIC_UMIN - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_UMIN::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_UMIN::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_SMAX class methods --- - - Inst_FLAT__FLAT_ATOMIC_SMAX::Inst_FLAT__FLAT_ATOMIC_SMAX(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_smax") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SMAX - - Inst_FLAT__FLAT_ATOMIC_SMAX::~Inst_FLAT__FLAT_ATOMIC_SMAX() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SMAX - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_SMAX::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_SMAX::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_UMAX class methods --- - - Inst_FLAT__FLAT_ATOMIC_UMAX::Inst_FLAT__FLAT_ATOMIC_UMAX(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_umax") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_UMAX - - Inst_FLAT__FLAT_ATOMIC_UMAX::~Inst_FLAT__FLAT_ATOMIC_UMAX() - { - } // ~Inst_FLAT__FLAT_ATOMIC_UMAX - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); - // RETURN_DATA = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_UMAX::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_UMAX::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_AND class methods --- - - Inst_FLAT__FLAT_ATOMIC_AND::Inst_FLAT__FLAT_ATOMIC_AND(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_and") - { - setFlag(AtomicAnd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_AND - - Inst_FLAT__FLAT_ATOMIC_AND::~Inst_FLAT__FLAT_ATOMIC_AND() - { - } // ~Inst_FLAT__FLAT_ATOMIC_AND - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] &= DATA; - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_AND::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_AND::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_OR class methods --- - - Inst_FLAT__FLAT_ATOMIC_OR::Inst_FLAT__FLAT_ATOMIC_OR(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_or") - { - setFlag(AtomicOr); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_OR - - Inst_FLAT__FLAT_ATOMIC_OR::~Inst_FLAT__FLAT_ATOMIC_OR() - { - } // ~Inst_FLAT__FLAT_ATOMIC_OR - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] |= DATA; - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_OR::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_OR::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - - // --- Inst_FLAT__FLAT_ATOMIC_XOR class methods --- - - Inst_FLAT__FLAT_ATOMIC_XOR::Inst_FLAT__FLAT_ATOMIC_XOR(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_xor") - { - setFlag(AtomicXor); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_XOR - - Inst_FLAT__FLAT_ATOMIC_XOR::~Inst_FLAT__FLAT_ATOMIC_XOR() - { - } // ~Inst_FLAT__FLAT_ATOMIC_XOR - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] ^= DATA; - // RETURN_DATA = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_XOR::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_XOR::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_INC class methods --- - - Inst_FLAT__FLAT_ATOMIC_INC::Inst_FLAT__FLAT_ATOMIC_INC(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_inc") - { - setFlag(AtomicInc); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_INC - - Inst_FLAT__FLAT_ATOMIC_INC::~Inst_FLAT__FLAT_ATOMIC_INC() - { - } // ~Inst_FLAT__FLAT_ATOMIC_INC - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_INC::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_INC::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_DEC class methods --- - - Inst_FLAT__FLAT_ATOMIC_DEC::Inst_FLAT__FLAT_ATOMIC_DEC(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_dec") - { - setFlag(AtomicDec); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_DEC - - Inst_FLAT__FLAT_ATOMIC_DEC::~Inst_FLAT__FLAT_ATOMIC_DEC() - { - } // ~Inst_FLAT__FLAT_ATOMIC_DEC - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 - // (unsigned compare); RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_DEC::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_DEC::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_SWAP_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_SWAP_X2::Inst_FLAT__FLAT_ATOMIC_SWAP_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_swap_x2") - { - setFlag(AtomicExch); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SWAP_X2 - - Inst_FLAT__FLAT_ATOMIC_SWAP_X2::~Inst_FLAT__FLAT_ATOMIC_SWAP_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SWAP_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA[0:1]; - // RETURN_DATA[0:1] = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_SWAP_X2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_SWAP_X2::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_cmpswap_x2") - { - setFlag(AtomicCAS); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 - - Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // src = DATA[0:1]; - // cmp = DATA[2:3]; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_ADD_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_ADD_X2::Inst_FLAT__FLAT_ATOMIC_ADD_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_add_x2") - { - setFlag(AtomicAdd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_ADD_X2 - - Inst_FLAT__FLAT_ATOMIC_ADD_X2::~Inst_FLAT__FLAT_ATOMIC_ADD_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_ADD_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] += DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_ADD_X2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_ADD_X2::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_SUB_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_SUB_X2::Inst_FLAT__FLAT_ATOMIC_SUB_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_sub_x2") - { - setFlag(AtomicSub); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SUB_X2 - - Inst_FLAT__FLAT_ATOMIC_SUB_X2::~Inst_FLAT__FLAT_ATOMIC_SUB_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SUB_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_SUB_X2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_SUB_X2::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_SMIN_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_SMIN_X2::Inst_FLAT__FLAT_ATOMIC_SMIN_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_smin_x2") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SMIN_X2 - - Inst_FLAT__FLAT_ATOMIC_SMIN_X2::~Inst_FLAT__FLAT_ATOMIC_SMIN_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SMIN_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_SMIN_X2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_SMIN_X2::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_UMIN_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_UMIN_X2::Inst_FLAT__FLAT_ATOMIC_UMIN_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_umin_x2") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_UMIN_X2 - - Inst_FLAT__FLAT_ATOMIC_UMIN_X2::~Inst_FLAT__FLAT_ATOMIC_UMIN_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_UMIN_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_UMIN_X2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_UMIN_X2::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_SMAX_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_SMAX_X2::Inst_FLAT__FLAT_ATOMIC_SMAX_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_smax_x2") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SMAX_X2 - - Inst_FLAT__FLAT_ATOMIC_SMAX_X2::~Inst_FLAT__FLAT_ATOMIC_SMAX_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SMAX_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_SMAX_X2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_SMAX_X2::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_UMAX_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_UMAX_X2::Inst_FLAT__FLAT_ATOMIC_UMAX_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_umax_x2") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_UMAX_X2 - - Inst_FLAT__FLAT_ATOMIC_UMAX_X2::~Inst_FLAT__FLAT_ATOMIC_UMAX_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_UMAX_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_UMAX_X2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_UMAX_X2::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_AND_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_AND_X2::Inst_FLAT__FLAT_ATOMIC_AND_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_and_x2") - { - setFlag(AtomicAnd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_AND_X2 - - Inst_FLAT__FLAT_ATOMIC_AND_X2::~Inst_FLAT__FLAT_ATOMIC_AND_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_AND_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] &= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_AND_X2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_AND_X2::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_OR_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_OR_X2::Inst_FLAT__FLAT_ATOMIC_OR_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_or_x2") - { - setFlag(AtomicOr); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_OR_X2 - - Inst_FLAT__FLAT_ATOMIC_OR_X2::~Inst_FLAT__FLAT_ATOMIC_OR_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_OR_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] |= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_OR_X2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_OR_X2::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_XOR_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_XOR_X2::Inst_FLAT__FLAT_ATOMIC_XOR_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_xor_x2") - { - setFlag(AtomicXor); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_XOR_X2 - - Inst_FLAT__FLAT_ATOMIC_XOR_X2::~Inst_FLAT__FLAT_ATOMIC_XOR_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_XOR_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] ^= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_XOR_X2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_XOR_X2::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_INC_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_INC_X2::Inst_FLAT__FLAT_ATOMIC_INC_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_inc_x2") - { - setFlag(AtomicInc); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_INC_X2 - - Inst_FLAT__FLAT_ATOMIC_INC_X2::~Inst_FLAT__FLAT_ATOMIC_INC_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_INC_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_INC_X2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_INC_X2::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_DEC_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_DEC_X2::Inst_FLAT__FLAT_ATOMIC_DEC_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_dec_x2") - { - setFlag(AtomicDec); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_DEC_X2 - - Inst_FLAT__FLAT_ATOMIC_DEC_X2::~Inst_FLAT__FLAT_ATOMIC_DEC_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_DEC_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 - // (unsigned compare); - // RETURN_DATA[0:1] = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_DEC_X2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_DEC_X2::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_ADD_F32 class methods --- - - Inst_FLAT__FLAT_ATOMIC_ADD_F32::Inst_FLAT__FLAT_ATOMIC_ADD_F32( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_add_f32") - { - setFlag(AtomicAdd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_ADD_F32 - - Inst_FLAT__FLAT_ATOMIC_ADD_F32::~Inst_FLAT__FLAT_ATOMIC_ADD_F32() - { - } // ~Inst_FLAT__FLAT_ATOMIC_ADD_F32 - - void - Inst_FLAT__FLAT_ATOMIC_ADD_F32::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_ADD_F32::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_ADD_F32::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16 class methods --- - - Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16::Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_pk_add_f16") - { - setFlag(AtomicAdd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16 - - Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16::~Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16() - { - } // ~Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16 - - void - Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_ADD_F64 class methods --- - - Inst_FLAT__FLAT_ATOMIC_ADD_F64::Inst_FLAT__FLAT_ATOMIC_ADD_F64( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_add_f64") - { - setFlag(AtomicAdd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_ADD_F64 - - Inst_FLAT__FLAT_ATOMIC_ADD_F64::~Inst_FLAT__FLAT_ATOMIC_ADD_F64() - { - } // ~Inst_FLAT__FLAT_ATOMIC_ADD_F64 - - void - Inst_FLAT__FLAT_ATOMIC_ADD_F64::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_ADD_F64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_ADD_F64::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_MIN_F64 class methods --- - - Inst_FLAT__FLAT_ATOMIC_MIN_F64::Inst_FLAT__FLAT_ATOMIC_MIN_F64( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_min_f64") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_MIN_F64 - - Inst_FLAT__FLAT_ATOMIC_MIN_F64::~Inst_FLAT__FLAT_ATOMIC_MIN_F64() - { - } // ~Inst_FLAT__FLAT_ATOMIC_MIN_F64 - - void - Inst_FLAT__FLAT_ATOMIC_MIN_F64::execute(GPUDynInstPtr gpuDynInst) - { - 
atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_MIN_F64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_MIN_F64::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_MAX_F64 class methods --- - - Inst_FLAT__FLAT_ATOMIC_MAX_F64::Inst_FLAT__FLAT_ATOMIC_MAX_F64( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_max_f64") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_MAX_F64 - - Inst_FLAT__FLAT_ATOMIC_MAX_F64::~Inst_FLAT__FLAT_ATOMIC_MAX_F64() - { - } // ~Inst_FLAT__FLAT_ATOMIC_MAX_F64 - - void - Inst_FLAT__FLAT_ATOMIC_MAX_F64::execute(GPUDynInstPtr gpuDynInst) - { - atomicExecute(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_MAX_F64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_MAX_F64::completeAcc(GPUDynInstPtr gpuDynInst) - { - atomicComplete(gpuDynInst); - } // completeAcc - // --- Inst_VOP3P__V_PK_FMA_F32 class methods --- - - Inst_VOP3P__V_PK_FMA_F32::Inst_VOP3P__V_PK_FMA_F32(InFmt_VOP3P *iFmt) - : Inst_VOP3P(iFmt, "v_pk_fma_f32") - { - setFlag(ALU); - } // Inst_VOP3P__V_PK_FMA_F32 - - Inst_VOP3P__V_PK_FMA_F32::~Inst_VOP3P__V_PK_FMA_F32() - { - } // ~Inst_VOP3P__V_PK_FMA_F32 - - // D.f[63:32] = S0.f[63:32] * S1.f[63:32] + S2.f[63:32] . D.f[31:0] = - // S0.f[31:0] * S1.f[31:0] + S2.f[31:0] . - void - Inst_VOP3P__V_PK_FMA_F32::execute(GPUDynInstPtr gpuDynInst) - { - // This is a special case of packed instructions which operates on - // 64-bit inputs/outputs and not 32-bit. U64 is used here as float - // values cannot use bitwise operations. Consider the U64 to imply - // untyped 64-bits of data. - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU64 src2(gpuDynInst, extData.SRC2); - VecOperandU64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - int opsel = instData.OPSEL; - int opsel_hi = extData.OPSEL_HI | (instData.OPSEL_HI2 << 2); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - uint32_t s0l = (opsel & 1) ? bits(src0[lane], 63, 32) - : bits(src0[lane], 31, 0); - uint32_t s1l = (opsel & 2) ? bits(src1[lane], 63, 32) - : bits(src1[lane], 31, 0); - uint32_t s2l = (opsel & 4) ? bits(src2[lane], 63, 32) - : bits(src2[lane], 31, 0); - - float dword1 = std::fma(*reinterpret_cast<float*>(&s0l), - *reinterpret_cast<float*>(&s1l), - *reinterpret_cast<float*>(&s2l)); - - uint32_t s0h = (opsel_hi & 1) ? bits(src0[lane], 63, 32) - : bits(src0[lane], 31, 0); - uint32_t s1h = (opsel_hi & 2) ? bits(src1[lane], 63, 32) - : bits(src1[lane], 31, 0); - uint32_t s2h = (opsel_hi & 4) ?
bits(src2[lane], 63, 32) - : bits(src2[lane], 31, 0); - - float dword2 = std::fma(*reinterpret_cast<float*>(&s0h), - *reinterpret_cast<float*>(&s1h), - *reinterpret_cast<float*>(&s2h)); - - uint32_t result1 = *reinterpret_cast<uint32_t*>(&dword1); - uint32_t result2 = *reinterpret_cast<uint32_t*>(&dword2); - - vdst[lane] = (static_cast<uint64_t>(result2) << 32) | result1; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3P__V_PK_MUL_F32 class methods --- - - Inst_VOP3P__V_PK_MUL_F32::Inst_VOP3P__V_PK_MUL_F32(InFmt_VOP3P *iFmt) - : Inst_VOP3P(iFmt, "v_pk_mul_f32") - { - setFlag(ALU); - } // Inst_VOP3P__V_PK_MUL_F32 - - Inst_VOP3P__V_PK_MUL_F32::~Inst_VOP3P__V_PK_MUL_F32() - { - } // ~Inst_VOP3P__V_PK_MUL_F32 - - // D.f[63:32] = S0.f[63:32] * S1.f[63:32] . D.f[31:0] = S0.f[31:0] * - // S1.f[31:0] - void - Inst_VOP3P__V_PK_MUL_F32::execute(GPUDynInstPtr gpuDynInst) - { - // This is a special case of packed instructions which operates on - // 64-bit inputs/outputs and not 32-bit. U64 is used here as float - // values cannot use bitwise operations. Consider the U64 to imply - // untyped 64-bits of data. - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - VecOperandU64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - int opsel = instData.OPSEL; - int opsel_hi = extData.OPSEL_HI; - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - uint32_t lower_dword = (opsel & 1) ? bits(src0[lane], 63, 32) - : bits(src0[lane], 31, 0); - uint32_t upper_dword = (opsel & 2) ? bits(src1[lane], 63, 32) - : bits(src1[lane], 31, 0); - - float dword1 = *reinterpret_cast<float*>(&lower_dword) - * *reinterpret_cast<float*>(&upper_dword); - - lower_dword = (opsel_hi & 1) ? bits(src0[lane], 63, 32) - : bits(src0[lane], 31, 0); - upper_dword = (opsel_hi & 2) ? bits(src1[lane], 63, 32) - : bits(src1[lane], 31, 0); - - float dword2 = *reinterpret_cast<float*>(&lower_dword) - * *reinterpret_cast<float*>(&upper_dword); - - uint32_t result1 = *reinterpret_cast<uint32_t*>(&dword1); - uint32_t result2 = *reinterpret_cast<uint32_t*>(&dword2); - - vdst[lane] = (static_cast<uint64_t>(result2) << 32) | result1; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3P__V_PK_ADD_F32 class methods --- - - Inst_VOP3P__V_PK_ADD_F32::Inst_VOP3P__V_PK_ADD_F32(InFmt_VOP3P *iFmt) - : Inst_VOP3P(iFmt, "v_pk_add_f32") - { - setFlag(ALU); - } // Inst_VOP3P__V_PK_ADD_F32 - - Inst_VOP3P__V_PK_ADD_F32::~Inst_VOP3P__V_PK_ADD_F32() - { - } // ~Inst_VOP3P__V_PK_ADD_F32 - - // D.f[63:32] = S0.f[63:32] + S1.f[63:32] . D.f[31:0] = S0.f[31:0] + - // S1.f[31:0] - void - Inst_VOP3P__V_PK_ADD_F32::execute(GPUDynInstPtr gpuDynInst) - { - // This is a special case of packed instructions which operates on - // 64-bit inputs/outputs and not 32-bit. U64 is used here as float - // values cannot use bitwise operations. Consider the U64 to imply - // untyped 64-bits of data. - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - VecOperandU64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - int opsel = instData.OPSEL; - int opsel_hi = extData.OPSEL_HI; - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - uint32_t lower_dword = (opsel & 1) ? bits(src0[lane], 63, 32) - : bits(src0[lane], 31, 0); - uint32_t upper_dword = (opsel & 2) ?
bits(src1[lane], 63, 32) - : bits(src1[lane], 31, 0); - - float dword1 = *reinterpret_cast<float*>(&lower_dword) - + *reinterpret_cast<float*>(&upper_dword); - - lower_dword = (opsel_hi & 1) ? bits(src0[lane], 63, 32) - : bits(src0[lane], 31, 0); - upper_dword = (opsel_hi & 2) ? bits(src1[lane], 63, 32) - : bits(src1[lane], 31, 0); - - float dword2 = *reinterpret_cast<float*>(&lower_dword) - + *reinterpret_cast<float*>(&upper_dword); - - uint32_t result1 = *reinterpret_cast<uint32_t*>(&dword1); - uint32_t result2 = *reinterpret_cast<uint32_t*>(&dword2); - - vdst[lane] = (static_cast<uint64_t>(result2) << 32) | result1; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3P__V_PK_MOV_B32 class methods --- - - Inst_VOP3P__V_PK_MOV_B32::Inst_VOP3P__V_PK_MOV_B32(InFmt_VOP3P *iFmt) - : Inst_VOP3P(iFmt, "v_pk_mov_b32") - { - setFlag(ALU); - } // Inst_VOP3P__V_PK_MOV_B32 - - Inst_VOP3P__V_PK_MOV_B32::~Inst_VOP3P__V_PK_MOV_B32() - { - } // ~Inst_VOP3P__V_PK_MOV_B32 - - // D.u[63:32] = S1.u[31:0]; D.u[31:0] = S0.u[31:0]. - void - Inst_VOP3P__V_PK_MOV_B32::execute(GPUDynInstPtr gpuDynInst) - { - // This is a special case of packed instructions which operates on - // 64-bit inputs/outputs and not 32-bit. - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - VecOperandU64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - // Only OPSEL[1:0] are used - // OPSEL[0] 0/1: Lower dest dword = lower/upper dword of src0 - - int opsel = instData.OPSEL; - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - // OPSEL[1] 0/1: Upper dest dword = lower/upper dword of src1 - uint64_t lower_dword = (opsel & 1) ? bits(src0[lane], 63, 32) - : bits(src0[lane], 31, 0); - uint64_t upper_dword = (opsel & 2) ? bits(src1[lane], 63, 32) - : bits(src1[lane], 31, 0); - - vdst[lane] = upper_dword << 32 | lower_dword; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3P_MAI__V_MFMA_I32_16X16X16I8 class methods --- - - Inst_VOP3P_MAI__V_MFMA_I32_16X16X16I8:: - Inst_VOP3P_MAI__V_MFMA_I32_16X16X16I8(InFmt_VOP3P_MAI *iFmt) - : Inst_VOP3P_MAI(iFmt, "v_mfma_i32_16x16x16i8") - { - setFlag(ALU); - } // Inst_VOP3P_MAI__V_MFMA_I32_16X16X16I8 - - Inst_VOP3P_MAI__V_MFMA_I32_16X16X16I8:: - ~Inst_VOP3P_MAI__V_MFMA_I32_16X16X16I8() - { - } // ~Inst_VOP3P_MAI__V_MFMA_I32_16X16X16I8 - - // D(16x16I32) = A(16x16I8) x B(16x16I8) + C(16x16I32), 1 Blocks, 8 - // pass, srcA/srcB 1 archVgpr, srcC/D 4 accVGPR - void - Inst_VOP3P_MAI__V_MFMA_I32_16X16X16I8::execute(GPUDynInstPtr gpuDynInst) - { - int acc_offset = 0; - if (instData.ACC_CD) { - warn("ACC_CD not yet implemented\n"); - } - - // int8 size allows for 4 elements per lane. At 16x16 this means 4 - // lanes per column (A matrix) / (B matrix). This whole matrix fits - // in one VGPR. The C matrix with size int32 requires 4 VGPRs. - // Handle the C matrix by using a delta. This is set to 1 normally to - // move to the next VGPR (1 dword away) and 0 if the input is a scalar - // reg (e.g., a constant). - int delta = isVectorReg(extData.SRC2) ? 1 : 0; - - // VecOperandI8 will read 8 bits and sign extend, so used U32 to read - // as "untyped" 32-bit values.
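- // As a concrete illustration (example values only, not taken from the - // .arch file): if a lane of src0 holds 0x80FF0201, the four packed - // int8 elements decode as sext<8>(0x01) = 1, sext<8>(0x02) = 2, - // sext<8>(0xFF) = -1 and sext<8>(0x80) = -128, which fill columns - // start_col+0 through start_col+3 of that row of A below.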
- ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandI32 src2a(gpuDynInst, extData.SRC2+acc_offset); - ConstVecOperandI32 src2b(gpuDynInst, extData.SRC2+acc_offset+1*delta); - ConstVecOperandI32 src2c(gpuDynInst, extData.SRC2+acc_offset+2*delta); - ConstVecOperandI32 src2d(gpuDynInst, extData.SRC2+acc_offset+3*delta); - - VecOperandI32 vdsta(gpuDynInst, instData.VDST+acc_offset); - VecOperandI32 vdstb(gpuDynInst, instData.VDST+acc_offset+1); - VecOperandI32 vdstc(gpuDynInst, instData.VDST+acc_offset+2); - VecOperandI32 vdstd(gpuDynInst, instData.VDST+acc_offset+3); - - src0.readSrc(); - src1.readSrc(); - src2a.readSrc(); - src2b.readSrc(); - src2c.readSrc(); - src2d.readSrc(); - - int32_t A[16][16]; - for (int i = 0; i < 64; ++i) { - // src0[0:15] contains columns 1 - 4 packed for rows 0 - 15, - // src0[16:31] contains columns 5 - 8 packed for rows 0 - 15, - // src0[32:47] contains columns 9 - 12 packed for rows 0 - 15, - // src0[48:63] contains columns 13 - 16 packed for rows 0 - 15, - int row = i % 16; - int start_col = (i / 16) * 4; - - A[row][start_col+0] = sext<8>(bits(src0[i], 7, 0)); - A[row][start_col+1] = sext<8>(bits(src0[i], 15, 8)); - A[row][start_col+2] = sext<8>(bits(src0[i], 23, 16)); - A[row][start_col+3] = sext<8>(bits(src0[i], 31, 24)); - } - - int32_t B[16][16]; - for (int i = 0; i < 64; ++i) { - // src1[0:15] contains rows 1 - 4 packed for columns 0 - 15 - // src1[16:31] contains rows 5 - 8 packed for columns 0 - 15 - // src1[32:47] contains rows 9 - 12 packed for columns 0 - 15 - // src1[48:63] contains rows 13 - 16 packed for columns 0 - 15 - int start_row = (i / 16) * 4; - int col = i % 16; - - B[start_row+0][col] = sext<8>(bits(src1[i], 7, 0)); - B[start_row+1][col] = sext<8>(bits(src1[i], 15, 8)); - B[start_row+2][col] = sext<8>(bits(src1[i], 23, 16)); - B[start_row+3][col] = sext<8>(bits(src1[i], 31, 24)); - } - - int32_t result[16][16]; - - // Load accumulation matrix C into result - for (int i = 0; i < 64; ++i) { - // src2a contains rows 0, 4, 8, 12 - result[(i/16)*4][(i%16)] = src2a[i]; - // src2b contains rows 1, 5, 9, 13 - result[(i/16)*4+1][(i%16)] = src2b[i]; - // src2c contains rows 2, 6, 10, 14 - result[(i/16)*4+2][(i%16)] = src2c[i]; - // src2d contains rows 3, 7, 11, 15 - result[(i/16)*4+3][(i%16)] = src2d[i]; - } - - // Compute new result - This is (obviously) not optimized - for (int i = 0; i < 16; ++i) { - for (int j = 0; j < 16; ++j) { - for (int k = 0; k < 16; ++k) { - result[i][j] += A[i][k] * B[k][j]; - } - } - } - - // Put result in dest VGPRs - for (int i = 0; i < 64; ++i) { - // vdsta contains rows 0, 4, 8, 12 - vdsta[i] = result[(i/16)*4][(i%16)]; - // vdstb contains rows 1, 5, 9, 13 - vdstb[i] = result[(i/16)*4+1][(i%16)]; - // vdstc contains rows 2, 6, 10, 14 - vdstc[i] = result[(i/16)*4+2][(i%16)]; - // vdstd contains rows 3, 7, 11, 15 - vdstd[i] = result[(i/16)*4+3][(i%16)]; - } - - vdsta.write(); - vdstb.write(); - vdstc.write(); - vdstd.write(); - } // execute - // --- Inst_VOP3P_MAI__V_MFMA_F64_16X16X4F64 class methods --- - - Inst_VOP3P_MAI__V_MFMA_F64_16X16X4F64:: - Inst_VOP3P_MAI__V_MFMA_F64_16X16X4F64(InFmt_VOP3P_MAI *iFmt) - : Inst_VOP3P_MAI(iFmt, "v_mfma_f64_16x16x4f64") - { - setFlag(ALU); - } // Inst_VOP3P_MAI__V_MFMA_F64_16X16X4F64 - - Inst_VOP3P_MAI__V_MFMA_F64_16X16X4F64:: - ~Inst_VOP3P_MAI__V_MFMA_F64_16X16X4F64() - { - } // ~Inst_VOP3P_MAI__V_MFMA_F64_16X16X4F64 - - // D(16x16F64) = A(16x4F64) x B(4x16F64) + C(16x16F64), 1 Blocks, 8 - // pass, srcA/srcB 
2 VGPR, srcC/D 8 VGPR - void - Inst_VOP3P_MAI__V_MFMA_F64_16X16X4F64::execute(GPUDynInstPtr gpuDynInst) - { - int acc_offset = 0; - if (instData.ACC_CD) { - warn("ACC_CD not yet implemented\n"); - } - - // Handling of src2 is a bit tricky. The operator[] overload cannot - // be used for dword count > 2, and the dword count here is 8. Usually - // src2 is a VGPR/AccGPR, but it might also be constant. In order to - // use operator[] and handle constants, check for VGPR here and set - // a delta for each of the pairs of src2 GPRs. - int delta = isVectorReg(extData.SRC2) ? 2 : 0; - - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF64 src2a(gpuDynInst, extData.SRC2+acc_offset); - ConstVecOperandF64 src2b(gpuDynInst, extData.SRC2+acc_offset+1*delta); - ConstVecOperandF64 src2c(gpuDynInst, extData.SRC2+acc_offset+2*delta); - ConstVecOperandF64 src2d(gpuDynInst, extData.SRC2+acc_offset+3*delta); - - VecOperandF64 vdsta(gpuDynInst, instData.VDST+acc_offset); - VecOperandF64 vdstb(gpuDynInst, instData.VDST+acc_offset+2); - VecOperandF64 vdstc(gpuDynInst, instData.VDST+acc_offset+4); - VecOperandF64 vdstd(gpuDynInst, instData.VDST+acc_offset+6); - - src0.readSrc(); - src1.readSrc(); - src2a.readSrc(); - src2b.readSrc(); - src2c.readSrc(); - src2d.readSrc(); - - double result[16][16]; - - // Load src2 into result. src2 is row major - for (int i = 0; i < 64; ++i) { - // src2a contains rows 0 - 3 - result[(i/16)][(i%16)] = src2a[i]; - // src2b contains rows 4 - 7 - result[(i/16)+4][(i%16)] = src2b[i]; - // src2c contains rows 8 - 11 - result[(i/16)+8][(i%16)] = src2c[i]; - // src2d contains rows 12 - 15 - result[(i/16)+12][(i%16)] = src2d[i]; - } - - // Compute new result - for (int i = 0; i < 16; ++i) { - for (int j = 0; j < 16; ++j) { - for (int k = 0; k < 4; ++k) { - // src0 is column major, src1 is row major - int lane_A = 16*k + i; - int lane_B = 16*k + j; - result[i][j] += src0[lane_A] * src1[lane_B]; - } - } - } - - // Put result in dest VGPRs - for (int i = 0; i < 64; ++i) { - // vdsta contains rows 0 - 3 - vdsta[i] = result[(i/16)][(i%16)]; - // vdstb contains rows 4 - 7 - vdstb[i] = result[(i/16)+4][(i%16)]; - // vdstc contains rows 8 - 11 - vdstc[i] = result[(i/16)+8][(i%16)]; - // vdstd contains rows 12 - 15 - vdstd[i] = result[(i/16)+12][(i%16)]; - } - - vdsta.write(); - vdstb.write(); - vdstc.write(); - vdstd.write(); - } // execute -} // namespace VegaISA -} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/mimg.cc b/src/arch/amdgpu/vega/insts/mimg.cc new file mode 100644 index 0000000000..29a37cca1d --- /dev/null +++ b/src/arch/amdgpu/vega/insts/mimg.cc @@ -0,0 +1,2047 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_MIMG__IMAGE_LOAD class methods --- + + Inst_MIMG__IMAGE_LOAD::Inst_MIMG__IMAGE_LOAD(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_load") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_LOAD + + Inst_MIMG__IMAGE_LOAD::~Inst_MIMG__IMAGE_LOAD() + { + } // ~Inst_MIMG__IMAGE_LOAD + + // --- description from .arch file --- + // Image memory load with format conversion specified in T#. No sampler. + void + Inst_MIMG__IMAGE_LOAD::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MIMG__IMAGE_LOAD::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MIMG__IMAGE_LOAD::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MIMG__IMAGE_LOAD_MIP class methods --- + + Inst_MIMG__IMAGE_LOAD_MIP::Inst_MIMG__IMAGE_LOAD_MIP(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_load_mip") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_LOAD_MIP + + Inst_MIMG__IMAGE_LOAD_MIP::~Inst_MIMG__IMAGE_LOAD_MIP() + { + } // ~Inst_MIMG__IMAGE_LOAD_MIP + + // --- description from .arch file --- + // Image memory load with user-supplied mip level. No sampler. + void + Inst_MIMG__IMAGE_LOAD_MIP::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MIMG__IMAGE_LOAD_MIP::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MIMG__IMAGE_LOAD_MIP::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MIMG__IMAGE_LOAD_PCK class methods --- + + Inst_MIMG__IMAGE_LOAD_PCK::Inst_MIMG__IMAGE_LOAD_PCK(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_load_pck") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_LOAD_PCK + + Inst_MIMG__IMAGE_LOAD_PCK::~Inst_MIMG__IMAGE_LOAD_PCK() + { + } // ~Inst_MIMG__IMAGE_LOAD_PCK + + // --- description from .arch file --- + // Image memory load with no format conversion. No sampler. 
+ void + Inst_MIMG__IMAGE_LOAD_PCK::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MIMG__IMAGE_LOAD_PCK::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MIMG__IMAGE_LOAD_PCK::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MIMG__IMAGE_LOAD_PCK_SGN class methods --- + + Inst_MIMG__IMAGE_LOAD_PCK_SGN::Inst_MIMG__IMAGE_LOAD_PCK_SGN( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_load_pck_sgn") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_LOAD_PCK_SGN + + Inst_MIMG__IMAGE_LOAD_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_PCK_SGN() + { + } // ~Inst_MIMG__IMAGE_LOAD_PCK_SGN + + // --- description from .arch file --- + // Image memory load with with no format conversion and sign extension. No + // --- sampler. + void + Inst_MIMG__IMAGE_LOAD_PCK_SGN::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MIMG__IMAGE_LOAD_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MIMG__IMAGE_LOAD_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MIMG__IMAGE_LOAD_MIP_PCK class methods --- + + Inst_MIMG__IMAGE_LOAD_MIP_PCK::Inst_MIMG__IMAGE_LOAD_MIP_PCK( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_load_mip_pck") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_LOAD_MIP_PCK + + Inst_MIMG__IMAGE_LOAD_MIP_PCK::~Inst_MIMG__IMAGE_LOAD_MIP_PCK() + { + } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK + + // --- description from .arch file --- + // Image memory load with user-supplied mip level, no format conversion. No + // --- sampler. + void + Inst_MIMG__IMAGE_LOAD_MIP_PCK::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MIMG__IMAGE_LOAD_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MIMG__IMAGE_LOAD_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN class methods --- + + Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_load_mip_pck_sgn") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN + + Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN() + { + } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN + + // --- description from .arch file --- + // Image memory load with user-supplied mip level, no format conversion and + // --- with sign extension. No sampler. + void + Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MIMG__IMAGE_STORE class methods --- + + Inst_MIMG__IMAGE_STORE::Inst_MIMG__IMAGE_STORE(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_store") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_STORE + + Inst_MIMG__IMAGE_STORE::~Inst_MIMG__IMAGE_STORE() + { + } // ~Inst_MIMG__IMAGE_STORE + + // --- description from .arch file --- + // Image memory store with format conversion specified in T#. No sampler. 
+ void + Inst_MIMG__IMAGE_STORE::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MIMG__IMAGE_STORE::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MIMG__IMAGE_STORE::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MIMG__IMAGE_STORE_MIP class methods --- + + Inst_MIMG__IMAGE_STORE_MIP::Inst_MIMG__IMAGE_STORE_MIP(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_store_mip") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_STORE_MIP + + Inst_MIMG__IMAGE_STORE_MIP::~Inst_MIMG__IMAGE_STORE_MIP() + { + } // ~Inst_MIMG__IMAGE_STORE_MIP + + // --- description from .arch file --- + // Image memory store with format conversion specified in T# to user + // specified mip level. No sampler. + void + Inst_MIMG__IMAGE_STORE_MIP::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MIMG__IMAGE_STORE_MIP::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MIMG__IMAGE_STORE_MIP::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MIMG__IMAGE_STORE_PCK class methods --- + + Inst_MIMG__IMAGE_STORE_PCK::Inst_MIMG__IMAGE_STORE_PCK(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_store_pck") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_STORE_PCK + + Inst_MIMG__IMAGE_STORE_PCK::~Inst_MIMG__IMAGE_STORE_PCK() + { + } // ~Inst_MIMG__IMAGE_STORE_PCK + + // --- description from .arch file --- + // Image memory store of packed data without format conversion. No sampler. + void + Inst_MIMG__IMAGE_STORE_PCK::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MIMG__IMAGE_STORE_PCK::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MIMG__IMAGE_STORE_PCK::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MIMG__IMAGE_STORE_MIP_PCK class methods --- + + Inst_MIMG__IMAGE_STORE_MIP_PCK::Inst_MIMG__IMAGE_STORE_MIP_PCK( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_store_mip_pck") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_STORE_MIP_PCK + + Inst_MIMG__IMAGE_STORE_MIP_PCK::~Inst_MIMG__IMAGE_STORE_MIP_PCK() + { + } // ~Inst_MIMG__IMAGE_STORE_MIP_PCK + + // --- description from .arch file --- + // Image memory store of packed data without format conversion to + // user-supplied mip level. No sampler. + void + Inst_MIMG__IMAGE_STORE_MIP_PCK::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MIMG__IMAGE_STORE_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MIMG__IMAGE_STORE_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MIMG__IMAGE_GET_RESINFO class methods --- + + Inst_MIMG__IMAGE_GET_RESINFO::Inst_MIMG__IMAGE_GET_RESINFO( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_get_resinfo") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GET_RESINFO + + Inst_MIMG__IMAGE_GET_RESINFO::~Inst_MIMG__IMAGE_GET_RESINFO() + { + } // ~Inst_MIMG__IMAGE_GET_RESINFO + + // --- description from .arch file --- + // return resource info for a given mip level specified in the address + // vgpr. No sampler. Returns 4 integer values into VGPRs 3-0: + // {num_mip_levels, depth, height, width}. 
+ void + Inst_MIMG__IMAGE_GET_RESINFO::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_ATOMIC_SWAP class methods --- + + Inst_MIMG__IMAGE_ATOMIC_SWAP::Inst_MIMG__IMAGE_ATOMIC_SWAP( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_atomic_swap") + { + setFlag(AtomicExch); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_ATOMIC_SWAP + + Inst_MIMG__IMAGE_ATOMIC_SWAP::~Inst_MIMG__IMAGE_ATOMIC_SWAP() + { + } // ~Inst_MIMG__IMAGE_ATOMIC_SWAP + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = DATA; + // RETURN_DATA = tmp. + void + Inst_MIMG__IMAGE_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_ATOMIC_CMPSWAP class methods --- + + Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::Inst_MIMG__IMAGE_ATOMIC_CMPSWAP( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_atomic_cmpswap") + { + setFlag(AtomicCAS); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_ATOMIC_CMPSWAP + + Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP() + { + } // ~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // src = DATA[0]; + // cmp = DATA[1]; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0] = tmp. + void + Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_ATOMIC_ADD class methods --- + + Inst_MIMG__IMAGE_ATOMIC_ADD::Inst_MIMG__IMAGE_ATOMIC_ADD(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_atomic_add") + { + setFlag(AtomicAdd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_ATOMIC_ADD + + Inst_MIMG__IMAGE_ATOMIC_ADD::~Inst_MIMG__IMAGE_ATOMIC_ADD() + { + } // ~Inst_MIMG__IMAGE_ATOMIC_ADD + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] += DATA; + // RETURN_DATA = tmp. + void + Inst_MIMG__IMAGE_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_ATOMIC_SUB class methods --- + + Inst_MIMG__IMAGE_ATOMIC_SUB::Inst_MIMG__IMAGE_ATOMIC_SUB(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_atomic_sub") + { + setFlag(AtomicSub); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_ATOMIC_SUB + + Inst_MIMG__IMAGE_ATOMIC_SUB::~Inst_MIMG__IMAGE_ATOMIC_SUB() + { + } // ~Inst_MIMG__IMAGE_ATOMIC_SUB + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= DATA; + // RETURN_DATA = tmp. 
+ void + Inst_MIMG__IMAGE_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_ATOMIC_SMIN class methods --- + + Inst_MIMG__IMAGE_ATOMIC_SMIN::Inst_MIMG__IMAGE_ATOMIC_SMIN( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_atomic_smin") + { + setFlag(AtomicMin); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_ATOMIC_SMIN + + Inst_MIMG__IMAGE_ATOMIC_SMIN::~Inst_MIMG__IMAGE_ATOMIC_SMIN() + { + } // ~Inst_MIMG__IMAGE_ATOMIC_SMIN + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); + // RETURN_DATA = tmp. + void + Inst_MIMG__IMAGE_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_ATOMIC_UMIN class methods --- + + Inst_MIMG__IMAGE_ATOMIC_UMIN::Inst_MIMG__IMAGE_ATOMIC_UMIN( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_atomic_umin") + { + setFlag(AtomicMin); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_ATOMIC_UMIN + + Inst_MIMG__IMAGE_ATOMIC_UMIN::~Inst_MIMG__IMAGE_ATOMIC_UMIN() + { + } // ~Inst_MIMG__IMAGE_ATOMIC_UMIN + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); + // RETURN_DATA = tmp. + void + Inst_MIMG__IMAGE_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_ATOMIC_SMAX class methods --- + + Inst_MIMG__IMAGE_ATOMIC_SMAX::Inst_MIMG__IMAGE_ATOMIC_SMAX( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_atomic_smax") + { + setFlag(AtomicMax); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_ATOMIC_SMAX + + Inst_MIMG__IMAGE_ATOMIC_SMAX::~Inst_MIMG__IMAGE_ATOMIC_SMAX() + { + } // ~Inst_MIMG__IMAGE_ATOMIC_SMAX + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); + // RETURN_DATA = tmp. + void + Inst_MIMG__IMAGE_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_ATOMIC_UMAX class methods --- + + Inst_MIMG__IMAGE_ATOMIC_UMAX::Inst_MIMG__IMAGE_ATOMIC_UMAX( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_atomic_umax") + { + setFlag(AtomicMax); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_ATOMIC_UMAX + + Inst_MIMG__IMAGE_ATOMIC_UMAX::~Inst_MIMG__IMAGE_ATOMIC_UMAX() + { + } // ~Inst_MIMG__IMAGE_ATOMIC_UMAX + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); + // RETURN_DATA = tmp. 
+ void + Inst_MIMG__IMAGE_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_ATOMIC_AND class methods --- + + Inst_MIMG__IMAGE_ATOMIC_AND::Inst_MIMG__IMAGE_ATOMIC_AND(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_atomic_and") + { + setFlag(AtomicAnd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_ATOMIC_AND + + Inst_MIMG__IMAGE_ATOMIC_AND::~Inst_MIMG__IMAGE_ATOMIC_AND() + { + } // ~Inst_MIMG__IMAGE_ATOMIC_AND + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] &= DATA; + // RETURN_DATA = tmp. + void + Inst_MIMG__IMAGE_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_ATOMIC_OR class methods --- + + Inst_MIMG__IMAGE_ATOMIC_OR::Inst_MIMG__IMAGE_ATOMIC_OR(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_atomic_or") + { + setFlag(AtomicOr); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_ATOMIC_OR + + Inst_MIMG__IMAGE_ATOMIC_OR::~Inst_MIMG__IMAGE_ATOMIC_OR() + { + } // ~Inst_MIMG__IMAGE_ATOMIC_OR + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] |= DATA; + // RETURN_DATA = tmp. + void + Inst_MIMG__IMAGE_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_ATOMIC_XOR class methods --- + + Inst_MIMG__IMAGE_ATOMIC_XOR::Inst_MIMG__IMAGE_ATOMIC_XOR(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_atomic_xor") + { + setFlag(AtomicXor); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_ATOMIC_XOR + + Inst_MIMG__IMAGE_ATOMIC_XOR::~Inst_MIMG__IMAGE_ATOMIC_XOR() + { + } // ~Inst_MIMG__IMAGE_ATOMIC_XOR + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] ^= DATA; + // RETURN_DATA = tmp. + void + Inst_MIMG__IMAGE_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_ATOMIC_INC class methods --- + + Inst_MIMG__IMAGE_ATOMIC_INC::Inst_MIMG__IMAGE_ATOMIC_INC(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_atomic_inc") + { + setFlag(AtomicInc); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_ATOMIC_INC + + Inst_MIMG__IMAGE_ATOMIC_INC::~Inst_MIMG__IMAGE_ATOMIC_INC() + { + } // ~Inst_MIMG__IMAGE_ATOMIC_INC + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); + // RETURN_DATA = tmp. 
+ void + Inst_MIMG__IMAGE_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_ATOMIC_DEC class methods --- + + Inst_MIMG__IMAGE_ATOMIC_DEC::Inst_MIMG__IMAGE_ATOMIC_DEC(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_atomic_dec") + { + setFlag(AtomicDec); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_ATOMIC_DEC + + Inst_MIMG__IMAGE_ATOMIC_DEC::~Inst_MIMG__IMAGE_ATOMIC_DEC() + { + } // ~Inst_MIMG__IMAGE_ATOMIC_DEC + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 + // (unsigned compare); RETURN_DATA = tmp. + void + Inst_MIMG__IMAGE_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE class methods --- + + Inst_MIMG__IMAGE_SAMPLE::Inst_MIMG__IMAGE_SAMPLE(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample") + { + } // Inst_MIMG__IMAGE_SAMPLE + + Inst_MIMG__IMAGE_SAMPLE::~Inst_MIMG__IMAGE_SAMPLE() + { + } // ~Inst_MIMG__IMAGE_SAMPLE + + // --- description from .arch file --- + // sample texture map. + void + Inst_MIMG__IMAGE_SAMPLE::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_CL class methods --- + + Inst_MIMG__IMAGE_SAMPLE_CL::Inst_MIMG__IMAGE_SAMPLE_CL(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_cl") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_CL + + Inst_MIMG__IMAGE_SAMPLE_CL::~Inst_MIMG__IMAGE_SAMPLE_CL() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_CL + + // --- description from .arch file --- + // sample texture map, with LOD clamp specified in shader. + void + Inst_MIMG__IMAGE_SAMPLE_CL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_D class methods --- + + Inst_MIMG__IMAGE_SAMPLE_D::Inst_MIMG__IMAGE_SAMPLE_D(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_d") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_D + + Inst_MIMG__IMAGE_SAMPLE_D::~Inst_MIMG__IMAGE_SAMPLE_D() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_D + + // --- description from .arch file --- + // sample texture map, with user derivatives + void + Inst_MIMG__IMAGE_SAMPLE_D::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_D_CL class methods --- + + Inst_MIMG__IMAGE_SAMPLE_D_CL::Inst_MIMG__IMAGE_SAMPLE_D_CL( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_d_cl") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_D_CL + + Inst_MIMG__IMAGE_SAMPLE_D_CL::~Inst_MIMG__IMAGE_SAMPLE_D_CL() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_D_CL + + // --- description from .arch file --- + // sample texture map, with LOD clamp specified in shader, with user + // --- derivatives. + void + Inst_MIMG__IMAGE_SAMPLE_D_CL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_L class methods --- + + Inst_MIMG__IMAGE_SAMPLE_L::Inst_MIMG__IMAGE_SAMPLE_L(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_l") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_L + + Inst_MIMG__IMAGE_SAMPLE_L::~Inst_MIMG__IMAGE_SAMPLE_L() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_L + + // --- description from .arch file --- + // sample texture map, with user LOD. 
+ void + Inst_MIMG__IMAGE_SAMPLE_L::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_B class methods --- + + Inst_MIMG__IMAGE_SAMPLE_B::Inst_MIMG__IMAGE_SAMPLE_B(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_b") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_B + + Inst_MIMG__IMAGE_SAMPLE_B::~Inst_MIMG__IMAGE_SAMPLE_B() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_B + + // --- description from .arch file --- + // sample texture map, with lod bias. + void + Inst_MIMG__IMAGE_SAMPLE_B::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_B_CL class methods --- + + Inst_MIMG__IMAGE_SAMPLE_B_CL::Inst_MIMG__IMAGE_SAMPLE_B_CL( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_b_cl") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_B_CL + + Inst_MIMG__IMAGE_SAMPLE_B_CL::~Inst_MIMG__IMAGE_SAMPLE_B_CL() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_B_CL + + // --- description from .arch file --- + // sample texture map, with LOD clamp specified in shader, with lod bias. + void + Inst_MIMG__IMAGE_SAMPLE_B_CL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_LZ class methods --- + + Inst_MIMG__IMAGE_SAMPLE_LZ::Inst_MIMG__IMAGE_SAMPLE_LZ(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_lz") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_LZ + + Inst_MIMG__IMAGE_SAMPLE_LZ::~Inst_MIMG__IMAGE_SAMPLE_LZ() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_LZ + + // --- description from .arch file --- + // sample texture map, from level 0. + void + Inst_MIMG__IMAGE_SAMPLE_LZ::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C::Inst_MIMG__IMAGE_SAMPLE_C(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C + + Inst_MIMG__IMAGE_SAMPLE_C::~Inst_MIMG__IMAGE_SAMPLE_C() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C + + // --- description from .arch file --- + // sample texture map, with PCF. + void + Inst_MIMG__IMAGE_SAMPLE_C::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_CL class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_CL::Inst_MIMG__IMAGE_SAMPLE_C_CL( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_cl") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_CL + + Inst_MIMG__IMAGE_SAMPLE_C_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CL() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_CL + + // --- description from .arch file --- + // SAMPLE_C, with LOD clamp specified in shader. + void + Inst_MIMG__IMAGE_SAMPLE_C_CL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_D class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_D::Inst_MIMG__IMAGE_SAMPLE_C_D(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_d") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_D + + Inst_MIMG__IMAGE_SAMPLE_C_D::~Inst_MIMG__IMAGE_SAMPLE_C_D() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_D + + // --- description from .arch file --- + // SAMPLE_C, with user derivatives. 
+ void + Inst_MIMG__IMAGE_SAMPLE_C_D::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_D_CL class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_D_CL::Inst_MIMG__IMAGE_SAMPLE_C_D_CL( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_d_cl") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL + + Inst_MIMG__IMAGE_SAMPLE_C_D_CL::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL + + // --- description from .arch file --- + // SAMPLE_C, with LOD clamp specified in shader, with user derivatives. + void + Inst_MIMG__IMAGE_SAMPLE_C_D_CL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_L class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_L::Inst_MIMG__IMAGE_SAMPLE_C_L(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_l") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_L + + Inst_MIMG__IMAGE_SAMPLE_C_L::~Inst_MIMG__IMAGE_SAMPLE_C_L() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_L + + // --- description from .arch file --- + // SAMPLE_C, with user LOD. + void + Inst_MIMG__IMAGE_SAMPLE_C_L::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_B class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_B::Inst_MIMG__IMAGE_SAMPLE_C_B(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_b") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_B + + Inst_MIMG__IMAGE_SAMPLE_C_B::~Inst_MIMG__IMAGE_SAMPLE_C_B() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_B + + // --- description from .arch file --- + // SAMPLE_C, with lod bias. + void + Inst_MIMG__IMAGE_SAMPLE_C_B::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_B_CL class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_B_CL::Inst_MIMG__IMAGE_SAMPLE_C_B_CL( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_b_cl") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL + + Inst_MIMG__IMAGE_SAMPLE_C_B_CL::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL + + // --- description from .arch file --- + // SAMPLE_C, with LOD clamp specified in shader, with lod bias. + void + Inst_MIMG__IMAGE_SAMPLE_C_B_CL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_LZ class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_LZ::Inst_MIMG__IMAGE_SAMPLE_C_LZ( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_lz") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_LZ + + Inst_MIMG__IMAGE_SAMPLE_C_LZ::~Inst_MIMG__IMAGE_SAMPLE_C_LZ() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ + + // --- description from .arch file --- + // SAMPLE_C, from level 0. + void + Inst_MIMG__IMAGE_SAMPLE_C_LZ::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_O::Inst_MIMG__IMAGE_SAMPLE_O(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_O + + Inst_MIMG__IMAGE_SAMPLE_O::~Inst_MIMG__IMAGE_SAMPLE_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_O + + // --- description from .arch file --- + // sample texture map, with user offsets. 
+ void + Inst_MIMG__IMAGE_SAMPLE_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_CL_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_CL_O::Inst_MIMG__IMAGE_SAMPLE_CL_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_cl_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_CL_O + + Inst_MIMG__IMAGE_SAMPLE_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CL_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_CL_O + + // --- description from .arch file --- + // SAMPLE_O with LOD clamp specified in shader. + void + Inst_MIMG__IMAGE_SAMPLE_CL_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_D_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_D_O::Inst_MIMG__IMAGE_SAMPLE_D_O(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_d_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_D_O + + Inst_MIMG__IMAGE_SAMPLE_D_O::~Inst_MIMG__IMAGE_SAMPLE_D_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_D_O + + // --- description from .arch file --- + // SAMPLE_O, with user derivatives. + void + Inst_MIMG__IMAGE_SAMPLE_D_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_D_CL_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_D_CL_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_d_cl_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_D_CL_O + + Inst_MIMG__IMAGE_SAMPLE_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_D_CL_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_D_CL_O + + // --- description from .arch file --- + // SAMPLE_O, with LOD clamp specified in shader, with user derivatives. + void + Inst_MIMG__IMAGE_SAMPLE_D_CL_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_L_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_L_O::Inst_MIMG__IMAGE_SAMPLE_L_O(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_l_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_L_O + + Inst_MIMG__IMAGE_SAMPLE_L_O::~Inst_MIMG__IMAGE_SAMPLE_L_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_L_O + + // --- description from .arch file --- + // SAMPLE_O, with user LOD. + void + Inst_MIMG__IMAGE_SAMPLE_L_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_B_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_B_O::Inst_MIMG__IMAGE_SAMPLE_B_O(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_b_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_B_O + + Inst_MIMG__IMAGE_SAMPLE_B_O::~Inst_MIMG__IMAGE_SAMPLE_B_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_B_O + + // --- description from .arch file --- + // SAMPLE_O, with lod bias. + void + Inst_MIMG__IMAGE_SAMPLE_B_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_B_CL_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_B_CL_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_b_cl_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_B_CL_O + + Inst_MIMG__IMAGE_SAMPLE_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_B_CL_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_B_CL_O + + // --- description from .arch file --- + // SAMPLE_O, with LOD clamp specified in shader, with lod bias. 
+ void + Inst_MIMG__IMAGE_SAMPLE_B_CL_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_LZ_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_LZ_O::Inst_MIMG__IMAGE_SAMPLE_LZ_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_lz_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_LZ_O + + Inst_MIMG__IMAGE_SAMPLE_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_LZ_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_LZ_O + + // --- description from .arch file --- + // SAMPLE_O, from level 0. + void + Inst_MIMG__IMAGE_SAMPLE_LZ_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_O::Inst_MIMG__IMAGE_SAMPLE_C_O(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_O + + Inst_MIMG__IMAGE_SAMPLE_C_O::~Inst_MIMG__IMAGE_SAMPLE_C_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_O + + // --- description from .arch file --- + // SAMPLE_C with user specified offsets. + void + Inst_MIMG__IMAGE_SAMPLE_C_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_CL_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CL_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_cl_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_CL_O + + Inst_MIMG__IMAGE_SAMPLE_C_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CL_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_CL_O + + // --- description from .arch file --- + // SAMPLE_C_O, with LOD clamp specified in shader. + void + Inst_MIMG__IMAGE_SAMPLE_C_CL_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_D_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_D_O::Inst_MIMG__IMAGE_SAMPLE_C_D_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_d_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_D_O + + Inst_MIMG__IMAGE_SAMPLE_C_D_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_O + + // --- description from .arch file --- + // SAMPLE_C_O, with user derivatives. + void + Inst_MIMG__IMAGE_SAMPLE_C_D_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_d_cl_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O + + Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O + + // --- description from .arch file --- + // SAMPLE_C_O, with LOD clamp specified in shader, with user derivatives. + void + Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_L_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_L_O::Inst_MIMG__IMAGE_SAMPLE_C_L_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_l_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_L_O + + Inst_MIMG__IMAGE_SAMPLE_C_L_O::~Inst_MIMG__IMAGE_SAMPLE_C_L_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_L_O + + // --- description from .arch file --- + // SAMPLE_C_O, with user LOD. 
+ void + Inst_MIMG__IMAGE_SAMPLE_C_L_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_B_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_B_O::Inst_MIMG__IMAGE_SAMPLE_C_B_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_b_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_B_O + + Inst_MIMG__IMAGE_SAMPLE_C_B_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_O + + // --- description from .arch file --- + // SAMPLE_C_O, with lod bias. + void + Inst_MIMG__IMAGE_SAMPLE_C_B_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_b_cl_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O + + Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O + + // --- description from .arch file --- + // SAMPLE_C_O, with LOD clamp specified in shader, with lod bias. + void + Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_LZ_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::Inst_MIMG__IMAGE_SAMPLE_C_LZ_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_lz_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_LZ_O + + Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O + + // --- description from .arch file --- + // SAMPLE_C_O, from level 0. + void + Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4 class methods --- + + Inst_MIMG__IMAGE_GATHER4::Inst_MIMG__IMAGE_GATHER4(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4 + + Inst_MIMG__IMAGE_GATHER4::~Inst_MIMG__IMAGE_GATHER4() + { + } // ~Inst_MIMG__IMAGE_GATHER4 + + // --- description from .arch file --- + // gather 4 single component elements (2x2). + void + Inst_MIMG__IMAGE_GATHER4::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_CL class methods --- + + Inst_MIMG__IMAGE_GATHER4_CL::Inst_MIMG__IMAGE_GATHER4_CL(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_cl") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_CL + + Inst_MIMG__IMAGE_GATHER4_CL::~Inst_MIMG__IMAGE_GATHER4_CL() + { + } // ~Inst_MIMG__IMAGE_GATHER4_CL + + // --- description from .arch file --- + // gather 4 single component elements (2x2) with user LOD clamp. + void + Inst_MIMG__IMAGE_GATHER4_CL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_L class methods --- + + Inst_MIMG__IMAGE_GATHER4_L::Inst_MIMG__IMAGE_GATHER4_L(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_l") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_L + + Inst_MIMG__IMAGE_GATHER4_L::~Inst_MIMG__IMAGE_GATHER4_L() + { + } // ~Inst_MIMG__IMAGE_GATHER4_L + + // --- description from .arch file --- + // gather 4 single component elements (2x2) with user LOD. 
+ void + Inst_MIMG__IMAGE_GATHER4_L::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_B class methods --- + + Inst_MIMG__IMAGE_GATHER4_B::Inst_MIMG__IMAGE_GATHER4_B(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_b") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_B + + Inst_MIMG__IMAGE_GATHER4_B::~Inst_MIMG__IMAGE_GATHER4_B() + { + } // ~Inst_MIMG__IMAGE_GATHER4_B + + // --- description from .arch file --- + // gather 4 single component elements (2x2) with user bias. + void + Inst_MIMG__IMAGE_GATHER4_B::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_B_CL class methods --- + + Inst_MIMG__IMAGE_GATHER4_B_CL::Inst_MIMG__IMAGE_GATHER4_B_CL( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_b_cl") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_B_CL + + Inst_MIMG__IMAGE_GATHER4_B_CL::~Inst_MIMG__IMAGE_GATHER4_B_CL() + { + } // ~Inst_MIMG__IMAGE_GATHER4_B_CL + + // --- description from .arch file --- + // gather 4 single component elements (2x2) with user bias and clamp. + void + Inst_MIMG__IMAGE_GATHER4_B_CL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_LZ class methods --- + + Inst_MIMG__IMAGE_GATHER4_LZ::Inst_MIMG__IMAGE_GATHER4_LZ(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_lz") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_LZ + + Inst_MIMG__IMAGE_GATHER4_LZ::~Inst_MIMG__IMAGE_GATHER4_LZ() + { + } // ~Inst_MIMG__IMAGE_GATHER4_LZ + + // --- description from .arch file --- + // gather 4 single component elements (2x2) at level 0. + void + Inst_MIMG__IMAGE_GATHER4_LZ::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_C class methods --- + + Inst_MIMG__IMAGE_GATHER4_C::Inst_MIMG__IMAGE_GATHER4_C(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_c") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_C + + Inst_MIMG__IMAGE_GATHER4_C::~Inst_MIMG__IMAGE_GATHER4_C() + { + } // ~Inst_MIMG__IMAGE_GATHER4_C + + // --- description from .arch file --- + // gather 4 single component elements (2x2) with PCF. + void + Inst_MIMG__IMAGE_GATHER4_C::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_C_CL class methods --- + + Inst_MIMG__IMAGE_GATHER4_C_CL::Inst_MIMG__IMAGE_GATHER4_C_CL( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_c_cl") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_C_CL + + Inst_MIMG__IMAGE_GATHER4_C_CL::~Inst_MIMG__IMAGE_GATHER4_C_CL() + { + } // ~Inst_MIMG__IMAGE_GATHER4_C_CL + + // --- description from .arch file --- + // gather 4 single component elements (2x2) with user LOD clamp and PCF. + void + Inst_MIMG__IMAGE_GATHER4_C_CL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_C_L class methods --- + + Inst_MIMG__IMAGE_GATHER4_C_L::Inst_MIMG__IMAGE_GATHER4_C_L( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_c_l") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_C_L + + Inst_MIMG__IMAGE_GATHER4_C_L::~Inst_MIMG__IMAGE_GATHER4_C_L() + { + } // ~Inst_MIMG__IMAGE_GATHER4_C_L + + // --- description from .arch file --- + // gather 4 single component elements (2x2) with user LOD and PCF. 
+ void + Inst_MIMG__IMAGE_GATHER4_C_L::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_C_B class methods --- + + Inst_MIMG__IMAGE_GATHER4_C_B::Inst_MIMG__IMAGE_GATHER4_C_B( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_c_b") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_C_B + + Inst_MIMG__IMAGE_GATHER4_C_B::~Inst_MIMG__IMAGE_GATHER4_C_B() + { + } // ~Inst_MIMG__IMAGE_GATHER4_C_B + + // --- description from .arch file --- + // gather 4 single component elements (2x2) with user bias and PCF. + void + Inst_MIMG__IMAGE_GATHER4_C_B::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_C_B_CL class methods --- + + Inst_MIMG__IMAGE_GATHER4_C_B_CL::Inst_MIMG__IMAGE_GATHER4_C_B_CL( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_c_b_cl") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_C_B_CL + + Inst_MIMG__IMAGE_GATHER4_C_B_CL::~Inst_MIMG__IMAGE_GATHER4_C_B_CL() + { + } // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL + + // --- description from .arch file --- + // gather 4 single component elements (2x2) with user bias, clamp and PCF. + void + Inst_MIMG__IMAGE_GATHER4_C_B_CL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_C_LZ class methods --- + + Inst_MIMG__IMAGE_GATHER4_C_LZ::Inst_MIMG__IMAGE_GATHER4_C_LZ( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_c_lz") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_C_LZ + + Inst_MIMG__IMAGE_GATHER4_C_LZ::~Inst_MIMG__IMAGE_GATHER4_C_LZ() + { + } // ~Inst_MIMG__IMAGE_GATHER4_C_LZ + + // --- description from .arch file --- + // gather 4 single component elements (2x2) at level 0, with PCF. + void + Inst_MIMG__IMAGE_GATHER4_C_LZ::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_O class methods --- + + Inst_MIMG__IMAGE_GATHER4_O::Inst_MIMG__IMAGE_GATHER4_O(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_O + + Inst_MIMG__IMAGE_GATHER4_O::~Inst_MIMG__IMAGE_GATHER4_O() + { + } // ~Inst_MIMG__IMAGE_GATHER4_O + + // --- description from .arch file --- + // GATHER4, with user offsets. + void + Inst_MIMG__IMAGE_GATHER4_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_CL_O class methods --- + + Inst_MIMG__IMAGE_GATHER4_CL_O::Inst_MIMG__IMAGE_GATHER4_CL_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_cl_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_CL_O + + Inst_MIMG__IMAGE_GATHER4_CL_O::~Inst_MIMG__IMAGE_GATHER4_CL_O() + { + } // ~Inst_MIMG__IMAGE_GATHER4_CL_O + + // --- description from .arch file --- + // GATHER4_CL, with user offsets. + void + Inst_MIMG__IMAGE_GATHER4_CL_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_L_O class methods --- + + Inst_MIMG__IMAGE_GATHER4_L_O::Inst_MIMG__IMAGE_GATHER4_L_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_l_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_L_O + + Inst_MIMG__IMAGE_GATHER4_L_O::~Inst_MIMG__IMAGE_GATHER4_L_O() + { + } // ~Inst_MIMG__IMAGE_GATHER4_L_O + + // --- description from .arch file --- + // GATHER4_L, with user offsets. 
+ void + Inst_MIMG__IMAGE_GATHER4_L_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_B_O class methods --- + + Inst_MIMG__IMAGE_GATHER4_B_O::Inst_MIMG__IMAGE_GATHER4_B_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_b_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_B_O + + Inst_MIMG__IMAGE_GATHER4_B_O::~Inst_MIMG__IMAGE_GATHER4_B_O() + { + } // ~Inst_MIMG__IMAGE_GATHER4_B_O + + // --- description from .arch file --- + // GATHER4_B, with user offsets. + void + Inst_MIMG__IMAGE_GATHER4_B_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_B_CL_O class methods --- + + Inst_MIMG__IMAGE_GATHER4_B_CL_O::Inst_MIMG__IMAGE_GATHER4_B_CL_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_b_cl_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_B_CL_O + + Inst_MIMG__IMAGE_GATHER4_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_B_CL_O() + { + } // ~Inst_MIMG__IMAGE_GATHER4_B_CL_O + + // --- description from .arch file --- + // GATHER4_B_CL, with user offsets. + void + Inst_MIMG__IMAGE_GATHER4_B_CL_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_LZ_O class methods --- + + Inst_MIMG__IMAGE_GATHER4_LZ_O::Inst_MIMG__IMAGE_GATHER4_LZ_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_lz_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_LZ_O + + Inst_MIMG__IMAGE_GATHER4_LZ_O::~Inst_MIMG__IMAGE_GATHER4_LZ_O() + { + } // ~Inst_MIMG__IMAGE_GATHER4_LZ_O + + // --- description from .arch file --- + // GATHER4_LZ, with user offsets. + void + Inst_MIMG__IMAGE_GATHER4_LZ_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_C_O class methods --- + + Inst_MIMG__IMAGE_GATHER4_C_O::Inst_MIMG__IMAGE_GATHER4_C_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_c_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_C_O + + Inst_MIMG__IMAGE_GATHER4_C_O::~Inst_MIMG__IMAGE_GATHER4_C_O() + { + } // ~Inst_MIMG__IMAGE_GATHER4_C_O + + // --- description from .arch file --- + // GATHER4_C, with user offsets. + void + Inst_MIMG__IMAGE_GATHER4_C_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_C_CL_O class methods --- + + Inst_MIMG__IMAGE_GATHER4_C_CL_O::Inst_MIMG__IMAGE_GATHER4_C_CL_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_c_cl_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_C_CL_O + + Inst_MIMG__IMAGE_GATHER4_C_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_CL_O() + { + } // ~Inst_MIMG__IMAGE_GATHER4_C_CL_O + + // --- description from .arch file --- + // GATHER4_C_CL, with user offsets. + void + Inst_MIMG__IMAGE_GATHER4_C_CL_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_C_L_O class methods --- + + Inst_MIMG__IMAGE_GATHER4_C_L_O::Inst_MIMG__IMAGE_GATHER4_C_L_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_c_l_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_C_L_O + + Inst_MIMG__IMAGE_GATHER4_C_L_O::~Inst_MIMG__IMAGE_GATHER4_C_L_O() + { + } // ~Inst_MIMG__IMAGE_GATHER4_C_L_O + + // --- description from .arch file --- + // GATHER4_C_L, with user offsets. 
+ void + Inst_MIMG__IMAGE_GATHER4_C_L_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_C_B_O class methods --- + + Inst_MIMG__IMAGE_GATHER4_C_B_O::Inst_MIMG__IMAGE_GATHER4_C_B_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_c_b_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_C_B_O + + Inst_MIMG__IMAGE_GATHER4_C_B_O::~Inst_MIMG__IMAGE_GATHER4_C_B_O() + { + } // ~Inst_MIMG__IMAGE_GATHER4_C_B_O + + // --- description from .arch file --- + // GATHER4_B, with user offsets. + void + Inst_MIMG__IMAGE_GATHER4_C_B_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_C_B_CL_O class methods --- + + Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::Inst_MIMG__IMAGE_GATHER4_C_B_CL_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_c_b_cl_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_C_B_CL_O + + Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O() + { + } // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O + + // --- description from .arch file --- + // GATHER4_B_CL, with user offsets. + void + Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_C_LZ_O class methods --- + + Inst_MIMG__IMAGE_GATHER4_C_LZ_O::Inst_MIMG__IMAGE_GATHER4_C_LZ_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_c_lz_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_C_LZ_O + + Inst_MIMG__IMAGE_GATHER4_C_LZ_O::~Inst_MIMG__IMAGE_GATHER4_C_LZ_O() + { + } // ~Inst_MIMG__IMAGE_GATHER4_C_LZ_O + + // --- description from .arch file --- + // GATHER4_C_LZ, with user offsets. + void + Inst_MIMG__IMAGE_GATHER4_C_LZ_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GET_LOD class methods --- + + Inst_MIMG__IMAGE_GET_LOD::Inst_MIMG__IMAGE_GET_LOD(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_get_lod") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GET_LOD + + Inst_MIMG__IMAGE_GET_LOD::~Inst_MIMG__IMAGE_GET_LOD() + { + } // ~Inst_MIMG__IMAGE_GET_LOD + + // --- description from .arch file --- + // Return calculated LOD. Vdata gets 2 32bit integer values: { rawLOD, + // --- clampedLOD }. + void + Inst_MIMG__IMAGE_GET_LOD::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_CD class methods --- + + Inst_MIMG__IMAGE_SAMPLE_CD::Inst_MIMG__IMAGE_SAMPLE_CD(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_cd") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_CD + + Inst_MIMG__IMAGE_SAMPLE_CD::~Inst_MIMG__IMAGE_SAMPLE_CD() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_CD + + // --- description from .arch file --- + // sample texture map, with user derivatives (LOD per quad) + void + Inst_MIMG__IMAGE_SAMPLE_CD::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_CD_CL class methods --- + + Inst_MIMG__IMAGE_SAMPLE_CD_CL::Inst_MIMG__IMAGE_SAMPLE_CD_CL( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_cd_cl") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_CD_CL + + Inst_MIMG__IMAGE_SAMPLE_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_CD_CL() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL + + // --- description from .arch file --- + // sample texture map, with LOD clamp specified in shader, with user + // --- derivatives (LOD per quad). 
+ void + Inst_MIMG__IMAGE_SAMPLE_CD_CL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_CD class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_CD::Inst_MIMG__IMAGE_SAMPLE_C_CD( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_cd") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_CD + + Inst_MIMG__IMAGE_SAMPLE_C_CD::~Inst_MIMG__IMAGE_SAMPLE_C_CD() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD + + // --- description from .arch file --- + // SAMPLE_C, with user derivatives (LOD per quad). + void + Inst_MIMG__IMAGE_SAMPLE_C_CD::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_CD_CL class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_cd_cl") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL + + Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL + + // --- description from .arch file --- + // SAMPLE_C, with LOD clamp specified in shader, with user derivatives + // (LOD per quad). + void + Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_CD_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_CD_O::Inst_MIMG__IMAGE_SAMPLE_CD_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_cd_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_CD_O + + Inst_MIMG__IMAGE_SAMPLE_CD_O::~Inst_MIMG__IMAGE_SAMPLE_CD_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_CD_O + + // --- description from .arch file --- + // SAMPLE_O, with user derivatives (LOD per quad). + void + Inst_MIMG__IMAGE_SAMPLE_CD_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_CD_CL_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_CD_CL_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_cd_cl_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_CD_CL_O + + Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O + + // --- description from .arch file --- + // SAMPLE_O, with LOD clamp specified in shader, with user derivatives + // (LOD per quad). + void + Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_CD_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_CD_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_cd_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_CD_O + + Inst_MIMG__IMAGE_SAMPLE_C_CD_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_O + + // --- description from .arch file --- + // SAMPLE_C_O, with user derivatives (LOD per quad). 
+ void + Inst_MIMG__IMAGE_SAMPLE_C_CD_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_cd_cl_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O + + Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O + + // --- description from .arch file --- + // SAMPLE_C_O, with LOD clamp specified in shader, with user derivatives + // (LOD per quad). + void + Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/mtbuf.cc b/src/arch/amdgpu/vega/insts/mtbuf.cc new file mode 100644 index 0000000000..2b37dfd6b9 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/mtbuf.cc @@ -0,0 +1,584 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_X class methods --- + + Inst_MTBUF__TBUFFER_LOAD_FORMAT_X + ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_X(InFmt_MTBUF *iFmt) + : Inst_MTBUF(iFmt, "tbuffer_load_format_x") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_X + + Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X() + { + } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X + + // --- description from .arch file --- + // Typed buffer load 1 dword with format conversion. 
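+ // Note: unlike the untyped MUBUF operations later in this patch, MTBUF
+ // instructions carry the buffer data/numeric format in the instruction
+ // encoding itself (the DFMT/NFMT fields in the Vega encoding) rather than
+ // taking it from the buffer resource descriptor, which is what makes these
+ // "typed" buffer accesses. Every MTBUF opcode in this file is currently a
+ // panicUnimplemented() stub.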
+ void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY class methods --- + + Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY + ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY(InFmt_MTBUF *iFmt) + : Inst_MTBUF(iFmt, "tbuffer_load_format_xy") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY + + Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY() + { + } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY + + // --- description from .arch file --- + // Typed buffer load 2 dwords with format conversion. + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ class methods --- + + Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ + ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ(InFmt_MTBUF *iFmt) + : Inst_MTBUF(iFmt, "tbuffer_load_format_xyz") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ + + Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ() + { + } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ + + // --- description from .arch file --- + // Typed buffer load 3 dwords with format conversion. + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW class methods --- + + Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW + ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW(InFmt_MTBUF *iFmt) + : Inst_MTBUF(iFmt, "tbuffer_load_format_xyzw") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW + + Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW + ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW() + { + } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW + + // --- description from .arch file --- + // Typed buffer load 4 dwords with format conversion. + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_X class methods --- + + Inst_MTBUF__TBUFFER_STORE_FORMAT_X + ::Inst_MTBUF__TBUFFER_STORE_FORMAT_X(InFmt_MTBUF *iFmt) + : Inst_MTBUF(iFmt, "tbuffer_store_format_x") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MTBUF__TBUFFER_STORE_FORMAT_X + + Inst_MTBUF__TBUFFER_STORE_FORMAT_X::~Inst_MTBUF__TBUFFER_STORE_FORMAT_X() + { + } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_X + + // --- description from .arch file --- + // Typed buffer store 1 dword with format conversion. 
+ void + Inst_MTBUF__TBUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_XY class methods --- + + Inst_MTBUF__TBUFFER_STORE_FORMAT_XY + ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XY(InFmt_MTBUF *iFmt) + : Inst_MTBUF(iFmt, "tbuffer_store_format_xy") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XY + + Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY() + { + } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY + + // --- description from .arch file --- + // Typed buffer store 2 dwords with format conversion. + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ class methods --- + + Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ + ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ(InFmt_MTBUF *iFmt) + : Inst_MTBUF(iFmt, "tbuffer_store_format_xyz") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ + + Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ + ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ() + { + } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ + + // --- description from .arch file --- + // Typed buffer store 3 dwords with format conversion. + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW class methods --- + + Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW + ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW(InFmt_MTBUF *iFmt) + : Inst_MTBUF(iFmt, "tbuffer_store_format_xyzw") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW + + Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW + ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW() + { + } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW + + // --- description from .arch file --- + // Typed buffer store 4 dwords with format conversion. 
+ void + Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X class methods --- + + Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X + ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X(InFmt_MTBUF *iFmt) + : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_x") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X + + Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X:: + ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X() + { + } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X + + // --- description from .arch file --- + // Typed buffer load 1 dword with format conversion. + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY class methods --- + + Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY + ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY(InFmt_MTBUF *iFmt) + : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xy") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY + + Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY + ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY() + { + } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY + + // --- description from .arch file --- + // Typed buffer load 2 dwords with format conversion. + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ class methods --- + + Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ + ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ( + InFmt_MTBUF *iFmt) + : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyz") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ + + Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ + ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ() + { + } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ + + // --- description from .arch file --- + // Typed buffer load 3 dwords with format conversion. 
+ void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW class methods --- + + Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW + ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW( + InFmt_MTBUF *iFmt) + : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyzw") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW + + Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW + ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW() + { + } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW + + // --- description from .arch file --- + // Typed buffer load 4 dwords with format conversion. + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X class methods --- + + Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X + ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X(InFmt_MTBUF *iFmt) + : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_x") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X + + Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X + ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X() + { + } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X + + // --- description from .arch file --- + // Typed buffer store 1 dword with format conversion. + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY class methods --- + + Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY + ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY(InFmt_MTBUF *iFmt) + : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xy") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY + + Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY + ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY() + { + } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY + + // --- description from .arch file --- + // Typed buffer store 2 dwords with format conversion. 
+ void + Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ class methods --- + + Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ + ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ(InFmt_MTBUF *iFmt) + : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyz") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ + + Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ + ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ() + { + } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ + + // --- description from .arch file --- + // Typed buffer store 3 dwords with format conversion. + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW class methods --- + + Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW + ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW(InFmt_MTBUF *iFmt) + : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyzw") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW + + Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW + ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW() + { + } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW + + // --- description from .arch file --- + // Typed buffer store 4 dwords with format conversion. + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::execute( + GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/mubuf.cc b/src/arch/amdgpu/vega/insts/mubuf.cc new file mode 100644 index 0000000000..ff8bae2475 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/mubuf.cc @@ -0,0 +1,2789 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_X class methods --- + + Inst_MUBUF__BUFFER_LOAD_FORMAT_X + ::Inst_MUBUF__BUFFER_LOAD_FORMAT_X(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_format_x") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_LOAD_FORMAT_X + + Inst_MUBUF__BUFFER_LOAD_FORMAT_X::~Inst_MUBUF__BUFFER_LOAD_FORMAT_X() + { + } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_X + + // --- description from .arch file --- + // Untyped buffer load 1 dword with format conversion. + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XY class methods --- + + Inst_MUBUF__BUFFER_LOAD_FORMAT_XY + ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XY(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_format_xy") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XY + + Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY() + { + } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY + + // --- description from .arch file --- + // Untyped buffer load 2 dwords with format conversion. + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ class methods --- + + Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ + ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_format_xyz") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ + + Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ() + { + } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ + + // --- description from .arch file --- + // Untyped buffer load 3 dwords with format conversion. 
+ void + Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW class methods --- + + Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW + ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_format_xyzw") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW + + Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW() + { + } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW + + // --- description from .arch file --- + // Untyped buffer load 4 dwords with format conversion. + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_STORE_FORMAT_X class methods --- + + Inst_MUBUF__BUFFER_STORE_FORMAT_X + ::Inst_MUBUF__BUFFER_STORE_FORMAT_X(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_format_x") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_STORE_FORMAT_X + + Inst_MUBUF__BUFFER_STORE_FORMAT_X::~Inst_MUBUF__BUFFER_STORE_FORMAT_X() + { + } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_X + + // --- description from .arch file --- + // Untyped buffer store 1 dword with format conversion. + void + Inst_MUBUF__BUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XY class methods --- + + Inst_MUBUF__BUFFER_STORE_FORMAT_XY + ::Inst_MUBUF__BUFFER_STORE_FORMAT_XY(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_format_xy") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_STORE_FORMAT_XY + + Inst_MUBUF__BUFFER_STORE_FORMAT_XY::~Inst_MUBUF__BUFFER_STORE_FORMAT_XY() + { + } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XY + + // --- description from .arch file --- + // Untyped buffer store 2 dwords with format conversion. + void + Inst_MUBUF__BUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ class methods --- + + Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ + ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_format_xyz") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ + + Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ() + { + } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ + + // --- description from .arch file --- + // Untyped buffer store 3 dwords with format conversion. 
+ void + Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW class methods --- + + Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW + ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_format_xyzw") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW + + Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW + ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW() + { + } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW + + // --- description from .arch file --- + // Untyped buffer store 4 dwords with format conversion. + void + Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X class methods --- + + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X + ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_format_d16_x") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X + + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X + ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X() + { + } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X + + // --- description from .arch file --- + // Untyped buffer load 1 dword with format conversion. + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY class methods --- + + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY + ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_format_d16_xy") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY + + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY + ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY() + { + } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY + + // --- description from .arch file --- + // Untyped buffer load 2 dwords with format conversion. 
+ void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ class methods --- + + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ + ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyz") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ + + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ + ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ() + { + } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ + + // --- description from .arch file --- + // Untyped buffer load 3 dwords with format conversion. + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW class methods --- + + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW + ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyzw") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW + + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW + ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW() + { + } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW + + // --- description from .arch file --- + // Untyped buffer load 4 dwords with format conversion. + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X class methods --- + + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X + ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_format_d16_x") + { + setFlag(Store); + } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X + + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X + ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X() + { + } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X + + // --- description from .arch file --- + // Untyped buffer store 1 dword with format conversion. 
+ void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY class methods --- + + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY + ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_format_d16_xy") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY + + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY + ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY() + { + } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY + + // --- description from .arch file --- + // Untyped buffer store 2 dwords with format conversion. + void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ class methods --- + + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ + ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyz") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ + + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ + ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ() + { + } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ + + // --- description from .arch file --- + // Untyped buffer store 3 dwords with format conversion. + void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW class methods --- + + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW + ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyzw") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW + + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW + ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW() + { + } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW + + // --- description from .arch file --- + // Untyped buffer store 4 dwords with format conversion. 
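+ // This is the last of the format-conversion (BUFFER_*_FORMAT_*) MUBUF
+ // operations; like the ones above, it is a panicUnimplemented() stub.
+ // Several of the plain untyped loads that follow (ubyte, ushort, dword,
+ // dwordx2, dwordx3) are functionally implemented, while the sign-extending
+ // sbyte/sshort variants remain stubs.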
+ void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_LOAD_UBYTE class methods --- + + Inst_MUBUF__BUFFER_LOAD_UBYTE + ::Inst_MUBUF__BUFFER_LOAD_UBYTE(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_ubyte") + { + setFlag(MemoryRef); + setFlag(Load); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_LOAD_UBYTE + + Inst_MUBUF__BUFFER_LOAD_UBYTE::~Inst_MUBUF__BUFFER_LOAD_UBYTE() + { + } // ~Inst_MUBUF__BUFFER_LOAD_UBYTE + + // --- description from .arch file --- + // Untyped buffer load unsigned byte (zero extend to VGPR destination). + void + Inst_MUBUF__BUFFER_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + + rsrcDesc.read(); + offset.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDATA); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + if (!oobMask[lane]) { + vdst[lane] = (VecElemU32)((reinterpret_cast( + gpuDynInst->d_data))[lane]); + } else { + vdst[lane] = 0; + } + } + } + + vdst.write(); + } // execute + + // --- Inst_MUBUF__BUFFER_LOAD_SBYTE class methods --- + + Inst_MUBUF__BUFFER_LOAD_SBYTE + ::Inst_MUBUF__BUFFER_LOAD_SBYTE(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_sbyte") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_LOAD_SBYTE + + Inst_MUBUF__BUFFER_LOAD_SBYTE::~Inst_MUBUF__BUFFER_LOAD_SBYTE() + { + } // ~Inst_MUBUF__BUFFER_LOAD_SBYTE + + // --- description from .arch file --- + // Untyped buffer load signed byte (sign extend to VGPR destination). 
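+ // The execute() below is still a stub. If it were implemented, the memory
+ // side would presumably mirror BUFFER_LOAD_UBYTE above, with completeAcc()
+ // sign-extending each loaded byte instead of zero-extending it. A rough
+ // sketch (illustrative only, not the actual implementation):
+ //
+ //     VecOperandU32 vdst(gpuDynInst, extData.VDATA);
+ //     for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ //         if (gpuDynInst->exec_mask[lane]) {
+ //             if (!oobMask[lane]) {
+ //                 // sign-extend the loaded byte into the 32-bit VGPR
+ //                 vdst[lane] = (VecElemU32)(VecElemI32)(reinterpret_cast<
+ //                     VecElemI8*>(gpuDynInst->d_data))[lane];
+ //             } else {
+ //                 vdst[lane] = 0;
+ //             }
+ //         }
+ //     }
+ //     vdst.write();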
+ void + Inst_MUBUF__BUFFER_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_LOAD_USHORT class methods --- + + Inst_MUBUF__BUFFER_LOAD_USHORT + ::Inst_MUBUF__BUFFER_LOAD_USHORT(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_ushort") + { + setFlag(MemoryRef); + setFlag(Load); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_LOAD_USHORT + + Inst_MUBUF__BUFFER_LOAD_USHORT::~Inst_MUBUF__BUFFER_LOAD_USHORT() + { + } // ~Inst_MUBUF__BUFFER_LOAD_USHORT + + // --- description from .arch file --- + // Untyped buffer load unsigned short (zero extend to VGPR destination). + void + Inst_MUBUF__BUFFER_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + + rsrcDesc.read(); + offset.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDATA); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + if (!oobMask[lane]) { + vdst[lane] = (VecElemU32)((reinterpret_cast( + gpuDynInst->d_data))[lane]); + } else { + vdst[lane] = 0; + } + } + } + + vdst.write(); + } // execute + + // --- Inst_MUBUF__BUFFER_LOAD_SSHORT class methods --- + + Inst_MUBUF__BUFFER_LOAD_SSHORT + ::Inst_MUBUF__BUFFER_LOAD_SSHORT(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_sshort") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_LOAD_SSHORT + + Inst_MUBUF__BUFFER_LOAD_SSHORT::~Inst_MUBUF__BUFFER_LOAD_SSHORT() + { + } // ~Inst_MUBUF__BUFFER_LOAD_SSHORT + + // --- description from .arch file --- + // Untyped buffer load signed short (sign extend to VGPR destination). 
+ void + Inst_MUBUF__BUFFER_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_LOAD_DWORD class methods --- + + Inst_MUBUF__BUFFER_LOAD_DWORD + ::Inst_MUBUF__BUFFER_LOAD_DWORD(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_dword") + { + setFlag(MemoryRef); + setFlag(Load); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_LOAD_DWORD + + Inst_MUBUF__BUFFER_LOAD_DWORD::~Inst_MUBUF__BUFFER_LOAD_DWORD() + { + } // ~Inst_MUBUF__BUFFER_LOAD_DWORD + + // --- description from .arch file --- + // Untyped buffer load dword. + void + Inst_MUBUF__BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + + rsrcDesc.read(); + offset.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDATA); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + if (!oobMask[lane]) { + vdst[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane]; + } else { + vdst[lane] = 0; + } + } + } + + vdst.write(); + } // completeAcc + // --- Inst_MUBUF__BUFFER_LOAD_DWORDX2 class methods --- + + Inst_MUBUF__BUFFER_LOAD_DWORDX2 + ::Inst_MUBUF__BUFFER_LOAD_DWORDX2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_dwordx2") + { + setFlag(MemoryRef); + setFlag(Load); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_LOAD_DWORDX2 + + Inst_MUBUF__BUFFER_LOAD_DWORDX2::~Inst_MUBUF__BUFFER_LOAD_DWORDX2() + { + } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX2 + + // --- description from .arch file --- + // Untyped buffer load 2 dwords. 
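+ // The execute() below uses the same addressing scheme as the ubyte, ushort
+ // and dword loads above: SRSRC*4 selects the 128-bit buffer resource
+ // descriptor, SOFFSET and the immediate OFFSET supply scalar offsets, and
+ // VADDR supplies a per-lane index and/or offset depending on the
+ // IDXEN/OFFEN bits. The operand order handed to calcAddr() is swapped in
+ // the IDXEN cases so that the index and offset registers line up with
+ // calcAddr()'s parameters.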
+ void + Inst_MUBUF__BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + + rsrcDesc.read(); + offset.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<2>(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst0(gpuDynInst, extData.VDATA); + VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + if (!oobMask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 2]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 2 + 1]; + } else { + vdst0[lane] = 0; + vdst1[lane] = 0; + } + } + } + + vdst0.write(); + vdst1.write(); + } // completeAcc + // --- Inst_MUBUF__BUFFER_LOAD_DWORDX3 class methods --- + + Inst_MUBUF__BUFFER_LOAD_DWORDX3 + ::Inst_MUBUF__BUFFER_LOAD_DWORDX3(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_dwordx3") + { + setFlag(MemoryRef); + setFlag(Load); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_LOAD_DWORDX3 + + Inst_MUBUF__BUFFER_LOAD_DWORDX3::~Inst_MUBUF__BUFFER_LOAD_DWORDX3() + { + } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX3 + + // --- description from .arch file --- + // Untyped buffer load 3 dwords. 
+ void + Inst_MUBUF__BUFFER_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + + rsrcDesc.read(); + offset.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<3>(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst0(gpuDynInst, extData.VDATA); + VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1); + VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + if (!oobMask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 3]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 3 + 1]; + vdst2[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 3 + 2]; + } else { + vdst0[lane] = 0; + vdst1[lane] = 0; + vdst2[lane] = 0; + } + } + } + + vdst0.write(); + vdst1.write(); + vdst2.write(); + } // completeAcc + // --- Inst_MUBUF__BUFFER_LOAD_DWORDX4 class methods --- + + Inst_MUBUF__BUFFER_LOAD_DWORDX4 + ::Inst_MUBUF__BUFFER_LOAD_DWORDX4(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_dwordx4") + { + setFlag(MemoryRef); + setFlag(Load); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_LOAD_DWORDX4 + + Inst_MUBUF__BUFFER_LOAD_DWORDX4::~Inst_MUBUF__BUFFER_LOAD_DWORDX4() + { + } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX4 + + // --- description from .arch file --- + // Untyped buffer load 4 dwords. 
+ void + Inst_MUBUF__BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + + rsrcDesc.read(); + offset.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<4>(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst0(gpuDynInst, extData.VDATA); + VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1); + VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2); + VecOperandU32 vdst3(gpuDynInst, extData.VDATA + 3); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + if (!oobMask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 1]; + vdst2[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 2]; + vdst3[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 3]; + } else { + vdst0[lane] = 0; + vdst1[lane] = 0; + vdst2[lane] = 0; + vdst3[lane] = 0; + } + } + } + + vdst0.write(); + vdst1.write(); + vdst2.write(); + vdst3.write(); + } // completeAcc + // --- Inst_MUBUF__BUFFER_STORE_BYTE class methods --- + + Inst_MUBUF__BUFFER_STORE_BYTE + ::Inst_MUBUF__BUFFER_STORE_BYTE(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_byte") + { + setFlag(MemoryRef); + setFlag(Store); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_STORE_BYTE + + Inst_MUBUF__BUFFER_STORE_BYTE::~Inst_MUBUF__BUFFER_STORE_BYTE() + { + } // ~Inst_MUBUF__BUFFER_STORE_BYTE + + // --- description from .arch file --- + // Untyped buffer store byte. 
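+    // For buffer stores, execute() copies each active lane's source VGPR
+    // value into gpuDynInst->d_data; initMemWrite() then uses that staging
+    // buffer when the write is initiated. Stores are also tracked by the
+    // export counter, so the all-lanes-inactive early return decrements
+    // both the VMEM and EXP issue counts that were charged at issue time.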
+    void
+    Inst_MUBUF__BUFFER_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decVMemInstsIssued();
+            wf->decExpInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
+
+        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
+        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
+        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
+        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
+        ConstVecOperandI8 data(gpuDynInst, extData.VDATA);
+
+        rsrcDesc.read();
+        offset.read();
+        data.read();
+
+        int inst_offset = instData.OFFSET;
+
+        if (!instData.IDXEN && !instData.OFFEN) {
+            calcAddr(gpuDynInst,
+                addr0, addr1, rsrcDesc, offset, inst_offset);
+        } else if (!instData.IDXEN && instData.OFFEN) {
+            addr0.read();
+            calcAddr(gpuDynInst,
+                addr0, addr1, rsrcDesc, offset, inst_offset);
+        } else if (instData.IDXEN && !instData.OFFEN) {
+            addr0.read();
+            calcAddr(gpuDynInst,
+                addr1, addr0, rsrcDesc, offset, inst_offset);
+        } else {
+            addr0.read();
+            addr1.read();
+            calcAddr(gpuDynInst,
+                addr1, addr0, rsrcDesc, offset, inst_offset);
+        }
+
+        gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemI8*>(gpuDynInst->d_data))[lane]
+                    = data[lane];
+            }
+        }
+    } // execute
+
+    void
+    Inst_MUBUF__BUFFER_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        initMemWrite<VecElemI8>(gpuDynInst);
+    } // initiateAcc
+
+    void
+    Inst_MUBUF__BUFFER_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_MUBUF__BUFFER_STORE_SHORT class methods ---
+
+    Inst_MUBUF__BUFFER_STORE_SHORT
+        ::Inst_MUBUF__BUFFER_STORE_SHORT(InFmt_MUBUF *iFmt)
+        : Inst_MUBUF(iFmt, "buffer_store_short")
+    {
+        setFlag(MemoryRef);
+        setFlag(Store);
+        if (instData.LDS) {
+            setFlag(GroupSegment);
+        } else {
+            setFlag(GlobalSegment);
+        }
+    } // Inst_MUBUF__BUFFER_STORE_SHORT
+
+    Inst_MUBUF__BUFFER_STORE_SHORT::~Inst_MUBUF__BUFFER_STORE_SHORT()
+    {
+    } // ~Inst_MUBUF__BUFFER_STORE_SHORT
+
+    // --- description from .arch file ---
+    // Untyped buffer store short.
+ void + Inst_MUBUF__BUFFER_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + ConstVecOperandI16 data(gpuDynInst, extData.VDATA); + + rsrcDesc.read(); + offset.read(); + data.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = data[lane]; + } + } + } // execute + + void + Inst_MUBUF__BUFFER_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_STORE_DWORD class methods --- + + Inst_MUBUF__BUFFER_STORE_DWORD:: + Inst_MUBUF__BUFFER_STORE_DWORD(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_dword") + { + setFlag(MemoryRef); + setFlag(Store); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_STORE_DWORD + + Inst_MUBUF__BUFFER_STORE_DWORD::~Inst_MUBUF__BUFFER_STORE_DWORD() + { + } // ~Inst_MUBUF__BUFFER_STORE_DWORD + + // --- description from .arch file --- + // Untyped buffer store dword. 
+ void + Inst_MUBUF__BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + ConstVecOperandU32 data(gpuDynInst, extData.VDATA); + + rsrcDesc.read(); + offset.read(); + data.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = data[lane]; + } + } + } // execute + + void + Inst_MUBUF__BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_MUBUF__BUFFER_STORE_DWORDX2 class methods --- + + Inst_MUBUF__BUFFER_STORE_DWORDX2 + ::Inst_MUBUF__BUFFER_STORE_DWORDX2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_dwordx2") + { + setFlag(MemoryRef); + setFlag(Store); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_STORE_DWORDX2 + + Inst_MUBUF__BUFFER_STORE_DWORDX2::~Inst_MUBUF__BUFFER_STORE_DWORDX2() + { + } // ~Inst_MUBUF__BUFFER_STORE_DWORDX2 + + // --- description from .arch file --- + // Untyped buffer store 2 dwords. 
+ void + Inst_MUBUF__BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + ConstVecOperandU32 data0(gpuDynInst, extData.VDATA); + ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1); + + rsrcDesc.read(); + offset.read(); + data0.read(); + data1.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane * 4] + = data0[lane]; + (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 1] + = data1[lane]; + } + } + } // execute + + void + Inst_MUBUF__BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite<2>(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_MUBUF__BUFFER_STORE_DWORDX3 class methods --- + + Inst_MUBUF__BUFFER_STORE_DWORDX3 + ::Inst_MUBUF__BUFFER_STORE_DWORDX3(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_dwordx3") + { + setFlag(MemoryRef); + setFlag(Store); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_STORE_DWORDX3 + + Inst_MUBUF__BUFFER_STORE_DWORDX3::~Inst_MUBUF__BUFFER_STORE_DWORDX3() + { + } // ~Inst_MUBUF__BUFFER_STORE_DWORDX3 + + // --- description from .arch file --- + // Untyped buffer store 3 dwords. 
+ void + Inst_MUBUF__BUFFER_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + ConstVecOperandU32 data0(gpuDynInst, extData.VDATA); + ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1); + ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2); + + rsrcDesc.read(); + offset.read(); + data0.read(); + data1.read(); + data2.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane * 4] + = data0[lane]; + (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 1] + = data1[lane]; + (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 2] + = data2[lane]; + } + } + } // execute + + void + Inst_MUBUF__BUFFER_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite<3>(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_MUBUF__BUFFER_STORE_DWORDX4 class methods --- + + Inst_MUBUF__BUFFER_STORE_DWORDX4 + ::Inst_MUBUF__BUFFER_STORE_DWORDX4(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_dwordx4") + { + setFlag(MemoryRef); + setFlag(Store); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_STORE_DWORDX4 + + Inst_MUBUF__BUFFER_STORE_DWORDX4::~Inst_MUBUF__BUFFER_STORE_DWORDX4() + { + } // ~Inst_MUBUF__BUFFER_STORE_DWORDX4 + + // --- description from .arch file --- + // Untyped buffer store 4 dwords. 
+ void + Inst_MUBUF__BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + ConstVecOperandU32 data0(gpuDynInst, extData.VDATA); + ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1); + ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2); + ConstVecOperandU32 data3(gpuDynInst, extData.VDATA + 3); + + rsrcDesc.read(); + offset.read(); + data0.read(); + data1.read(); + data2.read(); + data3.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane * 4] + = data0[lane]; + (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 1] + = data1[lane]; + (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 2] + = data2[lane]; + (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 3] + = data3[lane]; + } + } + } // execute + + void + Inst_MUBUF__BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite<4>(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_MUBUF__BUFFER_STORE_LDS_DWORD class methods --- + + Inst_MUBUF__BUFFER_STORE_LDS_DWORD + ::Inst_MUBUF__BUFFER_STORE_LDS_DWORD(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_lds_dword") + { + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_STORE_LDS_DWORD + + Inst_MUBUF__BUFFER_STORE_LDS_DWORD::~Inst_MUBUF__BUFFER_STORE_LDS_DWORD() + { + } // ~Inst_MUBUF__BUFFER_STORE_LDS_DWORD + + // --- description from .arch file --- + // Store one DWORD from LDS memory to system memory without utilizing + // VGPRs. + void + Inst_MUBUF__BUFFER_STORE_LDS_DWORD::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_WBINVL1 class methods --- + + Inst_MUBUF__BUFFER_WBINVL1::Inst_MUBUF__BUFFER_WBINVL1(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_wbinvl1") + { + setFlag(MemoryRef); + setFlag(GPUStaticInst::MemSync); + setFlag(GlobalSegment); + setFlag(MemSync); + } // Inst_MUBUF__BUFFER_WBINVL1 + + Inst_MUBUF__BUFFER_WBINVL1::~Inst_MUBUF__BUFFER_WBINVL1() + { + } // ~Inst_MUBUF__BUFFER_WBINVL1 + + // --- description from .arch file --- + // Write back and invalidate the shader L1. + // Always returns ACK to shader. 
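+    // The writeback/invalidate is not modeled line by line. The
+    // instruction is sent down the global memory pipeline and
+    // initiateAcc() calls injectGlobalMemFence(), so the cache maintenance
+    // op is approximated by an acquire/release-style memory fence (see the
+    // TODO below about refining this for the gfx10 coalescer).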
+ void + Inst_MUBUF__BUFFER_WBINVL1::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { + gpuDynInst->computeUnit()->globalMemoryPipe. + issueRequest(gpuDynInst); + } else { + fatal("Unsupported scope for flat instruction.\n"); + } + } // execute + + void + Inst_MUBUF__BUFFER_WBINVL1::initiateAcc(GPUDynInstPtr gpuDynInst) + { + // TODO: Fix it for gfx10. Once we have the new gfx10 cache model, we + // need to precisely communicate the writeback-invalidate operation to + // the new gfx10 coalescer rather than sending AcquireRelease markers. + // The SICoalescer would need to be updated appropriately as well. + injectGlobalMemFence(gpuDynInst); + } // initiateAcc + void + Inst_MUBUF__BUFFER_WBINVL1::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_MUBUF__BUFFER_WBINVL1_VOL class methods --- + + Inst_MUBUF__BUFFER_WBINVL1_VOL + ::Inst_MUBUF__BUFFER_WBINVL1_VOL(InFmt_MUBUF*iFmt) + : Inst_MUBUF(iFmt, "buffer_wbinvl1_vol") { + // This instruction is same as buffer_wbinvl1 instruction except this + // instruction only invalidate L1 shader line with MTYPE SC and GC. + // Since Hermes L1 (TCP) do not differentiate between its cache lines, + // this instruction currently behaves (and implemented ) exactly like + // buffer_wbinvl1 instruction. + setFlag(MemoryRef); + setFlag(GPUStaticInst::MemSync); + setFlag(GlobalSegment); + setFlag(MemSync); + } // Inst_MUBUF__BUFFER_WBINVL1_VOL + + Inst_MUBUF__BUFFER_WBINVL1_VOL::~Inst_MUBUF__BUFFER_WBINVL1_VOL() + { + } // ~Inst_MUBUF__BUFFER_WBINVL1_VOL + + // --- description from .arch file --- + // Write back and invalidate the shader L1 only for lines that are marked + // --- volatile. + // Always returns ACK to shader. + void + Inst_MUBUF__BUFFER_WBINVL1_VOL::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { + gpuDynInst->computeUnit()->globalMemoryPipe. + issueRequest(gpuDynInst); + } else { + fatal("Unsupported scope for flat instruction.\n"); + } + } // execute + void + Inst_MUBUF__BUFFER_WBINVL1_VOL::initiateAcc(GPUDynInstPtr gpuDynInst) + { + injectGlobalMemFence(gpuDynInst); + } // initiateAcc + void + Inst_MUBUF__BUFFER_WBINVL1_VOL::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_MUBUF__BUFFER_ATOMIC_SWAP class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_SWAP + ::Inst_MUBUF__BUFFER_ATOMIC_SWAP(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_swap") + { + setFlag(AtomicExch); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_SWAP + + Inst_MUBUF__BUFFER_ATOMIC_SWAP::~Inst_MUBUF__BUFFER_ATOMIC_SWAP() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = DATA; + // RETURN_DATA = tmp. 
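+    // For all buffer atomics the GLC bit chooses the return behaviour:
+    // GLC=1 returns the pre-operation memory value to the VDATA VGPR(s)
+    // (AtomicReturn), GLC=0 discards it (AtomicNoReturn). completeAcc()
+    // therefore only writes d_data back to VDATA when isAtomicRet() is
+    // true, as in buffer_atomic_cmpswap below.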
+    void
+    Inst_MUBUF__BUFFER_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+    // --- Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP class methods ---
+
+    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
+        ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP(InFmt_MUBUF *iFmt)
+        : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap")
+    {
+        setFlag(AtomicCAS);
+        if (instData.GLC) {
+            setFlag(AtomicReturn);
+        } else {
+            setFlag(AtomicNoReturn);
+        }
+        setFlag(MemoryRef);
+        setFlag(GlobalSegment);
+    } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
+
+    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP()
+    {
+    } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
+
+    // --- description from .arch file ---
+    // 32b:
+    // tmp = MEM[ADDR];
+    // src = DATA[0];
+    // cmp = DATA[1];
+    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
+    // RETURN_DATA[0] = tmp.
+    void
+    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decVMemInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
+
+        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
+        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
+        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
+        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
+        ConstVecOperandU32 src(gpuDynInst, extData.VDATA);
+        ConstVecOperandU32 cmp(gpuDynInst, extData.VDATA + 1);
+
+        rsrcDesc.read();
+        offset.read();
+        src.read();
+        cmp.read();
+
+        int inst_offset = instData.OFFSET;
+
+        if (!instData.IDXEN && !instData.OFFEN) {
+            calcAddr(gpuDynInst,
+                addr0, addr1, rsrcDesc, offset, inst_offset);
+        } else if (!instData.IDXEN && instData.OFFEN) {
+            addr0.read();
+            calcAddr(gpuDynInst,
+                addr0, addr1, rsrcDesc, offset, inst_offset);
+        } else if (instData.IDXEN && !instData.OFFEN) {
+            addr0.read();
+            calcAddr(gpuDynInst,
+                addr1, addr0, rsrcDesc, offset, inst_offset);
+        } else {
+            addr0.read();
+            addr1.read();
+            calcAddr(gpuDynInst,
+                addr1, addr0, rsrcDesc, offset, inst_offset);
+        }
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU32*>(gpuDynInst->x_data))[lane]
+                    = src[lane];
+                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
+                    = cmp[lane];
+            }
+        }
+
+        gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
+    } // execute
+
+    void
+    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        initAtomicAccess<VecElemU32>(gpuDynInst);
+    } // initiateAcc
+
+    void
+    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+        if (isAtomicRet()) {
+            VecOperandU32 vdst(gpuDynInst, extData.VDATA);
+
+            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                if (gpuDynInst->exec_mask[lane]) {
+                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
+                        gpuDynInst->d_data))[lane];
+                }
+            }
+
+            vdst.write();
+        }
+    } // completeAcc
+    // --- Inst_MUBUF__BUFFER_ATOMIC_ADD class methods ---
+
+    Inst_MUBUF__BUFFER_ATOMIC_ADD
+        ::Inst_MUBUF__BUFFER_ATOMIC_ADD(InFmt_MUBUF *iFmt)
+        : Inst_MUBUF(iFmt, "buffer_atomic_add")
+    {
+        setFlag(AtomicAdd);
+        if (instData.GLC) {
+            setFlag(AtomicReturn);
+        } else {
+            setFlag(AtomicNoReturn);
+        }
+        setFlag(MemoryRef);
+        setFlag(GlobalSegment);
+    } // Inst_MUBUF__BUFFER_ATOMIC_ADD
+
+    Inst_MUBUF__BUFFER_ATOMIC_ADD::~Inst_MUBUF__BUFFER_ATOMIC_ADD()
+    {
+    } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD
+
+    // --- description from .arch file ---
+    // 32b:
// tmp = MEM[ADDR]; + // MEM[ADDR] += DATA; + // RETURN_DATA = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_SUB class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_SUB + ::Inst_MUBUF__BUFFER_ATOMIC_SUB(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_sub") + { + setFlag(AtomicSub); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_SUB + + Inst_MUBUF__BUFFER_ATOMIC_SUB::~Inst_MUBUF__BUFFER_ATOMIC_SUB() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= DATA; + // RETURN_DATA = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_SMIN class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_SMIN + ::Inst_MUBUF__BUFFER_ATOMIC_SMIN(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_smin") + { + setFlag(AtomicMin); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_SMIN + + Inst_MUBUF__BUFFER_ATOMIC_SMIN::~Inst_MUBUF__BUFFER_ATOMIC_SMIN() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); + // RETURN_DATA = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_UMIN class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_UMIN + ::Inst_MUBUF__BUFFER_ATOMIC_UMIN(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_umin") + { + setFlag(AtomicMin); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_UMIN + + Inst_MUBUF__BUFFER_ATOMIC_UMIN::~Inst_MUBUF__BUFFER_ATOMIC_UMIN() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); + // RETURN_DATA = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_SMAX class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_SMAX + ::Inst_MUBUF__BUFFER_ATOMIC_SMAX(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_smax") + { + setFlag(AtomicMax); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_SMAX + + Inst_MUBUF__BUFFER_ATOMIC_SMAX::~Inst_MUBUF__BUFFER_ATOMIC_SMAX() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); + // RETURN_DATA = tmp. 
+ void + Inst_MUBUF__BUFFER_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_UMAX class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_UMAX + ::Inst_MUBUF__BUFFER_ATOMIC_UMAX(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_umax") + { + setFlag(AtomicMax); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_UMAX + + Inst_MUBUF__BUFFER_ATOMIC_UMAX::~Inst_MUBUF__BUFFER_ATOMIC_UMAX() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); + // RETURN_DATA = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_AND class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_AND + ::Inst_MUBUF__BUFFER_ATOMIC_AND(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_and") + { + setFlag(AtomicAnd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_AND + + Inst_MUBUF__BUFFER_ATOMIC_AND::~Inst_MUBUF__BUFFER_ATOMIC_AND() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_AND + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] &= DATA; + // RETURN_DATA = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_OR class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_OR + ::Inst_MUBUF__BUFFER_ATOMIC_OR(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_or") + { + setFlag(AtomicOr); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_OR + + Inst_MUBUF__BUFFER_ATOMIC_OR::~Inst_MUBUF__BUFFER_ATOMIC_OR() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_OR + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] |= DATA; + // RETURN_DATA = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_XOR class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_XOR + ::Inst_MUBUF__BUFFER_ATOMIC_XOR(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_xor") + { + setFlag(AtomicXor); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_XOR + + Inst_MUBUF__BUFFER_ATOMIC_XOR::~Inst_MUBUF__BUFFER_ATOMIC_XOR() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] ^= DATA; + // RETURN_DATA = tmp. 
+ void + Inst_MUBUF__BUFFER_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_INC class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_INC + ::Inst_MUBUF__BUFFER_ATOMIC_INC(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_inc") + { + setFlag(AtomicInc); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_INC + + Inst_MUBUF__BUFFER_ATOMIC_INC::~Inst_MUBUF__BUFFER_ATOMIC_INC() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_INC + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); + // RETURN_DATA = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_DEC class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_DEC + ::Inst_MUBUF__BUFFER_ATOMIC_DEC(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_dec") + { + setFlag(AtomicDec); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_DEC + + Inst_MUBUF__BUFFER_ATOMIC_DEC::~Inst_MUBUF__BUFFER_ATOMIC_DEC() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 + // (unsigned compare); RETURN_DATA = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 + ::Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_swap_x2") + { + setFlag(AtomicExch); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 + + Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 + ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap_x2") + { + setFlag(AtomicCAS); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 + + Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 + ::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // src = DATA[0:1]; + // cmp = DATA[2:3]; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 + ::Inst_MUBUF__BUFFER_ATOMIC_ADD_X2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_add_x2") + { + setFlag(AtomicAdd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 + + Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] += DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 + ::Inst_MUBUF__BUFFER_ATOMIC_SUB_X2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_sub_x2") + { + setFlag(AtomicSub); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 + + Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 + ::Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_smin_x2") + { + setFlag(AtomicMin); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 + + Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 + ::Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_umin_x2") + { + setFlag(AtomicMin); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 + + Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 + ::Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_smax_x2") + { + setFlag(AtomicMax); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 + + Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 + ::Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_umax_x2") + { + setFlag(AtomicMax); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 + + Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_AND_X2 class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_AND_X2 + ::Inst_MUBUF__BUFFER_ATOMIC_AND_X2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_and_x2") + { + setFlag(AtomicAnd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_AND_X2 + + Inst_MUBUF__BUFFER_ATOMIC_AND_X2::~Inst_MUBUF__BUFFER_ATOMIC_AND_X2() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_AND_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] &= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_OR_X2 class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_OR_X2 + ::Inst_MUBUF__BUFFER_ATOMIC_OR_X2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_or_x2") + { + setFlag(AtomicOr); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + } // Inst_MUBUF__BUFFER_ATOMIC_OR_X2 + + Inst_MUBUF__BUFFER_ATOMIC_OR_X2::~Inst_MUBUF__BUFFER_ATOMIC_OR_X2() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_OR_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] |= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_MUBUF__BUFFER_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 + ::Inst_MUBUF__BUFFER_ATOMIC_XOR_X2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_xor_x2") + { + setFlag(AtomicXor); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 + + Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] ^= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_INC_X2 class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_INC_X2 + ::Inst_MUBUF__BUFFER_ATOMIC_INC_X2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_inc_x2") + { + setFlag(AtomicInc); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_INC_X2 + + Inst_MUBUF__BUFFER_ATOMIC_INC_X2::~Inst_MUBUF__BUFFER_ATOMIC_INC_X2() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_INC_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 + ::Inst_MUBUF__BUFFER_ATOMIC_DEC_X2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_dec_x2") + { + setFlag(AtomicDec); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 + + Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 + // (unsigned compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/smem.cc b/src/arch/amdgpu/vega/insts/smem.cc new file mode 100644 index 0000000000..a6af4f007d --- /dev/null +++ b/src/arch/amdgpu/vega/insts/smem.cc @@ -0,0 +1,1013 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. 
Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_SMEM__S_LOAD_DWORD class methods --- + + Inst_SMEM__S_LOAD_DWORD::Inst_SMEM__S_LOAD_DWORD(InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_load_dword") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_SMEM__S_LOAD_DWORD + + Inst_SMEM__S_LOAD_DWORD::~Inst_SMEM__S_LOAD_DWORD() + { + } // ~Inst_SMEM__S_LOAD_DWORD + + /** + * Read 1 dword from scalar data cache. If the offset is specified as an + * sgpr, the sgpr contains an unsigned byte offset (the 2 LSBs are + * ignored). If the offset is specified as an immediate 20-bit constant, + * the constant is an unsigned byte offset. + */ + void + Inst_SMEM__S_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + ScalarRegU32 offset(0); + ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); + + addr.read(); + + if (instData.IMM) { + offset = extData.OFFSET; + } else { + ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); + off_sgpr.read(); + offset = off_sgpr.rawData(); + } + + calcAddr(gpuDynInst, addr, offset); + + gpuDynInst->computeUnit()->scalarMemoryPipe + .issueRequest(gpuDynInst); + } // execute + + void + Inst_SMEM__S_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<1>(gpuDynInst); + } // initiateAcc + + void + Inst_SMEM__S_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) + { + ScalarOperandU32 sdst(gpuDynInst, instData.SDATA); + sdst.write(); + } // completeAcc + // --- Inst_SMEM__S_LOAD_DWORDX2 class methods --- + + Inst_SMEM__S_LOAD_DWORDX2::Inst_SMEM__S_LOAD_DWORDX2(InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_load_dwordx2") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_SMEM__S_LOAD_DWORDX2 + + Inst_SMEM__S_LOAD_DWORDX2::~Inst_SMEM__S_LOAD_DWORDX2() + { + } // ~Inst_SMEM__S_LOAD_DWORDX2 + + /** + * Read 2 dwords from scalar data cache. See s_load_dword for details on + * the offset input. 
+ */ + void + Inst_SMEM__S_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + ScalarRegU32 offset(0); + ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); + + addr.read(); + + if (instData.IMM) { + offset = extData.OFFSET; + } else { + ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); + off_sgpr.read(); + offset = off_sgpr.rawData(); + } + + calcAddr(gpuDynInst, addr, offset); + + gpuDynInst->computeUnit()->scalarMemoryPipe. + issueRequest(gpuDynInst); + } // execute + + void + Inst_SMEM__S_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<2>(gpuDynInst); + } // initiateAcc + + void + Inst_SMEM__S_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) + { + ScalarOperandU64 sdst(gpuDynInst, instData.SDATA); + sdst.write(); + } // completeAcc + // --- Inst_SMEM__S_LOAD_DWORDX4 class methods --- + + Inst_SMEM__S_LOAD_DWORDX4::Inst_SMEM__S_LOAD_DWORDX4(InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_load_dwordx4") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_SMEM__S_LOAD_DWORDX4 + + Inst_SMEM__S_LOAD_DWORDX4::~Inst_SMEM__S_LOAD_DWORDX4() + { + } // ~Inst_SMEM__S_LOAD_DWORDX4 + + // --- description from .arch file --- + // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on + // the offset input. + void + Inst_SMEM__S_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + ScalarRegU32 offset(0); + ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); + + addr.read(); + + if (instData.IMM) { + offset = extData.OFFSET; + } else { + ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); + off_sgpr.read(); + offset = off_sgpr.rawData(); + } + + calcAddr(gpuDynInst, addr, offset); + + gpuDynInst->computeUnit()->scalarMemoryPipe. + issueRequest(gpuDynInst); + } // execute + + void + Inst_SMEM__S_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<4>(gpuDynInst); + } // initiateAcc + + void + Inst_SMEM__S_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) + { + ScalarOperandU128 sdst(gpuDynInst, instData.SDATA); + sdst.write(); + } // completeAcc + // --- Inst_SMEM__S_LOAD_DWORDX8 class methods --- + + Inst_SMEM__S_LOAD_DWORDX8::Inst_SMEM__S_LOAD_DWORDX8(InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_load_dwordx8") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_SMEM__S_LOAD_DWORDX8 + + Inst_SMEM__S_LOAD_DWORDX8::~Inst_SMEM__S_LOAD_DWORDX8() + { + } // ~Inst_SMEM__S_LOAD_DWORDX8 + + // --- description from .arch file --- + // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on + // the offset input. 
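+    // As in the other s_load variants, the SBASE field is encoded in units
+    // of SGPR pairs, so it is shifted left by one to name the first SGPR
+    // of the 64-bit base address before the byte offset is applied.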
+ void + Inst_SMEM__S_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + ScalarRegU32 offset(0); + ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); + + addr.read(); + + if (instData.IMM) { + offset = extData.OFFSET; + } else { + ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); + off_sgpr.read(); + offset = off_sgpr.rawData(); + } + + calcAddr(gpuDynInst, addr, offset); + + gpuDynInst->computeUnit()->scalarMemoryPipe. + issueRequest(gpuDynInst); + } // execute + + void + Inst_SMEM__S_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<8>(gpuDynInst); + } // initiateAcc + + void + Inst_SMEM__S_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst) + { + ScalarOperandU256 sdst(gpuDynInst, instData.SDATA); + sdst.write(); + } // completeAcc + // --- Inst_SMEM__S_LOAD_DWORDX16 class methods --- + + Inst_SMEM__S_LOAD_DWORDX16::Inst_SMEM__S_LOAD_DWORDX16(InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_load_dwordx16") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_SMEM__S_LOAD_DWORDX16 + + Inst_SMEM__S_LOAD_DWORDX16::~Inst_SMEM__S_LOAD_DWORDX16() + { + } // ~Inst_SMEM__S_LOAD_DWORDX16 + + // --- description from .arch file --- + // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on + // the offset input. + void + Inst_SMEM__S_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + ScalarRegU32 offset(0); + ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); + + addr.read(); + + if (instData.IMM) { + offset = extData.OFFSET; + } else { + ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); + off_sgpr.read(); + offset = off_sgpr.rawData(); + } + + calcAddr(gpuDynInst, addr, offset); + + gpuDynInst->computeUnit()->scalarMemoryPipe. + issueRequest(gpuDynInst); + } // execute + + void + Inst_SMEM__S_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<16>(gpuDynInst); + } // initiateAcc + + void + Inst_SMEM__S_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst) + { + ScalarOperandU512 sdst(gpuDynInst, instData.SDATA); + sdst.write(); + } // completeAcc + // --- Inst_SMEM__S_BUFFER_LOAD_DWORD class methods --- + + Inst_SMEM__S_BUFFER_LOAD_DWORD::Inst_SMEM__S_BUFFER_LOAD_DWORD( + InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_buffer_load_dword") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_SMEM__S_BUFFER_LOAD_DWORD + + Inst_SMEM__S_BUFFER_LOAD_DWORD::~Inst_SMEM__S_BUFFER_LOAD_DWORD() + { + } // ~Inst_SMEM__S_BUFFER_LOAD_DWORD + + // --- description from .arch file --- + // Read 1 dword from scalar data cache. See S_LOAD_DWORD for details on the + // --- offset input. 
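+    // s_buffer_load_* differs from s_load_* in that SBASE names a 128-bit
+    // buffer resource descriptor (V#) held in four SGPRs rather than a
+    // 64-bit address pair, so the base is read as a ConstScalarOperandU128
+    // and calcAddr() resolves the base address from the descriptor. The
+    // byte offset is either the 20-bit immediate (IMM=1) or the value of
+    // the SGPR named by OFFSET (IMM=0), exactly as for s_load_dword.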
+ void + Inst_SMEM__S_BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + ScalarRegU32 offset(0); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE); + + rsrcDesc.read(); + + if (instData.IMM) { + offset = extData.OFFSET; + } else { + ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); + off_sgpr.read(); + offset = off_sgpr.rawData(); + } + + calcAddr(gpuDynInst, rsrcDesc, offset); + + gpuDynInst->computeUnit()->scalarMemoryPipe + .issueRequest(gpuDynInst); + } // execute + + void + Inst_SMEM__S_BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<1>(gpuDynInst); + } // initiateAcc + + void + Inst_SMEM__S_BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) + { + // 1 request, size 32 + ScalarOperandU32 sdst(gpuDynInst, instData.SDATA); + sdst.write(); + } // completeAcc + // --- Inst_SMEM__S_BUFFER_LOAD_DWORDX2 class methods --- + + Inst_SMEM__S_BUFFER_LOAD_DWORDX2::Inst_SMEM__S_BUFFER_LOAD_DWORDX2( + InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_buffer_load_dwordx2") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_SMEM__S_BUFFER_LOAD_DWORDX2 + + Inst_SMEM__S_BUFFER_LOAD_DWORDX2::~Inst_SMEM__S_BUFFER_LOAD_DWORDX2() + { + } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX2 + + // --- description from .arch file --- + // Read 2 dwords from scalar data cache. See S_LOAD_DWORD for details on + // the offset input. + void + Inst_SMEM__S_BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + ScalarRegU32 offset(0); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE); + + rsrcDesc.read(); + + if (instData.IMM) { + offset = extData.OFFSET; + } else { + ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); + off_sgpr.read(); + offset = off_sgpr.rawData(); + } + + calcAddr(gpuDynInst, rsrcDesc, offset); + + gpuDynInst->computeUnit()->scalarMemoryPipe + .issueRequest(gpuDynInst); + } // execute + + void + Inst_SMEM__S_BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<2>(gpuDynInst); + } // initiateAcc + + void + Inst_SMEM__S_BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) + { + // use U64 because 2 requests, each size 32 + ScalarOperandU64 sdst(gpuDynInst, instData.SDATA); + sdst.write(); + } // completeAcc + // --- Inst_SMEM__S_BUFFER_LOAD_DWORDX4 class methods --- + + Inst_SMEM__S_BUFFER_LOAD_DWORDX4::Inst_SMEM__S_BUFFER_LOAD_DWORDX4( + InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_buffer_load_dwordx4") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_SMEM__S_BUFFER_LOAD_DWORDX4 + + Inst_SMEM__S_BUFFER_LOAD_DWORDX4::~Inst_SMEM__S_BUFFER_LOAD_DWORDX4() + { + } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX4 + + // --- description from .arch file --- + // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on + // the offset input. 
+ void + Inst_SMEM__S_BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + ScalarRegU32 offset(0); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE); + + rsrcDesc.read(); + + if (instData.IMM) { + offset = extData.OFFSET; + } else { + ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); + off_sgpr.read(); + offset = off_sgpr.rawData(); + } + + calcAddr(gpuDynInst, rsrcDesc, offset); + + gpuDynInst->computeUnit()->scalarMemoryPipe + .issueRequest(gpuDynInst); + } // execute + + void + Inst_SMEM__S_BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<4>(gpuDynInst); + } // initiateAcc + + void + Inst_SMEM__S_BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) + { + // 4 requests, each size 32 + ScalarOperandU128 sdst(gpuDynInst, instData.SDATA); + sdst.write(); + } // completeAcc + // --- Inst_SMEM__S_BUFFER_LOAD_DWORDX8 class methods --- + + Inst_SMEM__S_BUFFER_LOAD_DWORDX8::Inst_SMEM__S_BUFFER_LOAD_DWORDX8( + InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_buffer_load_dwordx8") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_SMEM__S_BUFFER_LOAD_DWORDX8 + + Inst_SMEM__S_BUFFER_LOAD_DWORDX8::~Inst_SMEM__S_BUFFER_LOAD_DWORDX8() + { + } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX8 + + // --- description from .arch file --- + // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on + // the offset input. + void + Inst_SMEM__S_BUFFER_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + ScalarRegU32 offset(0); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE); + + rsrcDesc.read(); + + if (instData.IMM) { + offset = extData.OFFSET; + } else { + ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); + off_sgpr.read(); + offset = off_sgpr.rawData(); + } + + calcAddr(gpuDynInst, rsrcDesc, offset); + + gpuDynInst->computeUnit()->scalarMemoryPipe + .issueRequest(gpuDynInst); + } // execute + + void + Inst_SMEM__S_BUFFER_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<8>(gpuDynInst); + } // initiateAcc + + void + Inst_SMEM__S_BUFFER_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst) + { + // 8 requests, each size 32 + ScalarOperandU256 sdst(gpuDynInst, instData.SDATA); + sdst.write(); + } // completeAcc + // --- Inst_SMEM__S_BUFFER_LOAD_DWORDX16 class methods --- + + Inst_SMEM__S_BUFFER_LOAD_DWORDX16::Inst_SMEM__S_BUFFER_LOAD_DWORDX16( + InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_buffer_load_dwordx16") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_SMEM__S_BUFFER_LOAD_DWORDX16 + + Inst_SMEM__S_BUFFER_LOAD_DWORDX16::~Inst_SMEM__S_BUFFER_LOAD_DWORDX16() + { + } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX16 + + // --- description from .arch file --- + // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on + // the offset input. 
+ void + Inst_SMEM__S_BUFFER_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + ScalarRegU32 offset(0); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE); + + rsrcDesc.read(); + + if (instData.IMM) { + offset = extData.OFFSET; + } else { + ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); + off_sgpr.read(); + offset = off_sgpr.rawData(); + } + + calcAddr(gpuDynInst, rsrcDesc, offset); + + gpuDynInst->computeUnit()->scalarMemoryPipe + .issueRequest(gpuDynInst); + } // execute + + void + Inst_SMEM__S_BUFFER_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<16>(gpuDynInst); + } // initiateAcc + + void + Inst_SMEM__S_BUFFER_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst) + { + // 16 requests, each size 32 + ScalarOperandU512 sdst(gpuDynInst, instData.SDATA); + sdst.write(); + } // completeAcc + // --- Inst_SMEM__S_STORE_DWORD class methods --- + + Inst_SMEM__S_STORE_DWORD::Inst_SMEM__S_STORE_DWORD(InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_store_dword") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_SMEM__S_STORE_DWORD + + Inst_SMEM__S_STORE_DWORD::~Inst_SMEM__S_STORE_DWORD() + { + } // ~Inst_SMEM__S_STORE_DWORD + + // --- description from .arch file --- + // Write 1 dword to scalar data cache. + // If the offset is specified as an SGPR, the SGPR contains an unsigned + // BYTE offset (the 2 LSBs are ignored). + // If the offset is specified as an immediate 20-bit constant, the + // constant is an unsigned BYTE offset. + void + Inst_SMEM__S_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + ScalarRegU32 offset(0); + ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); + ConstScalarOperandU32 sdata(gpuDynInst, instData.SDATA); + + addr.read(); + sdata.read(); + + std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(), + sizeof(ScalarRegU32)); + + if (instData.IMM) { + offset = extData.OFFSET; + } else { + ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); + off_sgpr.read(); + offset = off_sgpr.rawData(); + } + + calcAddr(gpuDynInst, addr, offset); + + gpuDynInst->computeUnit()->scalarMemoryPipe. + issueRequest(gpuDynInst); + } // execute + + void + Inst_SMEM__S_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite<1>(gpuDynInst); + } // initiateAcc + + void + Inst_SMEM__S_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_SMEM__S_STORE_DWORDX2 class methods --- + + Inst_SMEM__S_STORE_DWORDX2::Inst_SMEM__S_STORE_DWORDX2(InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_store_dwordx2") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_SMEM__S_STORE_DWORDX2 + + Inst_SMEM__S_STORE_DWORDX2::~Inst_SMEM__S_STORE_DWORDX2() + { + } // ~Inst_SMEM__S_STORE_DWORDX2 + + // --- description from .arch file --- + // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on + // the offset input. 
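+    // As with S_STORE_DWORD above, the source registers are staged into
+    // gpuDynInst->scalar_data here; initMemWrite<N>() in initiateAcc()
+    // later performs the actual write of N dwords.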
+ void + Inst_SMEM__S_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + ScalarRegU32 offset(0); + ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); + ConstScalarOperandU64 sdata(gpuDynInst, instData.SDATA); + + addr.read(); + sdata.read(); + + std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(), + sizeof(ScalarRegU64)); + + if (instData.IMM) { + offset = extData.OFFSET; + } else { + ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); + off_sgpr.read(); + offset = off_sgpr.rawData(); + } + + calcAddr(gpuDynInst, addr, offset); + + gpuDynInst->computeUnit()->scalarMemoryPipe. + issueRequest(gpuDynInst); + } // execute + + void + Inst_SMEM__S_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite<2>(gpuDynInst); + } // initiateAcc + + void + Inst_SMEM__S_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_SMEM__S_STORE_DWORDX4 class methods --- + + Inst_SMEM__S_STORE_DWORDX4::Inst_SMEM__S_STORE_DWORDX4(InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_store_dwordx4") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_SMEM__S_STORE_DWORDX4 + + Inst_SMEM__S_STORE_DWORDX4::~Inst_SMEM__S_STORE_DWORDX4() + { + } // ~Inst_SMEM__S_STORE_DWORDX4 + + // --- description from .arch file --- + // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on + // the offset input. + void + Inst_SMEM__S_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + ScalarRegU32 offset(0); + ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); + ConstScalarOperandU64 sdata(gpuDynInst, instData.SDATA); + + addr.read(); + sdata.read(); + + std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(), + sizeof(gpuDynInst->scalar_data)); + + if (instData.IMM) { + offset = extData.OFFSET; + } else { + ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); + off_sgpr.read(); + offset = off_sgpr.rawData(); + } + + calcAddr(gpuDynInst, addr, offset); + + gpuDynInst->computeUnit()->scalarMemoryPipe. + issueRequest(gpuDynInst); + } // execute + + void + Inst_SMEM__S_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite<4>(gpuDynInst); + } // initiateAcc + + void + Inst_SMEM__S_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_SMEM__S_BUFFER_STORE_DWORD class methods --- + + Inst_SMEM__S_BUFFER_STORE_DWORD::Inst_SMEM__S_BUFFER_STORE_DWORD( + InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_buffer_store_dword") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_SMEM__S_BUFFER_STORE_DWORD + + Inst_SMEM__S_BUFFER_STORE_DWORD::~Inst_SMEM__S_BUFFER_STORE_DWORD() + { + } // ~Inst_SMEM__S_BUFFER_STORE_DWORD + + // --- description from .arch file --- + // Write 1 dword to scalar data cache. See S_STORE_DWORD for details on the + // --- offset input. 
+ void + Inst_SMEM__S_BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_SMEM__S_BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_SMEM__S_BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_SMEM__S_BUFFER_STORE_DWORDX2 class methods --- + + Inst_SMEM__S_BUFFER_STORE_DWORDX2::Inst_SMEM__S_BUFFER_STORE_DWORDX2( + InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_buffer_store_dwordx2") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_SMEM__S_BUFFER_STORE_DWORDX2 + + Inst_SMEM__S_BUFFER_STORE_DWORDX2::~Inst_SMEM__S_BUFFER_STORE_DWORDX2() + { + } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX2 + + // --- description from .arch file --- + // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on + // the offset input. + void + Inst_SMEM__S_BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_SMEM__S_BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_SMEM__S_BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_SMEM__S_BUFFER_STORE_DWORDX4 class methods --- + + Inst_SMEM__S_BUFFER_STORE_DWORDX4::Inst_SMEM__S_BUFFER_STORE_DWORDX4( + InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_buffer_store_dwordx4") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_SMEM__S_BUFFER_STORE_DWORDX4 + + Inst_SMEM__S_BUFFER_STORE_DWORDX4::~Inst_SMEM__S_BUFFER_STORE_DWORDX4() + { + } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX4 + + // --- description from .arch file --- + // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on + // the offset input. + void + Inst_SMEM__S_BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_SMEM__S_BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_SMEM__S_BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_SMEM__S_DCACHE_INV class methods --- + + Inst_SMEM__S_DCACHE_INV::Inst_SMEM__S_DCACHE_INV(InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_dcache_inv") + { + } // Inst_SMEM__S_DCACHE_INV + + Inst_SMEM__S_DCACHE_INV::~Inst_SMEM__S_DCACHE_INV() + { + } // ~Inst_SMEM__S_DCACHE_INV + + // --- description from .arch file --- + // Invalidate the scalar data cache. + void + Inst_SMEM__S_DCACHE_INV::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SMEM__S_DCACHE_WB class methods --- + + Inst_SMEM__S_DCACHE_WB::Inst_SMEM__S_DCACHE_WB(InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_dcache_wb") + { + } // Inst_SMEM__S_DCACHE_WB + + Inst_SMEM__S_DCACHE_WB::~Inst_SMEM__S_DCACHE_WB() + { + } // ~Inst_SMEM__S_DCACHE_WB + + // --- description from .arch file --- + // Write back dirty data in the scalar data cache. + void + Inst_SMEM__S_DCACHE_WB::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SMEM__S_DCACHE_INV_VOL class methods --- + + Inst_SMEM__S_DCACHE_INV_VOL::Inst_SMEM__S_DCACHE_INV_VOL(InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_dcache_inv_vol") + { + } // Inst_SMEM__S_DCACHE_INV_VOL + + Inst_SMEM__S_DCACHE_INV_VOL::~Inst_SMEM__S_DCACHE_INV_VOL() + { + } // ~Inst_SMEM__S_DCACHE_INV_VOL + + // --- description from .arch file --- + // Invalidate the scalar data cache volatile lines. 
+ void + Inst_SMEM__S_DCACHE_INV_VOL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SMEM__S_DCACHE_WB_VOL class methods --- + + Inst_SMEM__S_DCACHE_WB_VOL::Inst_SMEM__S_DCACHE_WB_VOL(InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_dcache_wb_vol") + { + } // Inst_SMEM__S_DCACHE_WB_VOL + + Inst_SMEM__S_DCACHE_WB_VOL::~Inst_SMEM__S_DCACHE_WB_VOL() + { + } // ~Inst_SMEM__S_DCACHE_WB_VOL + + // --- description from .arch file --- + // Write back dirty data in the scalar data cache volatile lines. + void + Inst_SMEM__S_DCACHE_WB_VOL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SMEM__S_MEMTIME class methods --- + + Inst_SMEM__S_MEMTIME::Inst_SMEM__S_MEMTIME(InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_memtime") + { + // s_memtime does not issue a memory request + setFlag(ALU); + } // Inst_SMEM__S_MEMTIME + + Inst_SMEM__S_MEMTIME::~Inst_SMEM__S_MEMTIME() + { + } // ~Inst_SMEM__S_MEMTIME + + // --- description from .arch file --- + // Return current 64-bit timestamp. + void + Inst_SMEM__S_MEMTIME::execute(GPUDynInstPtr gpuDynInst) + { + ScalarOperandU64 sdst(gpuDynInst, instData.SDATA); + sdst = (ScalarRegU64)gpuDynInst->computeUnit()->curCycle(); + sdst.write(); + } // execute + // --- Inst_SMEM__S_MEMREALTIME class methods --- + + Inst_SMEM__S_MEMREALTIME::Inst_SMEM__S_MEMREALTIME(InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_memrealtime") + { + } // Inst_SMEM__S_MEMREALTIME + + Inst_SMEM__S_MEMREALTIME::~Inst_SMEM__S_MEMREALTIME() + { + } // ~Inst_SMEM__S_MEMREALTIME + + // --- description from .arch file --- + // Return current 64-bit RTC. + void + Inst_SMEM__S_MEMREALTIME::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SMEM__S_ATC_PROBE class methods --- + + Inst_SMEM__S_ATC_PROBE::Inst_SMEM__S_ATC_PROBE(InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_atc_probe") + { + } // Inst_SMEM__S_ATC_PROBE + + Inst_SMEM__S_ATC_PROBE::~Inst_SMEM__S_ATC_PROBE() + { + } // ~Inst_SMEM__S_ATC_PROBE + + // --- description from .arch file --- + // Probe or prefetch an address into the SQC data cache. + void + Inst_SMEM__S_ATC_PROBE::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SMEM__S_ATC_PROBE_BUFFER class methods --- + + Inst_SMEM__S_ATC_PROBE_BUFFER::Inst_SMEM__S_ATC_PROBE_BUFFER( + InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_atc_probe_buffer") + { + } // Inst_SMEM__S_ATC_PROBE_BUFFER + + Inst_SMEM__S_ATC_PROBE_BUFFER::~Inst_SMEM__S_ATC_PROBE_BUFFER() + { + } // ~Inst_SMEM__S_ATC_PROBE_BUFFER + + // --- description from .arch file --- + // Probe or prefetch an address into the SQC data cache. + void + Inst_SMEM__S_ATC_PROBE_BUFFER::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/sop1.cc b/src/arch/amdgpu/vega/insts/sop1.cc new file mode 100644 index 0000000000..fa9a103e39 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/sop1.cc @@ -0,0 +1,1504 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/inst_util.hh" +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_SOP1__S_MOV_B32 class methods --- + + Inst_SOP1__S_MOV_B32::Inst_SOP1__S_MOV_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_mov_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_MOV_B32 + + Inst_SOP1__S_MOV_B32::~Inst_SOP1__S_MOV_B32() + { + } // ~Inst_SOP1__S_MOV_B32 + + // --- description from .arch file --- + // D.u = S0.u. + void + Inst_SOP1__S_MOV_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = src.rawData(); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_MOV_B64 class methods --- + + Inst_SOP1__S_MOV_B64::Inst_SOP1__S_MOV_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_mov_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_MOV_B64 + + Inst_SOP1__S_MOV_B64::~Inst_SOP1__S_MOV_B64() + { + } // ~Inst_SOP1__S_MOV_B64 + + // --- description from .arch file --- + // D.u64 = S0.u64. + void + Inst_SOP1__S_MOV_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = src.rawData(); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_CMOV_B32 class methods --- + + Inst_SOP1__S_CMOV_B32::Inst_SOP1__S_CMOV_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_cmov_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_CMOV_B32 + + Inst_SOP1__S_CMOV_B32::~Inst_SOP1__S_CMOV_B32() + { + } // ~Inst_SOP1__S_CMOV_B32 + + // --- description from .arch file --- + // (SCC) then D.u = S0.u; + // else NOP. + // Conditional move. 
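+    // SCC acts as the select: the destination is written only when the
+    // condition bit is set, e.g. to pick a value after an s_cmp_* compare
+    // without branching.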
+ void + Inst_SOP1__S_CMOV_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + scc.read(); + + if (scc.rawData()) { + sdst = src.rawData(); + sdst.write(); + } + } // execute + // --- Inst_SOP1__S_CMOV_B64 class methods --- + + Inst_SOP1__S_CMOV_B64::Inst_SOP1__S_CMOV_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_cmov_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_CMOV_B64 + + Inst_SOP1__S_CMOV_B64::~Inst_SOP1__S_CMOV_B64() + { + } // ~Inst_SOP1__S_CMOV_B64 + + // --- description from .arch file --- + // if (SCC) then D.u64 = S0.u64; + // else NOP. + // Conditional move. + void + Inst_SOP1__S_CMOV_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + scc.read(); + + if (scc.rawData()) { + sdst = src.rawData(); + sdst.write(); + } + } // execute + // --- Inst_SOP1__S_NOT_B32 class methods --- + + Inst_SOP1__S_NOT_B32::Inst_SOP1__S_NOT_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_not_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_NOT_B32 + + Inst_SOP1__S_NOT_B32::~Inst_SOP1__S_NOT_B32() + { + } // ~Inst_SOP1__S_NOT_B32 + + // --- description from .arch file --- + // D.u = ~S0.u; + // SCC = 1 if result is non-zero. + // Bitwise negation. + void + Inst_SOP1__S_NOT_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = ~src.rawData(); + + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP1__S_NOT_B64 class methods --- + + Inst_SOP1__S_NOT_B64::Inst_SOP1__S_NOT_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_not_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_NOT_B64 + + Inst_SOP1__S_NOT_B64::~Inst_SOP1__S_NOT_B64() + { + } // ~Inst_SOP1__S_NOT_B64 + + // --- description from .arch file --- + // D.u64 = ~S0.u64; + // SCC = 1 if result is non-zero. + // Bitwise negation. + void + Inst_SOP1__S_NOT_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = ~src.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP1__S_WQM_B32 class methods --- + + Inst_SOP1__S_WQM_B32::Inst_SOP1__S_WQM_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_wqm_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_WQM_B32 + + Inst_SOP1__S_WQM_B32::~Inst_SOP1__S_WQM_B32() + { + } // ~Inst_SOP1__S_WQM_B32 + + // --- description from .arch file --- + // D[i] = (S0[(i & ~3):(i | 3)] != 0); + // Computes whole quad mode for an active/valid mask. + // SCC = 1 if result is non-zero. + void + Inst_SOP1__S_WQM_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = wholeQuadMode(src.rawData()); + scc = sdst.rawData() ? 
1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP1__S_WQM_B64 class methods --- + + Inst_SOP1__S_WQM_B64::Inst_SOP1__S_WQM_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_wqm_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_WQM_B64 + + Inst_SOP1__S_WQM_B64::~Inst_SOP1__S_WQM_B64() + { + } // ~Inst_SOP1__S_WQM_B64 + + // --- description from .arch file --- + // D[i] = (S0[(i & ~3):(i | 3)] != 0); + // Computes whole quad mode for an active/valid mask. + // SCC = 1 if result is non-zero. + void + Inst_SOP1__S_WQM_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = wholeQuadMode(src.rawData()); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP1__S_BREV_B32 class methods --- + + Inst_SOP1__S_BREV_B32::Inst_SOP1__S_BREV_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_brev_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_BREV_B32 + + Inst_SOP1__S_BREV_B32::~Inst_SOP1__S_BREV_B32() + { + } // ~Inst_SOP1__S_BREV_B32 + + // --- description from .arch file --- + // D.u[31:0] = S0.u[0:31] (reverse bits). + void + Inst_SOP1__S_BREV_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = reverseBits(src.rawData()); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_BREV_B64 class methods --- + + Inst_SOP1__S_BREV_B64::Inst_SOP1__S_BREV_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_brev_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_BREV_B64 + + Inst_SOP1__S_BREV_B64::~Inst_SOP1__S_BREV_B64() + { + } // ~Inst_SOP1__S_BREV_B64 + + // --- description from .arch file --- + // D.u64[63:0] = S0.u64[0:63] (reverse bits). + void + Inst_SOP1__S_BREV_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = reverseBits(src.rawData()); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_BCNT0_I32_B32 class methods --- + + Inst_SOP1__S_BCNT0_I32_B32::Inst_SOP1__S_BCNT0_I32_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_bcnt0_i32_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_BCNT0_I32_B32 + + Inst_SOP1__S_BCNT0_I32_B32::~Inst_SOP1__S_BCNT0_I32_B32() + { + } // ~Inst_SOP1__S_BCNT0_I32_B32 + + // --- description from .arch file --- + // D.i = CountZeroBits(S0.u); + // SCC = 1 if result is non-zero. + void + Inst_SOP1__S_BCNT0_I32_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = countZeroBits(src.rawData()); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP1__S_BCNT0_I32_B64 class methods --- + + Inst_SOP1__S_BCNT0_I32_B64::Inst_SOP1__S_BCNT0_I32_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_bcnt0_i32_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_BCNT0_I32_B64 + + Inst_SOP1__S_BCNT0_I32_B64::~Inst_SOP1__S_BCNT0_I32_B64() + { + } // ~Inst_SOP1__S_BCNT0_I32_B64 + + // --- description from .arch file --- + // D.i = CountZeroBits(S0.u64); + // SCC = 1 if result is non-zero. 
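+    // Example: S0 = 0x0 gives D = 64 (all bits clear) and SCC = 1, while
+    // S0 = ~0x0 gives D = 0 and SCC = 0.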
+ void + Inst_SOP1__S_BCNT0_I32_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = countZeroBits(src.rawData()); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP1__S_BCNT1_I32_B32 class methods --- + + Inst_SOP1__S_BCNT1_I32_B32::Inst_SOP1__S_BCNT1_I32_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_bcnt1_i32_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_BCNT1_I32_B32 + + Inst_SOP1__S_BCNT1_I32_B32::~Inst_SOP1__S_BCNT1_I32_B32() + { + } // ~Inst_SOP1__S_BCNT1_I32_B32 + + // --- description from .arch file --- + // D.i = CountOneBits(S0.u); + // SCC = 1 if result is non-zero. + void + Inst_SOP1__S_BCNT1_I32_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = popCount(src.rawData()); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP1__S_BCNT1_I32_B64 class methods --- + + Inst_SOP1__S_BCNT1_I32_B64::Inst_SOP1__S_BCNT1_I32_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_bcnt1_i32_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_BCNT1_I32_B64 + + Inst_SOP1__S_BCNT1_I32_B64::~Inst_SOP1__S_BCNT1_I32_B64() + { + } // ~Inst_SOP1__S_BCNT1_I32_B64 + + // --- description from .arch file --- + // D.i = CountOneBits(S0.u64); + // SCC = 1 if result is non-zero. + void + Inst_SOP1__S_BCNT1_I32_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = popCount(src.rawData()); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP1__S_FF0_I32_B32 class methods --- + + Inst_SOP1__S_FF0_I32_B32::Inst_SOP1__S_FF0_I32_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_ff0_i32_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_FF0_I32_B32 + + Inst_SOP1__S_FF0_I32_B32::~Inst_SOP1__S_FF0_I32_B32() + { + } // ~Inst_SOP1__S_FF0_I32_B32 + + // --- description from .arch file --- + // D.i = FindFirstZero(S0.u); + // If no zeros are found, return -1. + // Returns the bit position of the first zero from the LSB. + void + Inst_SOP1__S_FF0_I32_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = findFirstZero(src.rawData()); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_FF0_I32_B64 class methods --- + + Inst_SOP1__S_FF0_I32_B64::Inst_SOP1__S_FF0_I32_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_ff0_i32_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_FF0_I32_B64 + + Inst_SOP1__S_FF0_I32_B64::~Inst_SOP1__S_FF0_I32_B64() + { + } // ~Inst_SOP1__S_FF0_I32_B64 + + // --- description from .arch file --- + // D.i = FindFirstZero(S0.u64); + // If no zeros are found, return -1. + // Returns the bit position of the first zero from the LSB. 
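+    // Example: S0 = 0x7 returns 3 (bit 3 is the lowest clear bit); an
+    // all-ones source returns -1.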
+ void + Inst_SOP1__S_FF0_I32_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = findFirstZero(src.rawData()); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_FF1_I32_B32 class methods --- + + Inst_SOP1__S_FF1_I32_B32::Inst_SOP1__S_FF1_I32_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_ff1_i32_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_FF1_I32_B32 + + Inst_SOP1__S_FF1_I32_B32::~Inst_SOP1__S_FF1_I32_B32() + { + } // ~Inst_SOP1__S_FF1_I32_B32 + + // --- description from .arch file --- + // D.i = FindFirstOne(S0.u); + // If no ones are found, return -1. + // Returns the bit position of the first one from the LSB. + void + Inst_SOP1__S_FF1_I32_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = findFirstOne(src.rawData()); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_FF1_I32_B64 class methods --- + + Inst_SOP1__S_FF1_I32_B64::Inst_SOP1__S_FF1_I32_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_ff1_i32_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_FF1_I32_B64 + + Inst_SOP1__S_FF1_I32_B64::~Inst_SOP1__S_FF1_I32_B64() + { + } // ~Inst_SOP1__S_FF1_I32_B64 + + // --- description from .arch file --- + // D.i = FindFirstOne(S0.u64); + // If no ones are found, return -1. + // Returns the bit position of the first one from the LSB. + void + Inst_SOP1__S_FF1_I32_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = findFirstOne(src.rawData()); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_FLBIT_I32_B32 class methods --- + + Inst_SOP1__S_FLBIT_I32_B32::Inst_SOP1__S_FLBIT_I32_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_flbit_i32_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_FLBIT_I32_B32 + + Inst_SOP1__S_FLBIT_I32_B32::~Inst_SOP1__S_FLBIT_I32_B32() + { + } // ~Inst_SOP1__S_FLBIT_I32_B32 + + // --- description from .arch file --- + // D.i = FindFirstOne(S0.u); + // If no ones are found, return -1. + // Counts how many zeros before the first one starting from the MSB. + void + Inst_SOP1__S_FLBIT_I32_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = countZeroBitsMsb(src.rawData()); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_FLBIT_I32_B64 class methods --- + + Inst_SOP1__S_FLBIT_I32_B64::Inst_SOP1__S_FLBIT_I32_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_flbit_i32_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_FLBIT_I32_B64 + + Inst_SOP1__S_FLBIT_I32_B64::~Inst_SOP1__S_FLBIT_I32_B64() + { + } // ~Inst_SOP1__S_FLBIT_I32_B64 + + // --- description from .arch file --- + // D.i = FindFirstOne(S0.u64); + // If no ones are found, return -1. + // Counts how many zeros before the first one starting from the MSB. 
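+    // Example: S0 = 0xFFFF returns 48 (bits 63..16 are zero before the
+    // first one is found scanning down from the MSB); S0 = 0 returns -1.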
+    void
+    Inst_SOP1__S_FLBIT_I32_B64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
+        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
+
+        src.read();
+
+        sdst = countZeroBitsMsb(src.rawData());
+
+        sdst.write();
+    } // execute
+    // --- Inst_SOP1__S_FLBIT_I32 class methods ---
+
+    Inst_SOP1__S_FLBIT_I32::Inst_SOP1__S_FLBIT_I32(InFmt_SOP1 *iFmt)
+        : Inst_SOP1(iFmt, "s_flbit_i32")
+    {
+        setFlag(ALU);
+    } // Inst_SOP1__S_FLBIT_I32
+
+    Inst_SOP1__S_FLBIT_I32::~Inst_SOP1__S_FLBIT_I32()
+    {
+    } // ~Inst_SOP1__S_FLBIT_I32
+
+    // --- description from .arch file ---
+    // D.i = FirstOppositeSignBit(S0.i);
+    // If S0.i == 0 or S0.i == -1 (all bits are the same), return -1.
+    // Counts how many bits in a row (from MSB to LSB) are the same as the
+    // sign bit.
+    void
+    Inst_SOP1__S_FLBIT_I32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
+        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
+
+        src.read();
+
+        sdst = firstOppositeSignBit(src.rawData());
+
+        sdst.write();
+    } // execute
+    // --- Inst_SOP1__S_FLBIT_I32_I64 class methods ---
+
+    Inst_SOP1__S_FLBIT_I32_I64::Inst_SOP1__S_FLBIT_I32_I64(InFmt_SOP1 *iFmt)
+        : Inst_SOP1(iFmt, "s_flbit_i32_i64")
+    {
+        setFlag(ALU);
+    } // Inst_SOP1__S_FLBIT_I32_I64
+
+    Inst_SOP1__S_FLBIT_I32_I64::~Inst_SOP1__S_FLBIT_I32_I64()
+    {
+    } // ~Inst_SOP1__S_FLBIT_I32_I64
+
+    // --- description from .arch file ---
+    // D.i = FirstOppositeSignBit(S0.i64);
+    // If S0.i == 0 or S0.i == -1 (all bits are the same), return -1.
+    // Counts how many bits in a row (from MSB to LSB) are the same as the
+    // sign bit.
+    void
+    Inst_SOP1__S_FLBIT_I32_I64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        ConstScalarOperandI64 src(gpuDynInst, instData.SSRC0);
+        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
+
+        src.read();
+
+        sdst = firstOppositeSignBit(src.rawData());
+
+        sdst.write();
+    } // execute
+    // --- Inst_SOP1__S_SEXT_I32_I8 class methods ---
+
+    Inst_SOP1__S_SEXT_I32_I8::Inst_SOP1__S_SEXT_I32_I8(InFmt_SOP1 *iFmt)
+        : Inst_SOP1(iFmt, "s_sext_i32_i8")
+    {
+        setFlag(ALU);
+    } // Inst_SOP1__S_SEXT_I32_I8
+
+    Inst_SOP1__S_SEXT_I32_I8::~Inst_SOP1__S_SEXT_I32_I8()
+    {
+    } // ~Inst_SOP1__S_SEXT_I32_I8
+
+    // --- description from .arch file ---
+    // D.i = signext(S0.i[7:0]) (sign extension).
+    void
+    Inst_SOP1__S_SEXT_I32_I8::execute(GPUDynInstPtr gpuDynInst)
+    {
+        ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
+        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
+
+        src.read();
+
+        sdst = sext<std::numeric_limits<uint8_t>::digits>(
+            bits(src.rawData(), 7, 0));
+
+        sdst.write();
+    } // execute
+    // --- Inst_SOP1__S_SEXT_I32_I16 class methods ---
+
+    Inst_SOP1__S_SEXT_I32_I16::Inst_SOP1__S_SEXT_I32_I16(InFmt_SOP1 *iFmt)
+        : Inst_SOP1(iFmt, "s_sext_i32_i16")
+    {
+        setFlag(ALU);
+    } // Inst_SOP1__S_SEXT_I32_I16
+
+    Inst_SOP1__S_SEXT_I32_I16::~Inst_SOP1__S_SEXT_I32_I16()
+    {
+    } // ~Inst_SOP1__S_SEXT_I32_I16
+
+    // --- description from .arch file ---
+    // D.i = signext(S0.i[15:0]) (sign extension).
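+    // Example: S0 = 0x00008000 yields D = 0xFFFF8000 (-32768); bits 31:16
+    // of the source are ignored.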
+    void
+    Inst_SOP1__S_SEXT_I32_I16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
+        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
+
+        src.read();
+
+        sdst = sext<std::numeric_limits<uint16_t>::digits>(
+            bits(src.rawData(), 15, 0));
+
+        sdst.write();
+    } // execute
+    // --- Inst_SOP1__S_BITSET0_B32 class methods ---
+
+    Inst_SOP1__S_BITSET0_B32::Inst_SOP1__S_BITSET0_B32(InFmt_SOP1 *iFmt)
+        : Inst_SOP1(iFmt, "s_bitset0_b32")
+    {
+        setFlag(ALU);
+    } // Inst_SOP1__S_BITSET0_B32
+
+    Inst_SOP1__S_BITSET0_B32::~Inst_SOP1__S_BITSET0_B32()
+    {
+    } // ~Inst_SOP1__S_BITSET0_B32
+
+    // --- description from .arch file ---
+    // D.u[S0.u[4:0]] = 0.
+    void
+    Inst_SOP1__S_BITSET0_B32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
+        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
+
+        src.read();
+
+        sdst.setBit(bits(src.rawData(), 4, 0), 0);
+
+        sdst.write();
+    } // execute
+    // --- Inst_SOP1__S_BITSET0_B64 class methods ---
+
+    Inst_SOP1__S_BITSET0_B64::Inst_SOP1__S_BITSET0_B64(InFmt_SOP1 *iFmt)
+        : Inst_SOP1(iFmt, "s_bitset0_b64")
+    {
+        setFlag(ALU);
+    } // Inst_SOP1__S_BITSET0_B64
+
+    Inst_SOP1__S_BITSET0_B64::~Inst_SOP1__S_BITSET0_B64()
+    {
+    } // ~Inst_SOP1__S_BITSET0_B64
+
+    // --- description from .arch file ---
+    // D.u64[S0.u[5:0]] = 0.
+    void
+    Inst_SOP1__S_BITSET0_B64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
+        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
+
+        src.read();
+
+        sdst.setBit(bits(src.rawData(), 5, 0), 0);
+
+        sdst.write();
+    } // execute
+    // --- Inst_SOP1__S_BITSET1_B32 class methods ---
+
+    Inst_SOP1__S_BITSET1_B32::Inst_SOP1__S_BITSET1_B32(InFmt_SOP1 *iFmt)
+        : Inst_SOP1(iFmt, "s_bitset1_b32")
+    {
+        setFlag(ALU);
+    } // Inst_SOP1__S_BITSET1_B32
+
+    Inst_SOP1__S_BITSET1_B32::~Inst_SOP1__S_BITSET1_B32()
+    {
+    } // ~Inst_SOP1__S_BITSET1_B32
+
+    // --- description from .arch file ---
+    // D.u[S0.u[4:0]] = 1.
+    void
+    Inst_SOP1__S_BITSET1_B32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
+        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
+
+        src.read();
+
+        sdst.setBit(bits(src.rawData(), 4, 0), 1);
+
+        sdst.write();
+    } // execute
+    // --- Inst_SOP1__S_BITSET1_B64 class methods ---
+
+    Inst_SOP1__S_BITSET1_B64::Inst_SOP1__S_BITSET1_B64(InFmt_SOP1 *iFmt)
+        : Inst_SOP1(iFmt, "s_bitset1_b64")
+    {
+        setFlag(ALU);
+    } // Inst_SOP1__S_BITSET1_B64
+
+    Inst_SOP1__S_BITSET1_B64::~Inst_SOP1__S_BITSET1_B64()
+    {
+    } // ~Inst_SOP1__S_BITSET1_B64
+
+    // --- description from .arch file ---
+    // D.u64[S0.u[5:0]] = 1.
+    void
+    Inst_SOP1__S_BITSET1_B64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
+        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
+
+        src.read();
+
+        sdst.setBit(bits(src.rawData(), 5, 0), 1);
+
+        sdst.write();
+    } // execute
+    // --- Inst_SOP1__S_GETPC_B64 class methods ---
+
+    Inst_SOP1__S_GETPC_B64::Inst_SOP1__S_GETPC_B64(InFmt_SOP1 *iFmt)
+        : Inst_SOP1(iFmt, "s_getpc_b64")
+    {
+        setFlag(ALU);
+    } // Inst_SOP1__S_GETPC_B64
+
+    Inst_SOP1__S_GETPC_B64::~Inst_SOP1__S_GETPC_B64()
+    {
+    } // ~Inst_SOP1__S_GETPC_B64
+
+    // --- description from .arch file ---
+    // D.u64 = PC + 4.
+    // Destination receives the byte address of the next instruction.
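+    // The SOP1 encoding is 4 bytes, so PC + 4 is the byte address of the
+    // instruction that immediately follows s_getpc_b64.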
+ void + Inst_SOP1__S_GETPC_B64::execute(GPUDynInstPtr gpuDynInst) + { + Addr pc = gpuDynInst->pc(); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + + sdst = pc + 4; + + sdst.write(); + } // execute + // --- Inst_SOP1__S_SETPC_B64 class methods --- + + Inst_SOP1__S_SETPC_B64::Inst_SOP1__S_SETPC_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_setpc_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_SETPC_B64 + + Inst_SOP1__S_SETPC_B64::~Inst_SOP1__S_SETPC_B64() + { + } // ~Inst_SOP1__S_SETPC_B64 + + // --- description from .arch file --- + // PC = S0.u64. + // S0.u64 is a byte address of the instruction to jump to. + void + Inst_SOP1__S_SETPC_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + + src.read(); + + wf->pc(src.rawData()); + } // execute + // --- Inst_SOP1__S_SWAPPC_B64 class methods --- + + Inst_SOP1__S_SWAPPC_B64::Inst_SOP1__S_SWAPPC_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_swappc_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_SWAPPC_B64 + + Inst_SOP1__S_SWAPPC_B64::~Inst_SOP1__S_SWAPPC_B64() + { + } // ~Inst_SOP1__S_SWAPPC_B64 + + // --- description from .arch file --- + // D.u64 = PC + 4; PC = S0.u64. + // S0.u64 is a byte address of the instruction to jump to. + void + Inst_SOP1__S_SWAPPC_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + Addr pc = gpuDynInst->pc(); + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = pc + 4; + + wf->pc(src.rawData()); + sdst.write(); + } // execute + // --- Inst_SOP1__S_RFE_B64 class methods --- + + Inst_SOP1__S_RFE_B64::Inst_SOP1__S_RFE_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_rfe_b64") + { + } // Inst_SOP1__S_RFE_B64 + + Inst_SOP1__S_RFE_B64::~Inst_SOP1__S_RFE_B64() + { + } // ~Inst_SOP1__S_RFE_B64 + + // --- description from .arch file --- + // PRIV = 0; + // PC = S0.u64. + // Return from exception handler and continue. + // This instruction may only be used within a trap handler. + void + Inst_SOP1__S_RFE_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOP1__S_AND_SAVEEXEC_B64 class methods --- + + Inst_SOP1__S_AND_SAVEEXEC_B64::Inst_SOP1__S_AND_SAVEEXEC_B64( + InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_and_saveexec_b64") + { + setFlag(ALU); + setFlag(ReadsEXEC); + setFlag(WritesEXEC); + } // Inst_SOP1__S_AND_SAVEEXEC_B64 + + Inst_SOP1__S_AND_SAVEEXEC_B64::~Inst_SOP1__S_AND_SAVEEXEC_B64() + { + } // ~Inst_SOP1__S_AND_SAVEEXEC_B64 + + // --- description from .arch file --- + // D.u64 = EXEC; + // EXEC = S0.u64 & EXEC; + // SCC = 1 if the new value of EXEC is non-zero. + void + Inst_SOP1__S_AND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = wf->execMask().to_ullong(); + wf->execMask() = src.rawData() & wf->execMask().to_ullong(); + scc = wf->execMask().any() ? 
1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP1__S_OR_SAVEEXEC_B64 class methods --- + + Inst_SOP1__S_OR_SAVEEXEC_B64::Inst_SOP1__S_OR_SAVEEXEC_B64( + InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_or_saveexec_b64") + { + setFlag(ALU); + setFlag(ReadsEXEC); + setFlag(WritesEXEC); + } // Inst_SOP1__S_OR_SAVEEXEC_B64 + + Inst_SOP1__S_OR_SAVEEXEC_B64::~Inst_SOP1__S_OR_SAVEEXEC_B64() + { + } // ~Inst_SOP1__S_OR_SAVEEXEC_B64 + + // --- description from .arch file --- + // D.u64 = EXEC; + // EXEC = S0.u64 | EXEC; + // SCC = 1 if the new value of EXEC is non-zero. + void + Inst_SOP1__S_OR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = wf->execMask().to_ullong(); + wf->execMask() = src.rawData() | wf->execMask().to_ullong(); + scc = wf->execMask().any() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP1__S_XOR_SAVEEXEC_B64 class methods --- + + Inst_SOP1__S_XOR_SAVEEXEC_B64::Inst_SOP1__S_XOR_SAVEEXEC_B64( + InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_xor_saveexec_b64") + { + setFlag(ALU); + setFlag(ReadsEXEC); + setFlag(WritesEXEC); + } // Inst_SOP1__S_XOR_SAVEEXEC_B64 + + Inst_SOP1__S_XOR_SAVEEXEC_B64::~Inst_SOP1__S_XOR_SAVEEXEC_B64() + { + } // ~Inst_SOP1__S_XOR_SAVEEXEC_B64 + + // --- description from .arch file --- + // D.u64 = EXEC; + // EXEC = S0.u64 ^ EXEC; + // SCC = 1 if the new value of EXEC is non-zero. + void + Inst_SOP1__S_XOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = wf->execMask().to_ullong(); + wf->execMask() = src.rawData() ^ wf->execMask().to_ullong(); + scc = wf->execMask().any() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP1__S_ANDN2_SAVEEXEC_B64 class methods --- + + Inst_SOP1__S_ANDN2_SAVEEXEC_B64::Inst_SOP1__S_ANDN2_SAVEEXEC_B64( + InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_andn2_saveexec_b64") + { + setFlag(ALU); + setFlag(ReadsEXEC); + setFlag(WritesEXEC); + } // Inst_SOP1__S_ANDN2_SAVEEXEC_B64 + + Inst_SOP1__S_ANDN2_SAVEEXEC_B64::~Inst_SOP1__S_ANDN2_SAVEEXEC_B64() + { + } // ~Inst_SOP1__S_ANDN2_SAVEEXEC_B64 + + // --- description from .arch file --- + // D.u64 = EXEC; + // EXEC = S0.u64 & ~EXEC; + // SCC = 1 if the new value of EXEC is non-zero. + void + Inst_SOP1__S_ANDN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = wf->execMask().to_ullong(); + wf->execMask() = src.rawData() &~ wf->execMask().to_ullong(); + scc = wf->execMask().any() ? 
1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP1__S_ORN2_SAVEEXEC_B64 class methods --- + + Inst_SOP1__S_ORN2_SAVEEXEC_B64::Inst_SOP1__S_ORN2_SAVEEXEC_B64( + InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_orn2_saveexec_b64") + { + setFlag(ALU); + setFlag(ReadsEXEC); + setFlag(WritesEXEC); + } // Inst_SOP1__S_ORN2_SAVEEXEC_B64 + + Inst_SOP1__S_ORN2_SAVEEXEC_B64::~Inst_SOP1__S_ORN2_SAVEEXEC_B64() + { + } // ~Inst_SOP1__S_ORN2_SAVEEXEC_B64 + + // --- description from .arch file --- + // D.u64 = EXEC; + // EXEC = S0.u64 | ~EXEC; + // SCC = 1 if the new value of EXEC is non-zero. + void + Inst_SOP1__S_ORN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = wf->execMask().to_ullong(); + wf->execMask() = src.rawData() |~ wf->execMask().to_ullong(); + scc = wf->execMask().any() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP1__S_NAND_SAVEEXEC_B64 class methods --- + + Inst_SOP1__S_NAND_SAVEEXEC_B64::Inst_SOP1__S_NAND_SAVEEXEC_B64( + InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_nand_saveexec_b64") + { + setFlag(ALU); + setFlag(ReadsEXEC); + setFlag(WritesEXEC); + } // Inst_SOP1__S_NAND_SAVEEXEC_B64 + + Inst_SOP1__S_NAND_SAVEEXEC_B64::~Inst_SOP1__S_NAND_SAVEEXEC_B64() + { + } // ~Inst_SOP1__S_NAND_SAVEEXEC_B64 + + // --- description from .arch file --- + // D.u64 = EXEC; + // EXEC = ~(S0.u64 & EXEC); + // SCC = 1 if the new value of EXEC is non-zero. + void + Inst_SOP1__S_NAND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = wf->execMask().to_ullong(); + wf->execMask() = ~(src.rawData() & wf->execMask().to_ullong()); + scc = wf->execMask().any() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP1__S_NOR_SAVEEXEC_B64 class methods --- + + Inst_SOP1__S_NOR_SAVEEXEC_B64::Inst_SOP1__S_NOR_SAVEEXEC_B64( + InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_nor_saveexec_b64") + { + setFlag(ALU); + setFlag(ReadsEXEC); + setFlag(WritesEXEC); + } // Inst_SOP1__S_NOR_SAVEEXEC_B64 + + Inst_SOP1__S_NOR_SAVEEXEC_B64::~Inst_SOP1__S_NOR_SAVEEXEC_B64() + { + } // ~Inst_SOP1__S_NOR_SAVEEXEC_B64 + + // --- description from .arch file --- + // D.u64 = EXEC; + // EXEC = ~(S0.u64 | EXEC); + // SCC = 1 if the new value of EXEC is non-zero. + void + Inst_SOP1__S_NOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = wf->execMask().to_ullong(); + wf->execMask() = ~(src.rawData() | wf->execMask().to_ullong()); + scc = wf->execMask().any() ? 
1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP1__S_XNOR_SAVEEXEC_B64 class methods --- + + Inst_SOP1__S_XNOR_SAVEEXEC_B64::Inst_SOP1__S_XNOR_SAVEEXEC_B64( + InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_xnor_saveexec_b64") + { + setFlag(ALU); + setFlag(ReadsEXEC); + setFlag(WritesEXEC); + } // Inst_SOP1__S_XNOR_SAVEEXEC_B64 + + Inst_SOP1__S_XNOR_SAVEEXEC_B64::~Inst_SOP1__S_XNOR_SAVEEXEC_B64() + { + } // ~Inst_SOP1__S_XNOR_SAVEEXEC_B64 + + // --- description from .arch file --- + // D.u64 = EXEC; + // EXEC = ~(S0.u64 ^ EXEC); + // SCC = 1 if the new value of EXEC is non-zero. + void + Inst_SOP1__S_XNOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = wf->execMask().to_ullong(); + wf->execMask() = ~(src.rawData() ^ wf->execMask().to_ullong()); + scc = wf->execMask().any() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP1__S_QUADMASK_B32 class methods --- + + Inst_SOP1__S_QUADMASK_B32::Inst_SOP1__S_QUADMASK_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_quadmask_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_QUADMASK_B32 + + Inst_SOP1__S_QUADMASK_B32::~Inst_SOP1__S_QUADMASK_B32() + { + } // ~Inst_SOP1__S_QUADMASK_B32 + + // --- description from .arch file --- + // D.u = QuadMask(S0.u): + // D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[31:8] = 0; + // SCC = 1 if result is non-zero. + void + Inst_SOP1__S_QUADMASK_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = quadMask(src.rawData()); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP1__S_QUADMASK_B64 class methods --- + + Inst_SOP1__S_QUADMASK_B64::Inst_SOP1__S_QUADMASK_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_quadmask_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_QUADMASK_B64 + + Inst_SOP1__S_QUADMASK_B64::~Inst_SOP1__S_QUADMASK_B64() + { + } // ~Inst_SOP1__S_QUADMASK_B64 + + // --- description from .arch file --- + // D.u64 = QuadMask(S0.u64): + // D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[63:16] = 0; + // SCC = 1 if result is non-zero. + void + Inst_SOP1__S_QUADMASK_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = quadMask(src.rawData()); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP1__S_MOVRELS_B32 class methods --- + + Inst_SOP1__S_MOVRELS_B32::Inst_SOP1__S_MOVRELS_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_movrels_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_MOVRELS_B32 + + Inst_SOP1__S_MOVRELS_B32::~Inst_SOP1__S_MOVRELS_B32() + { + } // ~Inst_SOP1__S_MOVRELS_B32 + + // --- description from .arch file --- + // D.u = SGPR[S0.u + M0.u].u (move from relative source). 
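+    // Example: with M0 = 3 and SSRC0 naming s4, the value read is s7; M0
+    // supplies the runtime index added to the encoded source register.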
+ void + Inst_SOP1__S_MOVRELS_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 m0(gpuDynInst, REG_M0); + m0.read(); + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0 + m0.rawData()); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = src.rawData(); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_MOVRELS_B64 class methods --- + + Inst_SOP1__S_MOVRELS_B64::Inst_SOP1__S_MOVRELS_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_movrels_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_MOVRELS_B64 + + Inst_SOP1__S_MOVRELS_B64::~Inst_SOP1__S_MOVRELS_B64() + { + } // ~Inst_SOP1__S_MOVRELS_B64 + + // --- description from .arch file --- + // D.u64 = SGPR[S0.u + M0.u].u64 (move from relative source). + // The index in M0.u must be even for this operation. + void + Inst_SOP1__S_MOVRELS_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 m0(gpuDynInst, REG_M0); + m0.read(); + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0 + m0.rawData()); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = src.rawData(); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_MOVRELD_B32 class methods --- + + Inst_SOP1__S_MOVRELD_B32::Inst_SOP1__S_MOVRELD_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_movreld_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_MOVRELD_B32 + + Inst_SOP1__S_MOVRELD_B32::~Inst_SOP1__S_MOVRELD_B32() + { + } // ~Inst_SOP1__S_MOVRELD_B32 + + // --- description from .arch file --- + // SGPR[D.u + M0.u].u = S0.u (move to relative destination). + void + Inst_SOP1__S_MOVRELD_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 m0(gpuDynInst, REG_M0); + m0.read(); + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST + m0.rawData()); + + src.read(); + + sdst = src.rawData(); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_MOVRELD_B64 class methods --- + + Inst_SOP1__S_MOVRELD_B64::Inst_SOP1__S_MOVRELD_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_movreld_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_MOVRELD_B64 + + Inst_SOP1__S_MOVRELD_B64::~Inst_SOP1__S_MOVRELD_B64() + { + } // ~Inst_SOP1__S_MOVRELD_B64 + + // --- description from .arch file --- + // SGPR[D.u + M0.u].u64 = S0.u64 (move to relative destination). + // The index in M0.u must be even for this operation. + void + Inst_SOP1__S_MOVRELD_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 m0(gpuDynInst, REG_M0); + m0.read(); + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST + m0.rawData()); + + src.read(); + + sdst = src.rawData(); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_CBRANCH_JOIN class methods --- + + Inst_SOP1__S_CBRANCH_JOIN::Inst_SOP1__S_CBRANCH_JOIN(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_cbranch_join") + { + setFlag(Branch); + setFlag(WritesEXEC); + } // Inst_SOP1__S_CBRANCH_JOIN + + Inst_SOP1__S_CBRANCH_JOIN::~Inst_SOP1__S_CBRANCH_JOIN() + { + } // ~Inst_SOP1__S_CBRANCH_JOIN + + // --- description from .arch file --- + // saved_csp = S0.u; + // if (CSP == saved_csp) then + // PC += 4; // Second time to JOIN: continue with program. + // else + // CSP -= 1; // First time to JOIN; jump to other FORK path. + // {PC, EXEC} = SGPR[CSP * 4]; // Read 128 bits from 4 consecutive + // SGPRs. + // end + // Conditional branch join point (end of conditional branch block). S0 is + // saved CSP value. + // See S_CBRANCH_G_FORK and S_CBRANCH_I_FORK for related instructions. 
+ void + Inst_SOP1__S_CBRANCH_JOIN::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOP1__S_ABS_I32 class methods --- + + Inst_SOP1__S_ABS_I32::Inst_SOP1__S_ABS_I32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_abs_i32") + { + setFlag(ALU); + } // Inst_SOP1__S_ABS_I32 + + Inst_SOP1__S_ABS_I32::~Inst_SOP1__S_ABS_I32() + { + } // ~Inst_SOP1__S_ABS_I32 + + // --- description from .arch file --- + // if (S.i < 0) then D.i = -S.i; + // else D.i = S.i; + // SCC = 1 if result is non-zero. + // Integer absolute value. + void + Inst_SOP1__S_ABS_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = std::abs(src.rawData()); + + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP1__S_MOV_FED_B32 class methods --- + + Inst_SOP1__S_MOV_FED_B32::Inst_SOP1__S_MOV_FED_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_mov_fed_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_MOV_FED_B32 + + Inst_SOP1__S_MOV_FED_B32::~Inst_SOP1__S_MOV_FED_B32() + { + } // ~Inst_SOP1__S_MOV_FED_B32 + + // --- description from .arch file --- + // D.u = S0.u. Introduce an EDC double-detect error on write to the + // destination SGPR. + void + Inst_SOP1__S_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOP1__S_SET_GPR_IDX_IDX class methods --- + + Inst_SOP1__S_SET_GPR_IDX_IDX::Inst_SOP1__S_SET_GPR_IDX_IDX( + InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_set_gpr_idx_idx") + { + } // Inst_SOP1__S_SET_GPR_IDX_IDX + + Inst_SOP1__S_SET_GPR_IDX_IDX::~Inst_SOP1__S_SET_GPR_IDX_IDX() + { + } // ~Inst_SOP1__S_SET_GPR_IDX_IDX + + // --- description from .arch file --- + // M0[7:0] = S0.u[7:0]. + // Modify the index used in vector GPR indexing. + void + Inst_SOP1__S_SET_GPR_IDX_IDX::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/sop2.cc b/src/arch/amdgpu/vega/insts/sop2.cc new file mode 100644 index 0000000000..93618b2124 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/sop2.cc @@ -0,0 +1,1555 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_SOP2__S_ADD_U32 class methods --- + + Inst_SOP2__S_ADD_U32::Inst_SOP2__S_ADD_U32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_add_u32") + { + setFlag(ALU); + } // Inst_SOP2__S_ADD_U32 + + Inst_SOP2__S_ADD_U32::~Inst_SOP2__S_ADD_U32() + { + } // ~Inst_SOP2__S_ADD_U32 + + // --- description from .arch file --- + // D.u = S0.u + S1.u; + // SCC = (S0.u + S1.u >= 0x800000000ULL ? 1 : 0) is an unsigned + // --- overflow/carry-out for S_ADDC_U32. + void + Inst_SOP2__S_ADD_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() + src1.rawData(); + scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData()) + >= 0x100000000ULL ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_SUB_U32 class methods --- + + Inst_SOP2__S_SUB_U32::Inst_SOP2__S_SUB_U32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_sub_u32") + { + setFlag(ALU); + } // Inst_SOP2__S_SUB_U32 + + Inst_SOP2__S_SUB_U32::~Inst_SOP2__S_SUB_U32() + { + } // ~Inst_SOP2__S_SUB_U32 + + // --- description from .arch file --- + // D.u = S0.u - S1.u; + // SCC = (S1.u > S0.u ? 1 : 0) is an unsigned overflow or carry-out for + // --- S_SUBB_U32. + void + Inst_SOP2__S_SUB_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() - src1.rawData(); + scc = (src1.rawData() > src0.rawData()) ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_ADD_I32 class methods --- + + Inst_SOP2__S_ADD_I32::Inst_SOP2__S_ADD_I32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_add_i32") + { + setFlag(ALU); + } // Inst_SOP2__S_ADD_I32 + + Inst_SOP2__S_ADD_I32::~Inst_SOP2__S_ADD_I32() + { + } // ~Inst_SOP2__S_ADD_I32 + + // --- description from .arch file --- + // D.i = S0.i + S1.i; + // SCC = (S0.u[31] == S1.u[31] && S0.u[31] != D.u[31]) is a signed + // overflow. + // This opcode is not suitable for use with S_ADDC_U32 for implementing + // 64-bit operations. + void + Inst_SOP2__S_ADD_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() + src1.rawData(); + scc = (bits(src0.rawData(), 31) == bits(src1.rawData(), 31) + && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) + ? 
1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_SUB_I32 class methods --- + + Inst_SOP2__S_SUB_I32::Inst_SOP2__S_SUB_I32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_sub_i32") + { + setFlag(ALU); + } // Inst_SOP2__S_SUB_I32 + + Inst_SOP2__S_SUB_I32::~Inst_SOP2__S_SUB_I32() + { + } // ~Inst_SOP2__S_SUB_I32 + + // --- description from .arch file --- + // D.i = S0.i - S1.i; + // SCC = (S0.u[31] != S1.u[31] && S0.u[31] != D.u[31]) is a signed + // overflow. + // CAUTION: The condition code behaviour for this opcode is inconsistent + // with V_SUB_I32; see V_SUB_I32 for further details. + // This opcode is not suitable for use with S_SUBB_U32 for implementing + // 64-bit operations. + void + Inst_SOP2__S_SUB_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() - src1.rawData(); + scc = (bits(src0.rawData(), 31) != bits(src1.rawData(), 31) + && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_ADDC_U32 class methods --- + + Inst_SOP2__S_ADDC_U32::Inst_SOP2__S_ADDC_U32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_addc_u32") + { + setFlag(ALU); + } // Inst_SOP2__S_ADDC_U32 + + Inst_SOP2__S_ADDC_U32::~Inst_SOP2__S_ADDC_U32() + { + } // ~Inst_SOP2__S_ADDC_U32 + + // --- description from .arch file --- + // D.u = S0.u + S1.u + SCC; + // SCC = (S0.u + S1.u + SCC >= 0x800000000ULL ? 1 : 0) is an unsigned + // overflow. + void + Inst_SOP2__S_ADDC_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + scc.read(); + + sdst = src0.rawData() + src1.rawData() + scc.rawData(); + scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData() + + (ScalarRegU64)scc.rawData()) >= 0x100000000ULL ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_SUBB_U32 class methods --- + + Inst_SOP2__S_SUBB_U32::Inst_SOP2__S_SUBB_U32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_subb_u32") + { + setFlag(ALU); + } // Inst_SOP2__S_SUBB_U32 + + Inst_SOP2__S_SUBB_U32::~Inst_SOP2__S_SUBB_U32() + { + } // ~Inst_SOP2__S_SUBB_U32 + + // --- description from .arch file --- + // D.u = S0.u - S1.u - SCC; + // SCC = (S1.u + SCC > S0.u ? 1 : 0) is an unsigned overflow. + void + Inst_SOP2__S_SUBB_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + scc.read(); + + sdst = src0.rawData() - src1.rawData() - scc.rawData(); + scc = (src1.rawData() + scc.rawData()) > src0.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_MIN_I32 class methods --- + + Inst_SOP2__S_MIN_I32::Inst_SOP2__S_MIN_I32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_min_i32") + { + setFlag(ALU); + } // Inst_SOP2__S_MIN_I32 + + Inst_SOP2__S_MIN_I32::~Inst_SOP2__S_MIN_I32() + { + } // ~Inst_SOP2__S_MIN_I32 + + // --- description from .arch file --- + // D.i = (S0.i < S1.i) ? 
S0.i : S1.i; + // SCC = 1 if S0 is chosen as the minimum value. + void + Inst_SOP2__S_MIN_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = std::min(src0.rawData(), src1.rawData()); + scc = (src0.rawData() < src1.rawData()) ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_MIN_U32 class methods --- + + Inst_SOP2__S_MIN_U32::Inst_SOP2__S_MIN_U32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_min_u32") + { + setFlag(ALU); + } // Inst_SOP2__S_MIN_U32 + + Inst_SOP2__S_MIN_U32::~Inst_SOP2__S_MIN_U32() + { + } // ~Inst_SOP2__S_MIN_U32 + + // --- description from .arch file --- + // D.u = (S0.u < S1.u) ? S0.u : S1.u; + // SCC = 1 if S0 is chosen as the minimum value. + void + Inst_SOP2__S_MIN_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = std::min(src0.rawData(), src1.rawData()); + scc = (src0.rawData() < src1.rawData()) ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_MAX_I32 class methods --- + + Inst_SOP2__S_MAX_I32::Inst_SOP2__S_MAX_I32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_max_i32") + { + setFlag(ALU); + } // Inst_SOP2__S_MAX_I32 + + Inst_SOP2__S_MAX_I32::~Inst_SOP2__S_MAX_I32() + { + } // ~Inst_SOP2__S_MAX_I32 + + // --- description from .arch file --- + // D.i = (S0.i > S1.i) ? S0.i : S1.i; + // SCC = 1 if S0 is chosen as the maximum value. + void + Inst_SOP2__S_MAX_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = std::max(src0.rawData(), src1.rawData()); + scc = (src0.rawData() > src1.rawData()) ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_MAX_U32 class methods --- + + Inst_SOP2__S_MAX_U32::Inst_SOP2__S_MAX_U32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_max_u32") + { + setFlag(ALU); + } // Inst_SOP2__S_MAX_U32 + + Inst_SOP2__S_MAX_U32::~Inst_SOP2__S_MAX_U32() + { + } // ~Inst_SOP2__S_MAX_U32 + + // --- description from .arch file --- + // D.u = (S0.u > S1.u) ? S0.u : S1.u; + // SCC = 1 if S0 is chosen as the maximum value. + void + Inst_SOP2__S_MAX_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = std::max(src0.rawData(), src1.rawData()); + scc = (src0.rawData() > src1.rawData()) ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_CSELECT_B32 class methods --- + + Inst_SOP2__S_CSELECT_B32::Inst_SOP2__S_CSELECT_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_cselect_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_CSELECT_B32 + + Inst_SOP2__S_CSELECT_B32::~Inst_SOP2__S_CSELECT_B32() + { + } // ~Inst_SOP2__S_CSELECT_B32 + + // --- description from .arch file --- + // D.u = SCC ? 
S0.u : S1.u (conditional select). + void + Inst_SOP2__S_CSELECT_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + scc.read(); + + sdst = scc.rawData() ? src0.rawData() : src1.rawData(); + + sdst.write(); + } // execute + // --- Inst_SOP2__S_CSELECT_B64 class methods --- + + Inst_SOP2__S_CSELECT_B64::Inst_SOP2__S_CSELECT_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_cselect_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_CSELECT_B64 + + Inst_SOP2__S_CSELECT_B64::~Inst_SOP2__S_CSELECT_B64() + { + } // ~Inst_SOP2__S_CSELECT_B64 + + // --- description from .arch file --- + // D.u64 = SCC ? S0.u64 : S1.u64 (conditional select). + void + Inst_SOP2__S_CSELECT_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + scc.read(); + + sdst = scc.rawData() ? src0.rawData() : src1.rawData(); + + sdst.write(); + } // execute + // --- Inst_SOP2__S_AND_B32 class methods --- + + Inst_SOP2__S_AND_B32::Inst_SOP2__S_AND_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_and_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_AND_B32 + + Inst_SOP2__S_AND_B32::~Inst_SOP2__S_AND_B32() + { + } // ~Inst_SOP2__S_AND_B32 + + // --- description from .arch file --- + // D.u = S0.u & S1.u; + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_AND_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() & src1.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_AND_B64 class methods --- + + Inst_SOP2__S_AND_B64::Inst_SOP2__S_AND_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_and_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_AND_B64 + + Inst_SOP2__S_AND_B64::~Inst_SOP2__S_AND_B64() + { + } // ~Inst_SOP2__S_AND_B64 + + // --- description from .arch file --- + // D.u64 = S0.u64 & S1.u64; + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_AND_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() & src1.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_OR_B32 class methods --- + + Inst_SOP2__S_OR_B32::Inst_SOP2__S_OR_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_or_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_OR_B32 + + Inst_SOP2__S_OR_B32::~Inst_SOP2__S_OR_B32() + { + } // ~Inst_SOP2__S_OR_B32 + + // --- description from .arch file --- + // D.u = S0.u | S1.u; + // SCC = 1 if result is non-zero. 
+ void + Inst_SOP2__S_OR_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() | src1.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_OR_B64 class methods --- + + Inst_SOP2__S_OR_B64::Inst_SOP2__S_OR_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_or_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_OR_B64 + + Inst_SOP2__S_OR_B64::~Inst_SOP2__S_OR_B64() + { + } // ~Inst_SOP2__S_OR_B64 + + // --- description from .arch file --- + // D.u64 = S0.u64 | S1.u64; + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_OR_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() | src1.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_XOR_B32 class methods --- + + Inst_SOP2__S_XOR_B32::Inst_SOP2__S_XOR_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_xor_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_XOR_B32 + + Inst_SOP2__S_XOR_B32::~Inst_SOP2__S_XOR_B32() + { + } // ~Inst_SOP2__S_XOR_B32 + + // --- description from .arch file --- + // D.u = S0.u ^ S1.u; + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_XOR_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() ^ src1.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_XOR_B64 class methods --- + + Inst_SOP2__S_XOR_B64::Inst_SOP2__S_XOR_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_xor_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_XOR_B64 + + Inst_SOP2__S_XOR_B64::~Inst_SOP2__S_XOR_B64() + { + } // ~Inst_SOP2__S_XOR_B64 + + // --- description from .arch file --- + // D.u64 = S0.u64 ^ S1.u64; + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_XOR_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() ^ src1.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_ANDN2_B32 class methods --- + + Inst_SOP2__S_ANDN2_B32::Inst_SOP2__S_ANDN2_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_andn2_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_ANDN2_B32 + + Inst_SOP2__S_ANDN2_B32::~Inst_SOP2__S_ANDN2_B32() + { + } // ~Inst_SOP2__S_ANDN2_B32 + + // --- description from .arch file --- + // D.u = S0.u & ~S1.u; + // SCC = 1 if result is non-zero. 
+ void + Inst_SOP2__S_ANDN2_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() &~ src1.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_ANDN2_B64 class methods --- + + Inst_SOP2__S_ANDN2_B64::Inst_SOP2__S_ANDN2_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_andn2_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_ANDN2_B64 + + Inst_SOP2__S_ANDN2_B64::~Inst_SOP2__S_ANDN2_B64() + { + } // ~Inst_SOP2__S_ANDN2_B64 + + // --- description from .arch file --- + // D.u64 = S0.u64 & ~S1.u64; + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_ANDN2_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() &~ src1.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_ORN2_B32 class methods --- + + Inst_SOP2__S_ORN2_B32::Inst_SOP2__S_ORN2_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_orn2_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_ORN2_B32 + + Inst_SOP2__S_ORN2_B32::~Inst_SOP2__S_ORN2_B32() + { + } // ~Inst_SOP2__S_ORN2_B32 + + // --- description from .arch file --- + // D.u = S0.u | ~S1.u; + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_ORN2_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() |~ src1.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_ORN2_B64 class methods --- + + Inst_SOP2__S_ORN2_B64::Inst_SOP2__S_ORN2_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_orn2_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_ORN2_B64 + + Inst_SOP2__S_ORN2_B64::~Inst_SOP2__S_ORN2_B64() + { + } // ~Inst_SOP2__S_ORN2_B64 + + // --- description from .arch file --- + // D.u64 = S0.u64 | ~S1.u64; + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_ORN2_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() |~ src1.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_NAND_B32 class methods --- + + Inst_SOP2__S_NAND_B32::Inst_SOP2__S_NAND_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_nand_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_NAND_B32 + + Inst_SOP2__S_NAND_B32::~Inst_SOP2__S_NAND_B32() + { + } // ~Inst_SOP2__S_NAND_B32 + + // --- description from .arch file --- + // D.u = ~(S0.u & S1.u); + // SCC = 1 if result is non-zero. 
+ void + Inst_SOP2__S_NAND_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = ~(src0.rawData() & src1.rawData()); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_NAND_B64 class methods --- + + Inst_SOP2__S_NAND_B64::Inst_SOP2__S_NAND_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_nand_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_NAND_B64 + + Inst_SOP2__S_NAND_B64::~Inst_SOP2__S_NAND_B64() + { + } // ~Inst_SOP2__S_NAND_B64 + + // --- description from .arch file --- + // D.u64 = ~(S0.u64 & S1.u64); + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_NAND_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = ~(src0.rawData() & src1.rawData()); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_NOR_B32 class methods --- + + Inst_SOP2__S_NOR_B32::Inst_SOP2__S_NOR_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_nor_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_NOR_B32 + + Inst_SOP2__S_NOR_B32::~Inst_SOP2__S_NOR_B32() + { + } // ~Inst_SOP2__S_NOR_B32 + + // --- description from .arch file --- + // D.u = ~(S0.u | S1.u); + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_NOR_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = ~(src0.rawData() | src1.rawData()); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_NOR_B64 class methods --- + + Inst_SOP2__S_NOR_B64::Inst_SOP2__S_NOR_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_nor_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_NOR_B64 + + Inst_SOP2__S_NOR_B64::~Inst_SOP2__S_NOR_B64() + { + } // ~Inst_SOP2__S_NOR_B64 + + // --- description from .arch file --- + // D.u64 = ~(S0.u64 | S1.u64); + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_NOR_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = ~(src0.rawData() | src1.rawData()); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_XNOR_B32 class methods --- + + Inst_SOP2__S_XNOR_B32::Inst_SOP2__S_XNOR_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_xnor_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_XNOR_B32 + + Inst_SOP2__S_XNOR_B32::~Inst_SOP2__S_XNOR_B32() + { + } // ~Inst_SOP2__S_XNOR_B32 + + // --- description from .arch file --- + // D.u = ~(S0.u ^ S1.u); + // SCC = 1 if result is non-zero. 
+ void + Inst_SOP2__S_XNOR_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = ~(src0.rawData() ^ src1.rawData()); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_XNOR_B64 class methods --- + + Inst_SOP2__S_XNOR_B64::Inst_SOP2__S_XNOR_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_xnor_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_XNOR_B64 + + Inst_SOP2__S_XNOR_B64::~Inst_SOP2__S_XNOR_B64() + { + } // ~Inst_SOP2__S_XNOR_B64 + + // --- description from .arch file --- + // D.u64 = ~(S0.u64 ^ S1.u64); + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_XNOR_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = ~(src0.rawData() ^ src1.rawData()); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_LSHL_B32 class methods --- + + Inst_SOP2__S_LSHL_B32::Inst_SOP2__S_LSHL_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_lshl_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_LSHL_B32 + + Inst_SOP2__S_LSHL_B32::~Inst_SOP2__S_LSHL_B32() + { + } // ~Inst_SOP2__S_LSHL_B32 + + // --- description from .arch file --- + // D.u = S0.u << S1.u[4:0]; + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_LSHL_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = (src0.rawData() << bits(src1.rawData(), 4, 0)); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_LSHL_B64 class methods --- + + Inst_SOP2__S_LSHL_B64::Inst_SOP2__S_LSHL_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_lshl_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_LSHL_B64 + + Inst_SOP2__S_LSHL_B64::~Inst_SOP2__S_LSHL_B64() + { + } // ~Inst_SOP2__S_LSHL_B64 + + // --- description from .arch file --- + // D.u64 = S0.u64 << S1.u[5:0]; + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_LSHL_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = (src0.rawData() << bits(src1.rawData(), 5, 0)); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_LSHR_B32 class methods --- + + Inst_SOP2__S_LSHR_B32::Inst_SOP2__S_LSHR_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_lshr_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_LSHR_B32 + + Inst_SOP2__S_LSHR_B32::~Inst_SOP2__S_LSHR_B32() + { + } // ~Inst_SOP2__S_LSHR_B32 + + // --- description from .arch file --- + // D.u = S0.u >> S1.u[4:0]; + // SCC = 1 if result is non-zero. + // The vacated bits are set to zero. 
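Because the scalar shifts use only the low bits of S1 and zero-fill from the left, the result is well defined even for shift amounts written larger than 31; a quick stand-alone check of that semantics (illustrative only, not gem5 code):

    #include <cassert>
    #include <cstdint>

    int main()
    {
        // s_lshr_b32 semantics: only S1[4:0] is used, vacated bits are 0.
        uint32_t s0 = 0xf0000000u;
        uint32_t shift = 36 & 0x1f;            // 36 wraps to 4
        assert((s0 >> shift) == 0x0f000000u);
        return 0;
    }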
+ void + Inst_SOP2__S_LSHR_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0)); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_LSHR_B64 class methods --- + + Inst_SOP2__S_LSHR_B64::Inst_SOP2__S_LSHR_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_lshr_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_LSHR_B64 + + Inst_SOP2__S_LSHR_B64::~Inst_SOP2__S_LSHR_B64() + { + } // ~Inst_SOP2__S_LSHR_B64 + + // --- description from .arch file --- + // D.u64 = S0.u64 >> S1.u[5:0]; + // SCC = 1 if result is non-zero. + // The vacated bits are set to zero. + void + Inst_SOP2__S_LSHR_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0)); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_ASHR_I32 class methods --- + + Inst_SOP2__S_ASHR_I32::Inst_SOP2__S_ASHR_I32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_ashr_i32") + { + setFlag(ALU); + } // Inst_SOP2__S_ASHR_I32 + + Inst_SOP2__S_ASHR_I32::~Inst_SOP2__S_ASHR_I32() + { + } // ~Inst_SOP2__S_ASHR_I32 + + // --- description from .arch file --- + // D.i = signext(S0.i) >> S1.u[4:0]; + // SCC = 1 if result is non-zero. + // The vacated bits are set to the sign bit of the input value. + void + Inst_SOP2__S_ASHR_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0)); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_ASHR_I64 class methods --- + + Inst_SOP2__S_ASHR_I64::Inst_SOP2__S_ASHR_I64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_ashr_i64") + { + setFlag(ALU); + } // Inst_SOP2__S_ASHR_I64 + + Inst_SOP2__S_ASHR_I64::~Inst_SOP2__S_ASHR_I64() + { + } // ~Inst_SOP2__S_ASHR_I64 + + // --- description from .arch file --- + // D.i64 = signext(S0.i64) >> S1.u[5:0]; + // SCC = 1 if result is non-zero. + // The vacated bits are set to the sign bit of the input value. + void + Inst_SOP2__S_ASHR_I64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandI64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0)); + scc = sdst.rawData() ? 
1 : 0;
+
+ sdst.write();
+ scc.write();
+ } // execute
+ // --- Inst_SOP2__S_BFM_B32 class methods ---
+
+ Inst_SOP2__S_BFM_B32::Inst_SOP2__S_BFM_B32(InFmt_SOP2 *iFmt)
+ : Inst_SOP2(iFmt, "s_bfm_b32")
+ {
+ setFlag(ALU);
+ } // Inst_SOP2__S_BFM_B32
+
+ Inst_SOP2__S_BFM_B32::~Inst_SOP2__S_BFM_B32()
+ {
+ } // ~Inst_SOP2__S_BFM_B32
+
+ // --- description from .arch file ---
+ // D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0] (bitfield mask).
+ void
+ Inst_SOP2__S_BFM_B32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
+ ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
+ ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
+
+ src0.read();
+ src1.read();
+
+ sdst = ((1 << bits(src0.rawData(), 4, 0)) - 1)
+ << bits(src1.rawData(), 4, 0);
+
+ sdst.write();
+ } // execute
+ // --- Inst_SOP2__S_BFM_B64 class methods ---
+
+ Inst_SOP2__S_BFM_B64::Inst_SOP2__S_BFM_B64(InFmt_SOP2 *iFmt)
+ : Inst_SOP2(iFmt, "s_bfm_b64")
+ {
+ setFlag(ALU);
+ } // Inst_SOP2__S_BFM_B64
+
+ Inst_SOP2__S_BFM_B64::~Inst_SOP2__S_BFM_B64()
+ {
+ } // ~Inst_SOP2__S_BFM_B64
+
+ // --- description from .arch file ---
+ // D.u64 = ((1ULL << S0.u[5:0]) - 1) << S1.u[5:0] (bitfield mask).
+ void
+ Inst_SOP2__S_BFM_B64::execute(GPUDynInstPtr gpuDynInst)
+ {
+ ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
+ ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
+ ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
+
+ src0.read();
+ src1.read();
+
+ sdst = ((1ULL << bits(src0.rawData(), 5, 0)) - 1)
+ << bits(src1.rawData(), 5, 0);
+
+ sdst.write();
+ } // execute
+ // --- Inst_SOP2__S_MUL_I32 class methods ---
+
+ Inst_SOP2__S_MUL_I32::Inst_SOP2__S_MUL_I32(InFmt_SOP2 *iFmt)
+ : Inst_SOP2(iFmt, "s_mul_i32")
+ {
+ setFlag(ALU);
+ } // Inst_SOP2__S_MUL_I32
+
+ Inst_SOP2__S_MUL_I32::~Inst_SOP2__S_MUL_I32()
+ {
+ } // ~Inst_SOP2__S_MUL_I32
+
+ // --- description from .arch file ---
+ // D.i = S0.i * S1.i.
+ void
+ Inst_SOP2__S_MUL_I32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
+ ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
+ ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
+
+ src0.read();
+ src1.read();
+
+ sdst = src0.rawData() * src1.rawData();
+
+ sdst.write();
+ } // execute
+ // --- Inst_SOP2__S_BFE_U32 class methods ---
+
+ Inst_SOP2__S_BFE_U32::Inst_SOP2__S_BFE_U32(InFmt_SOP2 *iFmt)
+ : Inst_SOP2(iFmt, "s_bfe_u32")
+ {
+ setFlag(ALU);
+ } // Inst_SOP2__S_BFE_U32
+
+ Inst_SOP2__S_BFE_U32::~Inst_SOP2__S_BFE_U32()
+ {
+ } // ~Inst_SOP2__S_BFE_U32
+
+ // --- description from .arch file ---
+ // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
+ // field width.
+ // D.u = (S0.u>>S1.u[4:0]) & ((1<<S1.u[22:16])-1);
+ // SCC = 1 if result is non-zero.
+ void
+ Inst_SOP2__S_BFE_U32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
+ ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
+ ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
+ ScalarOperandU32 scc(gpuDynInst, REG_SCC);
+
+ src0.read();
+ src1.read();
+
+ sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0))
+ & ((1 << bits(src1.rawData(), 22, 16)) - 1);
+ scc = sdst.rawData() ? 1 : 0;
+
+ sdst.write();
+ scc.write();
+ } // execute
+ // --- Inst_SOP2__S_BFE_I32 class methods ---
+
+ Inst_SOP2__S_BFE_I32::Inst_SOP2__S_BFE_I32(InFmt_SOP2 *iFmt)
+ : Inst_SOP2(iFmt, "s_bfe_i32")
+ {
+ setFlag(ALU);
+ } // Inst_SOP2__S_BFE_I32
+
+ Inst_SOP2__S_BFE_I32::~Inst_SOP2__S_BFE_I32()
+ {
+ } // ~Inst_SOP2__S_BFE_I32
+
+ // --- description from .arch file ---
+ // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
+ // field width.
+ // D.i = (S0.i>>S1.u[4:0]) & ((1<<S1.u[22:16])-1);
+ // SCC = 1 if result is non-zero.
+ void
+ Inst_SOP2__S_BFE_I32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
+ ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
+ ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
+ ScalarOperandU32 scc(gpuDynInst, REG_SCC);
+
+ src0.read();
+ src1.read();
+
+ sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0))
+ & ((1 << bits(src1.rawData(), 22, 16)) - 1);
+
+ // Above extracted a signed int of size src1[22:16] bits which needs
+ // to be sign-extended. Check if the MSB of our src1[22:16]-bit
+ // integer is 1, and sign extend if it is.
+ //
+ // Note: The description in the Vega ISA manual does not mention to
+ // sign-extend the result. An updated description can be found in the
+ // more recent RDNA3 manual here:
+ // https://developer.amd.com/wp-content/resources/
+ // RDNA3_Shader_ISA_December2022.pdf
+ if (sdst.rawData() >> (bits(src1.rawData(), 22, 16) - 1)) {
+ sdst = sdst.rawData()
+ | (0xffffffff << bits(src1.rawData(), 22, 16));
+ }
+
+ scc = sdst.rawData() ? 1 : 0;
+
+ sdst.write();
+ scc.write();
+ } // execute
+ // --- Inst_SOP2__S_BFE_U64 class methods ---
+
+ Inst_SOP2__S_BFE_U64::Inst_SOP2__S_BFE_U64(InFmt_SOP2 *iFmt)
+ : Inst_SOP2(iFmt, "s_bfe_u64")
+ {
+ setFlag(ALU);
+ } // Inst_SOP2__S_BFE_U64
+
+ Inst_SOP2__S_BFE_U64::~Inst_SOP2__S_BFE_U64()
+ {
+ } // ~Inst_SOP2__S_BFE_U64
+
+ // --- description from .arch file ---
+ // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is
+ // field width.
+ // D.u64 = (S0.u64>>S1.u[5:0]) & ((1<<S1.u[22:16])-1);
+ // SCC = 1 if result is non-zero.
+ void
+ Inst_SOP2__S_BFE_U64::execute(GPUDynInstPtr gpuDynInst)
+ {
+ ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
+ ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
+ ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
+ ScalarOperandU32 scc(gpuDynInst, REG_SCC);
+
+ src0.read();
+ src1.read();
+
+ sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0))
+ & ((1 << bits(src1.rawData(), 22, 16)) - 1);
+ scc = sdst.rawData() ? 1 : 0;
+
+ sdst.write();
+ scc.write();
+ } // execute
+ // --- Inst_SOP2__S_BFE_I64 class methods ---
+
+ Inst_SOP2__S_BFE_I64::Inst_SOP2__S_BFE_I64(InFmt_SOP2 *iFmt)
+ : Inst_SOP2(iFmt, "s_bfe_i64")
+ {
+ setFlag(ALU);
+ } // Inst_SOP2__S_BFE_I64
+
+ Inst_SOP2__S_BFE_I64::~Inst_SOP2__S_BFE_I64()
+ {
+ } // ~Inst_SOP2__S_BFE_I64
+
+ // --- description from .arch file ---
+ // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is
+ // field width.
+ // D.i64 = (S0.i64>>S1.u[5:0]) & ((1<<S1.u[22:16])-1);
+ // SCC = 1 if result is non-zero.
+ void
+ Inst_SOP2__S_BFE_I64::execute(GPUDynInstPtr gpuDynInst)
+ {
+ ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
+ ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
+ ScalarOperandI64 sdst(gpuDynInst, instData.SDST);
+ ScalarOperandU32 scc(gpuDynInst, REG_SCC);
+
+ src0.read();
+ src1.read();
+
+ sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0))
+ & ((1 << bits(src1.rawData(), 22, 16)) - 1);
+
+ // Above extracted a signed int of size src1[22:16] bits which needs
+ // to be sign-extended. Check if the MSB of our src1[22:16]-bit
+ // integer is 1, and sign extend if it is.
+ if (sdst.rawData() >> (bits(src1.rawData(), 22, 16) - 1)) {
+ sdst = sdst.rawData()
+ | 0xffffffffffffffff << bits(src1.rawData(), 22, 16);
+ }
+ scc = sdst.rawData() ? 1 : 0;
+
+ sdst.write();
+ scc.write();
+ } // execute
+ // --- Inst_SOP2__S_CBRANCH_G_FORK class methods ---
+
+ Inst_SOP2__S_CBRANCH_G_FORK::Inst_SOP2__S_CBRANCH_G_FORK(InFmt_SOP2 *iFmt)
+ : Inst_SOP2(iFmt, "s_cbranch_g_fork")
+ {
+ setFlag(Branch);
+ } // Inst_SOP2__S_CBRANCH_G_FORK
+
+ Inst_SOP2__S_CBRANCH_G_FORK::~Inst_SOP2__S_CBRANCH_G_FORK()
+ {
+ } // ~Inst_SOP2__S_CBRANCH_G_FORK
+
+ // --- description from .arch file ---
+ // mask_pass = S0.u64 & EXEC;
+ // mask_fail = ~S0.u64 & EXEC;
+ // if (mask_pass == EXEC)
+ // PC = S1.u64;
+ // elsif (mask_fail == EXEC)
+ // PC += 4;
+ // elsif (bitcount(mask_fail) < bitcount(mask_pass))
+ // EXEC = mask_fail;
+ // SGPR[CSP*4] = { S1.u64, mask_pass };
+ // CSP++;
+ // PC += 4;
+ // else
+ // EXEC = mask_pass;
+ // SGPR[CSP*4] = { PC + 4, mask_fail };
+ // CSP++;
+ // PC = S1.u64;
+ // end.
+ // Conditional branch using branch-stack.
+ // S0 = compare mask(vcc or any sgpr) and
+ // S1 = 64-bit byte address of target instruction.
+ // See also S_CBRANCH_JOIN.
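The fork pseudocode above splits a divergent wavefront by running the side with fewer active lanes first and pushing the other side (its resume PC and mask) onto the branch stack; gem5 leaves the instruction unimplemented below. A stand-alone sketch of just the mask bookkeeping (illustrative only; the names and types are assumptions, not gem5 API):

    #include <bitset>
    #include <cstdint>

    // Given the compare mask S0 and the current EXEC mask, compute both
    // divergence masks and report which one the spec says to run first.
    struct ForkMasks { uint64_t pass; uint64_t fail; bool runFailFirst; };

    inline ForkMasks cbranchGFork(uint64_t s0, uint64_t exec)
    {
        ForkMasks m;
        m.pass = s0 & exec;    // lanes that take the branch to S1
        m.fail = ~s0 & exec;   // lanes that fall through to PC + 4
        // Run the smaller side first so the larger side resumes at the join.
        m.runFailFirst = std::bitset<64>(m.fail).count()
            < std::bitset<64>(m.pass).count();
        return m;
    }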
+ void
+ Inst_SOP2__S_CBRANCH_G_FORK::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_SOP2__S_ABSDIFF_I32 class methods ---
+
+ Inst_SOP2__S_ABSDIFF_I32::Inst_SOP2__S_ABSDIFF_I32(InFmt_SOP2 *iFmt)
+ : Inst_SOP2(iFmt, "s_absdiff_i32")
+ {
+ setFlag(ALU);
+ } // Inst_SOP2__S_ABSDIFF_I32
+
+ Inst_SOP2__S_ABSDIFF_I32::~Inst_SOP2__S_ABSDIFF_I32()
+ {
+ } // ~Inst_SOP2__S_ABSDIFF_I32
+
+ // --- description from .arch file ---
+ // D.i = S0.i - S1.i;
+ // if (D.i < 0) then D.i = -D.i;
+ // SCC = 1 if result is non-zero.
+ // Compute the absolute value of difference between two values.
+ void
+ Inst_SOP2__S_ABSDIFF_I32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
+ ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
+ ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
+ ScalarOperandU32 scc(gpuDynInst, REG_SCC);
+
+ src0.read();
+ src1.read();
+
+ sdst = std::abs(src0.rawData() - src1.rawData());
+ scc = sdst.rawData() ? 1 : 0;
+
+ sdst.write();
+ scc.write();
+ } // execute
+ // --- Inst_SOP2__S_RFE_RESTORE_B64 class methods ---
+
+ Inst_SOP2__S_RFE_RESTORE_B64::Inst_SOP2__S_RFE_RESTORE_B64(
+ InFmt_SOP2 *iFmt)
+ : Inst_SOP2(iFmt, "s_rfe_restore_b64")
+ {
+ } // Inst_SOP2__S_RFE_RESTORE_B64
+
+ Inst_SOP2__S_RFE_RESTORE_B64::~Inst_SOP2__S_RFE_RESTORE_B64()
+ {
+ } // ~Inst_SOP2__S_RFE_RESTORE_B64
+
+ // --- description from .arch file ---
+ // PRIV = 0;
+ // PC = S0.u64;
+ // INST_ATC = S1.u32[0].
+ // Return from exception handler and continue, possibly changing the
+ // --- instruction ATC mode.
+ // This instruction may only be used within a trap handler.
+ // Use this instruction when the main program may be in a different memory
+ // --- space than the trap handler.
+ void
+ Inst_SOP2__S_RFE_RESTORE_B64::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_SOP2__S_MUL_HI_U32 class methods ---
+
+ Inst_SOP2__S_MUL_HI_U32::Inst_SOP2__S_MUL_HI_U32(InFmt_SOP2 *iFmt)
+ : Inst_SOP2(iFmt, "s_mul_hi_u32")
+ {
+ setFlag(ALU);
+ } // Inst_SOP2__S_MUL_HI_U32
+
+ Inst_SOP2__S_MUL_HI_U32::~Inst_SOP2__S_MUL_HI_U32()
+ {
+ } // ~Inst_SOP2__S_MUL_HI_U32
+
+ // --- description from .arch file ---
+ // D.u = (S0.u * S1.u) >> 32;
+ void
+ Inst_SOP2__S_MUL_HI_U32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
+ ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
+ ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
+
+ src0.read();
+ src1.read();
+
+ VecElemU64 tmp_dst =
+ ((VecElemU64)src0.rawData() * (VecElemU64)src1.rawData());
+ sdst = (tmp_dst >> 32);
+
+ sdst.write();
+ } // execute
+ // --- Inst_SOP2__S_MUL_HI_I32 class methods ---
+
+ Inst_SOP2__S_MUL_HI_I32::Inst_SOP2__S_MUL_HI_I32(InFmt_SOP2 *iFmt)
+ : Inst_SOP2(iFmt, "s_mul_hi_i32")
+ {
+ setFlag(ALU);
+ } // Inst_SOP2__S_MUL_HI_I32
+
+ Inst_SOP2__S_MUL_HI_I32::~Inst_SOP2__S_MUL_HI_I32()
+ {
+ } // ~Inst_SOP2__S_MUL_HI_I32
+
+ // --- description from .arch file ---
+ // D.u = (S0.u * S1.u) >> 32;
+ void
+ Inst_SOP2__S_MUL_HI_I32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
+ ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
+ ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
+
+ src0.read();
+ src1.read();
+
+ VecElemI64 tmp_src0 =
+ sext<std::numeric_limits<VecElemI32>::digits>(src0.rawData());
+ VecElemI64 tmp_src1 =
+ sext<std::numeric_limits<VecElemI32>::digits>(src1.rawData());
+ sdst = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
+
+ sdst.write();
+ } // execute
+} // namespace VegaISA
+} //
namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/sopc.cc b/src/arch/amdgpu/vega/insts/sopc.cc new file mode 100644 index 0000000000..9c58688e53 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/sopc.cc @@ -0,0 +1,599 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_SOPC__S_CMP_EQ_I32 class methods --- + + Inst_SOPC__S_CMP_EQ_I32::Inst_SOPC__S_CMP_EQ_I32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_eq_i32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_EQ_I32 + + Inst_SOPC__S_CMP_EQ_I32::~Inst_SOPC__S_CMP_EQ_I32() + { + } // ~Inst_SOPC__S_CMP_EQ_I32 + + // --- description from .arch file --- + // SCC = (S0.i == S1.i). + void + Inst_SOPC__S_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() == src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_LG_I32 class methods --- + + Inst_SOPC__S_CMP_LG_I32::Inst_SOPC__S_CMP_LG_I32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_lg_i32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_LG_I32 + + Inst_SOPC__S_CMP_LG_I32::~Inst_SOPC__S_CMP_LG_I32() + { + } // ~Inst_SOPC__S_CMP_LG_I32 + + // --- description from .arch file --- + // SCC = (S0.i != S1.i). + void + Inst_SOPC__S_CMP_LG_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() != src1.rawData()) ? 
1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_GT_I32 class methods --- + + Inst_SOPC__S_CMP_GT_I32::Inst_SOPC__S_CMP_GT_I32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_gt_i32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_GT_I32 + + Inst_SOPC__S_CMP_GT_I32::~Inst_SOPC__S_CMP_GT_I32() + { + } // ~Inst_SOPC__S_CMP_GT_I32 + + // --- description from .arch file --- + // SCC = (S0.i > S1.i). + void + Inst_SOPC__S_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() > src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_GE_I32 class methods --- + + Inst_SOPC__S_CMP_GE_I32::Inst_SOPC__S_CMP_GE_I32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_ge_i32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_GE_I32 + + Inst_SOPC__S_CMP_GE_I32::~Inst_SOPC__S_CMP_GE_I32() + { + } // ~Inst_SOPC__S_CMP_GE_I32 + + // --- description from .arch file --- + // SCC = (S0.i >= S1.i). + void + Inst_SOPC__S_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() >= src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_LT_I32 class methods --- + + Inst_SOPC__S_CMP_LT_I32::Inst_SOPC__S_CMP_LT_I32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_lt_i32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_LT_I32 + + Inst_SOPC__S_CMP_LT_I32::~Inst_SOPC__S_CMP_LT_I32() + { + } // ~Inst_SOPC__S_CMP_LT_I32 + + // --- description from .arch file --- + // SCC = (S0.i < S1.i). + void + Inst_SOPC__S_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() < src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_LE_I32 class methods --- + + Inst_SOPC__S_CMP_LE_I32::Inst_SOPC__S_CMP_LE_I32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_le_i32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_LE_I32 + + Inst_SOPC__S_CMP_LE_I32::~Inst_SOPC__S_CMP_LE_I32() + { + } // ~Inst_SOPC__S_CMP_LE_I32 + + // --- description from .arch file --- + // SCC = (S0.i <= S1.i). + void + Inst_SOPC__S_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() <= src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_EQ_U32 class methods --- + + Inst_SOPC__S_CMP_EQ_U32::Inst_SOPC__S_CMP_EQ_U32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_eq_u32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_EQ_U32 + + Inst_SOPC__S_CMP_EQ_U32::~Inst_SOPC__S_CMP_EQ_U32() + { + } // ~Inst_SOPC__S_CMP_EQ_U32 + + // --- description from .arch file --- + // SCC = (S0.u == S1.u). 
+ void + Inst_SOPC__S_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() == src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_LG_U32 class methods --- + + Inst_SOPC__S_CMP_LG_U32::Inst_SOPC__S_CMP_LG_U32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_lg_u32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_LG_U32 + + Inst_SOPC__S_CMP_LG_U32::~Inst_SOPC__S_CMP_LG_U32() + { + } // ~Inst_SOPC__S_CMP_LG_U32 + + // --- description from .arch file --- + // SCC = (S0.u != S1.u). + void + Inst_SOPC__S_CMP_LG_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() != src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_GT_U32 class methods --- + + Inst_SOPC__S_CMP_GT_U32::Inst_SOPC__S_CMP_GT_U32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_gt_u32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_GT_U32 + + Inst_SOPC__S_CMP_GT_U32::~Inst_SOPC__S_CMP_GT_U32() + { + } // ~Inst_SOPC__S_CMP_GT_U32 + + // --- description from .arch file --- + // SCC = (S0.u > S1.u). + void + Inst_SOPC__S_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() > src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_GE_U32 class methods --- + + Inst_SOPC__S_CMP_GE_U32::Inst_SOPC__S_CMP_GE_U32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_ge_u32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_GE_U32 + + Inst_SOPC__S_CMP_GE_U32::~Inst_SOPC__S_CMP_GE_U32() + { + } // ~Inst_SOPC__S_CMP_GE_U32 + + // --- description from .arch file --- + // SCC = (S0.u >= S1.u). + void + Inst_SOPC__S_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() >= src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_LT_U32 class methods --- + + Inst_SOPC__S_CMP_LT_U32::Inst_SOPC__S_CMP_LT_U32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_lt_u32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_LT_U32 + + Inst_SOPC__S_CMP_LT_U32::~Inst_SOPC__S_CMP_LT_U32() + { + } // ~Inst_SOPC__S_CMP_LT_U32 + + // --- description from .arch file --- + // SCC = (S0.u < S1.u). + void + Inst_SOPC__S_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() < src1.rawData()) ? 
1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_LE_U32 class methods --- + + Inst_SOPC__S_CMP_LE_U32::Inst_SOPC__S_CMP_LE_U32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_le_u32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_LE_U32 + + Inst_SOPC__S_CMP_LE_U32::~Inst_SOPC__S_CMP_LE_U32() + { + } // ~Inst_SOPC__S_CMP_LE_U32 + + // --- description from .arch file --- + // SCC = (S0.u <= S1.u). + void + Inst_SOPC__S_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() <= src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_BITCMP0_B32 class methods --- + + Inst_SOPC__S_BITCMP0_B32::Inst_SOPC__S_BITCMP0_B32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_bitcmp0_b32") + { + setFlag(ALU); + } // Inst_SOPC__S_BITCMP0_B32 + + Inst_SOPC__S_BITCMP0_B32::~Inst_SOPC__S_BITCMP0_B32() + { + } // ~Inst_SOPC__S_BITCMP0_B32 + + // --- description from .arch file --- + // SCC = (S0.u[S1.u[4:0]] == 0). + void + Inst_SOPC__S_BITCMP0_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = !bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_BITCMP1_B32 class methods --- + + Inst_SOPC__S_BITCMP1_B32::Inst_SOPC__S_BITCMP1_B32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_bitcmp1_b32") + { + setFlag(ALU); + } // Inst_SOPC__S_BITCMP1_B32 + + Inst_SOPC__S_BITCMP1_B32::~Inst_SOPC__S_BITCMP1_B32() + { + } // ~Inst_SOPC__S_BITCMP1_B32 + + // --- description from .arch file --- + // SCC = (S0.u[S1.u[4:0]] == 1). + void + Inst_SOPC__S_BITCMP1_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_BITCMP0_B64 class methods --- + + Inst_SOPC__S_BITCMP0_B64::Inst_SOPC__S_BITCMP0_B64(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_bitcmp0_b64") + { + setFlag(ALU); + } // Inst_SOPC__S_BITCMP0_B64 + + Inst_SOPC__S_BITCMP0_B64::~Inst_SOPC__S_BITCMP0_B64() + { + } // ~Inst_SOPC__S_BITCMP0_B64 + + // --- description from .arch file --- + // SCC = (S0.u64[S1.u[5:0]] == 0). + void + Inst_SOPC__S_BITCMP0_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = !bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_BITCMP1_B64 class methods --- + + Inst_SOPC__S_BITCMP1_B64::Inst_SOPC__S_BITCMP1_B64(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_bitcmp1_b64") + { + setFlag(ALU); + } // Inst_SOPC__S_BITCMP1_B64 + + Inst_SOPC__S_BITCMP1_B64::~Inst_SOPC__S_BITCMP1_B64() + { + } // ~Inst_SOPC__S_BITCMP1_B64 + + // --- description from .arch file --- + // SCC = (S0.u64[S1.u[5:0]] == 1). 
+ void + Inst_SOPC__S_BITCMP1_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_SETVSKIP class methods --- + + Inst_SOPC__S_SETVSKIP::Inst_SOPC__S_SETVSKIP(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_setvskip") + { + } // Inst_SOPC__S_SETVSKIP + + Inst_SOPC__S_SETVSKIP::~Inst_SOPC__S_SETVSKIP() + { + } // ~Inst_SOPC__S_SETVSKIP + + // --- description from .arch file --- + // VSKIP = S0.u[S1.u[4:0]]. + // Enables and disables VSKIP mode. + // When VSKIP is enabled, no VOP*/M*BUF/MIMG/DS/FLAT/EXP instuctions are + // issued. + // If any vector operations are outstanding, S_WAITCNT must be issued + // before executing. + // This instruction requires one waitstate after executing (e.g. S_NOP 0). + // Example: + // s_waitcnt 0 + // s_setvskip 1, 0 // Enable vskip mode. + // s_nop 1 + void + Inst_SOPC__S_SETVSKIP::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPC__S_SET_GPR_IDX_ON class methods --- + + Inst_SOPC__S_SET_GPR_IDX_ON::Inst_SOPC__S_SET_GPR_IDX_ON(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_set_gpr_idx_on") + { + } // Inst_SOPC__S_SET_GPR_IDX_ON + + Inst_SOPC__S_SET_GPR_IDX_ON::~Inst_SOPC__S_SET_GPR_IDX_ON() + { + } // ~Inst_SOPC__S_SET_GPR_IDX_ON + + // --- description from .arch file --- + // MODE.gpr_idx_en = 1; + // M0[7:0] = S0.u[7:0]; + // M0[15:12] = SIMM4 (direct contents of S1 field); + // // Remaining bits of M0 are unmodified. + // Enable GPR indexing mode. Vector operations after this will perform + // relative GPR addressing based on the contents of M0. The structure + // SQ_M0_GPR_IDX_WORD may be used to decode M0. + // The raw contents of the S1 field are read and used to set the enable + // bits. S1[0] = VSRC0_REL, S1[1] = VSRC1_REL, S1[2] = VSRC2_REL and + // S1[3] = VDST_REL. + void + Inst_SOPC__S_SET_GPR_IDX_ON::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPC__S_CMP_EQ_U64 class methods --- + + Inst_SOPC__S_CMP_EQ_U64::Inst_SOPC__S_CMP_EQ_U64(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_eq_u64") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_EQ_U64 + + Inst_SOPC__S_CMP_EQ_U64::~Inst_SOPC__S_CMP_EQ_U64() + { + } // ~Inst_SOPC__S_CMP_EQ_U64 + + // --- description from .arch file --- + // SCC = (S0.i64 == S1.i64). + void + Inst_SOPC__S_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() == src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_LG_U64 class methods --- + + Inst_SOPC__S_CMP_LG_U64::Inst_SOPC__S_CMP_LG_U64(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_lg_u64") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_LG_U64 + + Inst_SOPC__S_CMP_LG_U64::~Inst_SOPC__S_CMP_LG_U64() + { + } // ~Inst_SOPC__S_CMP_LG_U64 + + // --- description from .arch file --- + // SCC = (S0.i64 != S1.i64). 
+ void + Inst_SOPC__S_CMP_LG_U64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() != src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/sopk.cc b/src/arch/amdgpu/vega/insts/sopk.cc new file mode 100644 index 0000000000..7abbb9abb4 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/sopk.cc @@ -0,0 +1,648 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/instructions.hh" +#include "dev/amdgpu/hwreg_defines.hh" +#include "gpu-compute/shader.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_SOPK__S_MOVK_I32 class methods --- + + Inst_SOPK__S_MOVK_I32::Inst_SOPK__S_MOVK_I32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_movk_i32") + { + setFlag(ALU); + } // Inst_SOPK__S_MOVK_I32 + + Inst_SOPK__S_MOVK_I32::~Inst_SOPK__S_MOVK_I32() + { + } // ~Inst_SOPK__S_MOVK_I32 + + // --- description from .arch file --- + // D.i = signext(SIMM16) (sign extension). + void + Inst_SOPK__S_MOVK_I32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + + sdst = simm16; + + sdst.write(); + } // execute + // --- Inst_SOPK__S_CMOVK_I32 class methods --- + + Inst_SOPK__S_CMOVK_I32::Inst_SOPK__S_CMOVK_I32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_cmovk_i32") + { + setFlag(ALU); + } // Inst_SOPK__S_CMOVK_I32 + + Inst_SOPK__S_CMOVK_I32::~Inst_SOPK__S_CMOVK_I32() + { + } // ~Inst_SOPK__S_CMOVK_I32 + + // --- description from .arch file --- + // if (SCC) then D.i = signext(SIMM16); + // else NOP. + // Conditional move with sign extension. 
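Every SOPK instruction in this file folds its 16-bit literal into a 32-bit operand by sign extension, which is what the sext<16> calls in the execute() bodies below compute. A standalone sketch of that step and of the s_cmovk_i32 semantics, assuming nothing beyond standard C++:

    #include <cstdint>

    // Equivalent of sext<16>(simm16): replicate bit 15 into the upper bits.
    static int32_t signext16(uint16_t simm16)
    {
        return static_cast<int32_t>(static_cast<int16_t>(simm16));
    }

    // s_cmovk_i32: write signext(SIMM16) to the destination only if SCC is set.
    static void cmovk_i32(uint32_t scc, int32_t &sdst, uint16_t simm16)
    {
        if (scc) {
            sdst = signext16(simm16);
        }
    }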
+ void + Inst_SOPK__S_CMOVK_I32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); + + scc.read(); + + if (scc.rawData()) { + sdst = simm16; + sdst.write(); + } + } // execute + // --- Inst_SOPK__S_CMPK_EQ_I32 class methods --- + + Inst_SOPK__S_CMPK_EQ_I32::Inst_SOPK__S_CMPK_EQ_I32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_cmpk_eq_i32") + { + setFlag(ALU); + } // Inst_SOPK__S_CMPK_EQ_I32 + + Inst_SOPK__S_CMPK_EQ_I32::~Inst_SOPK__S_CMPK_EQ_I32() + { + } // ~Inst_SOPK__S_CMPK_EQ_I32 + + // --- description from .arch file --- + // SCC = (S0.i == signext(SIMM16)). + void + Inst_SOPK__S_CMPK_EQ_I32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); + ConstScalarOperandI32 src(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + scc = (src.rawData() == simm16) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPK__S_CMPK_LG_I32 class methods --- + + Inst_SOPK__S_CMPK_LG_I32::Inst_SOPK__S_CMPK_LG_I32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_cmpk_lg_i32") + { + setFlag(ALU); + } // Inst_SOPK__S_CMPK_LG_I32 + + Inst_SOPK__S_CMPK_LG_I32::~Inst_SOPK__S_CMPK_LG_I32() + { + } // ~Inst_SOPK__S_CMPK_LG_I32 + + // --- description from .arch file --- + // SCC = (S0.i != signext(SIMM16)). + void + Inst_SOPK__S_CMPK_LG_I32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); + ConstScalarOperandI32 src(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + scc = (src.rawData() != simm16) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPK__S_CMPK_GT_I32 class methods --- + + Inst_SOPK__S_CMPK_GT_I32::Inst_SOPK__S_CMPK_GT_I32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_cmpk_gt_i32") + { + setFlag(ALU); + } // Inst_SOPK__S_CMPK_GT_I32 + + Inst_SOPK__S_CMPK_GT_I32::~Inst_SOPK__S_CMPK_GT_I32() + { + } // ~Inst_SOPK__S_CMPK_GT_I32 + + // --- description from .arch file --- + // SCC = (S0.i > signext(SIMM16)). + void + Inst_SOPK__S_CMPK_GT_I32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); + ConstScalarOperandI32 src(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + scc = (src.rawData() > simm16) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPK__S_CMPK_GE_I32 class methods --- + + Inst_SOPK__S_CMPK_GE_I32::Inst_SOPK__S_CMPK_GE_I32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_cmpk_ge_i32") + { + setFlag(ALU); + } // Inst_SOPK__S_CMPK_GE_I32 + + Inst_SOPK__S_CMPK_GE_I32::~Inst_SOPK__S_CMPK_GE_I32() + { + } // ~Inst_SOPK__S_CMPK_GE_I32 + + // --- description from .arch file --- + // SCC = (S0.i >= signext(SIMM16)). + void + Inst_SOPK__S_CMPK_GE_I32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); + ConstScalarOperandI32 src(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + scc = (src.rawData() >= simm16) ? 
1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPK__S_CMPK_LT_I32 class methods --- + + Inst_SOPK__S_CMPK_LT_I32::Inst_SOPK__S_CMPK_LT_I32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_cmpk_lt_i32") + { + setFlag(ALU); + } // Inst_SOPK__S_CMPK_LT_I32 + + Inst_SOPK__S_CMPK_LT_I32::~Inst_SOPK__S_CMPK_LT_I32() + { + } // ~Inst_SOPK__S_CMPK_LT_I32 + + // --- description from .arch file --- + // SCC = (S0.i < signext(SIMM16)). + void + Inst_SOPK__S_CMPK_LT_I32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); + ConstScalarOperandI32 src(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + scc = (src.rawData() < simm16) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPK__S_CMPK_LE_I32 class methods --- + + Inst_SOPK__S_CMPK_LE_I32::Inst_SOPK__S_CMPK_LE_I32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_cmpk_le_i32") + { + setFlag(ALU); + } // Inst_SOPK__S_CMPK_LE_I32 + + Inst_SOPK__S_CMPK_LE_I32::~Inst_SOPK__S_CMPK_LE_I32() + { + } // ~Inst_SOPK__S_CMPK_LE_I32 + + // --- description from .arch file --- + // SCC = (S0.i <= signext(SIMM16)). + void + Inst_SOPK__S_CMPK_LE_I32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); + ConstScalarOperandI32 src(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + scc = (src.rawData() <= simm16) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPK__S_CMPK_EQ_U32 class methods --- + + Inst_SOPK__S_CMPK_EQ_U32::Inst_SOPK__S_CMPK_EQ_U32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_cmpk_eq_u32") + { + setFlag(ALU); + } // Inst_SOPK__S_CMPK_EQ_U32 + + Inst_SOPK__S_CMPK_EQ_U32::~Inst_SOPK__S_CMPK_EQ_U32() + { + } // ~Inst_SOPK__S_CMPK_EQ_U32 + + // --- description from .arch file --- + // SCC = (S0.u == SIMM16). + void + Inst_SOPK__S_CMPK_EQ_U32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; + ConstScalarOperandU32 src(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + scc = (src.rawData() == simm16) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPK__S_CMPK_LG_U32 class methods --- + + Inst_SOPK__S_CMPK_LG_U32::Inst_SOPK__S_CMPK_LG_U32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_cmpk_lg_u32") + { + setFlag(ALU); + } // Inst_SOPK__S_CMPK_LG_U32 + + Inst_SOPK__S_CMPK_LG_U32::~Inst_SOPK__S_CMPK_LG_U32() + { + } // ~Inst_SOPK__S_CMPK_LG_U32 + + // --- description from .arch file --- + // SCC = (S0.u != SIMM16). + void + Inst_SOPK__S_CMPK_LG_U32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; + ConstScalarOperandU32 src(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + scc = (src.rawData() != simm16) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPK__S_CMPK_GT_U32 class methods --- + + Inst_SOPK__S_CMPK_GT_U32::Inst_SOPK__S_CMPK_GT_U32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_cmpk_gt_u32") + { + setFlag(ALU); + } // Inst_SOPK__S_CMPK_GT_U32 + + Inst_SOPK__S_CMPK_GT_U32::~Inst_SOPK__S_CMPK_GT_U32() + { + } // ~Inst_SOPK__S_CMPK_GT_U32 + + // --- description from .arch file --- + // SCC = (S0.u > SIMM16). + void + Inst_SOPK__S_CMPK_GT_U32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; + ConstScalarOperandU32 src(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + scc = (src.rawData() > simm16) ? 
1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPK__S_CMPK_GE_U32 class methods --- + + Inst_SOPK__S_CMPK_GE_U32::Inst_SOPK__S_CMPK_GE_U32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_cmpk_ge_u32") + { + setFlag(ALU); + } // Inst_SOPK__S_CMPK_GE_U32 + + Inst_SOPK__S_CMPK_GE_U32::~Inst_SOPK__S_CMPK_GE_U32() + { + } // ~Inst_SOPK__S_CMPK_GE_U32 + + // --- description from .arch file --- + // SCC = (S0.u >= SIMM16). + void + Inst_SOPK__S_CMPK_GE_U32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; + ConstScalarOperandU32 src(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + scc = (src.rawData() >= simm16) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPK__S_CMPK_LT_U32 class methods --- + + Inst_SOPK__S_CMPK_LT_U32::Inst_SOPK__S_CMPK_LT_U32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_cmpk_lt_u32") + { + setFlag(ALU); + } // Inst_SOPK__S_CMPK_LT_U32 + + Inst_SOPK__S_CMPK_LT_U32::~Inst_SOPK__S_CMPK_LT_U32() + { + } // ~Inst_SOPK__S_CMPK_LT_U32 + + // --- description from .arch file --- + // SCC = (S0.u < SIMM16). + void + Inst_SOPK__S_CMPK_LT_U32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; + ConstScalarOperandU32 src(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + scc = (src.rawData() < simm16) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPK__S_CMPK_LE_U32 class methods --- + + Inst_SOPK__S_CMPK_LE_U32::Inst_SOPK__S_CMPK_LE_U32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_cmpk_le_u32") + { + setFlag(ALU); + } // Inst_SOPK__S_CMPK_LE_U32 + + Inst_SOPK__S_CMPK_LE_U32::~Inst_SOPK__S_CMPK_LE_U32() + { + } // ~Inst_SOPK__S_CMPK_LE_U32 + + // --- description from .arch file --- + // SCC = (S0.u <= SIMM16). + void + Inst_SOPK__S_CMPK_LE_U32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; + ConstScalarOperandU32 src(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + scc = (src.rawData() <= simm16) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPK__S_ADDK_I32 class methods --- + + Inst_SOPK__S_ADDK_I32::Inst_SOPK__S_ADDK_I32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_addk_i32") + { + setFlag(ALU); + } // Inst_SOPK__S_ADDK_I32 + + Inst_SOPK__S_ADDK_I32::~Inst_SOPK__S_ADDK_I32() + { + } // ~Inst_SOPK__S_ADDK_I32 + + // --- description from .arch file --- + // D.i = D.i + signext(SIMM16); + // SCC = overflow. + void + Inst_SOPK__S_ADDK_I32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI16 simm16 = instData.SIMM16; + ConstScalarOperandI32 src(gpuDynInst, instData.SDST); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = src.rawData() + (ScalarRegI32)sext<16>(simm16); + scc = (bits(src.rawData(), 31) == bits(simm16, 15) + && bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOPK__S_MULK_I32 class methods --- + + Inst_SOPK__S_MULK_I32::Inst_SOPK__S_MULK_I32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_mulk_i32") + { + setFlag(ALU); + } // Inst_SOPK__S_MULK_I32 + + Inst_SOPK__S_MULK_I32::~Inst_SOPK__S_MULK_I32() + { + } // ~Inst_SOPK__S_MULK_I32 + + // --- description from .arch file --- + // D.i = D.i * signext(SIMM16). 
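The SCC update in s_addk_i32 above encodes the usual two's-complement overflow rule: overflow can only occur when both addends share a sign and the sum's sign differs from it. A plain-C++ sketch of that test (the unsigned addition avoids signed-overflow undefined behavior):

    #include <cstdint>

    // Returns SCC (1 on signed overflow) and writes the wrapped sum.
    static uint32_t addk_i32(int32_t src, int16_t simm16, int32_t &sum)
    {
        int32_t imm = simm16;   // sign-extended by the integer conversion
        sum = static_cast<int32_t>(
            static_cast<uint32_t>(src) + static_cast<uint32_t>(imm));

        bool sameSign    = ((src ^ imm) & 0x80000000u) == 0;
        bool signFlipped = ((src ^ sum) & 0x80000000u) != 0;
        return (sameSign && signFlipped) ? 1 : 0;
    }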
+ void + Inst_SOPK__S_MULK_I32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI16 simm16 = instData.SIMM16; + ConstScalarOperandI32 src(gpuDynInst, instData.SDST); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = src.rawData() * (ScalarRegI32)sext<16>(simm16); + + sdst.write(); + } // execute + // --- Inst_SOPK__S_CBRANCH_I_FORK class methods --- + + Inst_SOPK__S_CBRANCH_I_FORK::Inst_SOPK__S_CBRANCH_I_FORK(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_cbranch_i_fork") + { + setFlag(Branch); + } // Inst_SOPK__S_CBRANCH_I_FORK + + Inst_SOPK__S_CBRANCH_I_FORK::~Inst_SOPK__S_CBRANCH_I_FORK() + { + } // ~Inst_SOPK__S_CBRANCH_I_FORK + + // --- description from .arch file --- + // mask_pass = S0.u64 & EXEC; + // mask_fail = ~S0.u64 & EXEC; + // target_addr = PC + signext(SIMM16 * 4) + 4; + // if (mask_pass == EXEC) + // PC = target_addr; + // elsif (mask_fail == EXEC) + // PC += 4; + // elsif (bitcount(mask_fail) < bitcount(mask_pass)) + // EXEC = mask_fail; + // SGPR[CSP*4] = { target_addr, mask_pass }; + // CSP++; + // PC += 4; + // else + // EXEC = mask_pass; + // SGPR[CSP*4] = { PC + 4, mask_fail }; + // CSP++; + // PC = target_addr; + // end. + // Conditional branch using branch-stack. + // S0 = compare mask(vcc or any sgpr), and + // SIMM16 = signed DWORD branch offset relative to next instruction. + // See also S_CBRANCH_JOIN. + void + Inst_SOPK__S_CBRANCH_I_FORK::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPK__S_GETREG_B32 class methods --- + + Inst_SOPK__S_GETREG_B32::Inst_SOPK__S_GETREG_B32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_getreg_b32") + { + setFlag(ALU); + } // Inst_SOPK__S_GETREG_B32 + + Inst_SOPK__S_GETREG_B32::~Inst_SOPK__S_GETREG_B32() + { + } // ~Inst_SOPK__S_GETREG_B32 + + // --- description from .arch file --- + // D.u = hardware-reg. Read some or all of a hardware register into the + // LSBs of D. + // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size + // is 1..32. + void + Inst_SOPK__S_GETREG_B32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI16 simm16 = instData.SIMM16; + ScalarRegU32 hwregId = simm16 & 0x3f; + ScalarRegU32 offset = (simm16 >> 6) & 31; + ScalarRegU32 size = ((simm16 >> 11) & 31) + 1; + + ScalarRegU32 hwreg = + gpuDynInst->computeUnit()->shader->getHwReg(hwregId); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + sdst.read(); + + // Store value from hardware to part of the SDST. + ScalarRegU32 mask = (((1U << size) - 1U) << offset); + sdst = (hwreg & mask) >> offset; + sdst.write(); + } // execute + // --- Inst_SOPK__S_SETREG_B32 class methods --- + + Inst_SOPK__S_SETREG_B32::Inst_SOPK__S_SETREG_B32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_setreg_b32") + { + setFlag(ALU); + } // Inst_SOPK__S_SETREG_B32 + + Inst_SOPK__S_SETREG_B32::~Inst_SOPK__S_SETREG_B32() + { + } // ~Inst_SOPK__S_SETREG_B32 + + // --- description from .arch file --- + // hardware-reg = S0.u. Write some or all of the LSBs of D into a hardware + // register. + // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size + // is 1..32. 
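s_getreg_b32 and s_setreg_b32 share the SIMM16 layout quoted above: hwRegId in bits [5:0], a bit offset in [10:6], and (size - 1) in [15:11]. A self-contained sketch of the decode and of the masked field insertion s_setreg_b32 performs, using hypothetical helper names rather than the gem5 API:

    #include <cstdint>

    struct HwRegField
    {
        uint32_t hwregId;   // SIMM16[5:0]
        uint32_t offset;    // SIMM16[10:6], 0..31
        uint32_t size;      // SIMM16[15:11] + 1, 1..32
    };

    static HwRegField decodeHwRegSimm16(uint16_t simm16)
    {
        return { simm16 & 0x3fu, (simm16 >> 6) & 0x1fu,
                 ((simm16 >> 11) & 0x1fu) + 1u };
    }

    // Insert the low 'size' bits of 'value' into 'hwreg' at 'offset'.
    static uint32_t insertHwRegField(uint32_t hwreg, uint32_t value,
                                     HwRegField f)
    {
        uint32_t mask = (f.size >= 32) ? 0xffffffffu
                                       : (((1u << f.size) - 1u) << f.offset);
        return (hwreg & ~mask) | ((value << f.offset) & mask);
    }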
+ void + Inst_SOPK__S_SETREG_B32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI16 simm16 = instData.SIMM16; + ScalarRegU32 hwregId = simm16 & 0x3f; + ScalarRegU32 offset = (simm16 >> 6) & 31; + ScalarRegU32 size = ((simm16 >> 11) & 31) + 1; + + ScalarRegU32 hwreg = + gpuDynInst->computeUnit()->shader->getHwReg(hwregId); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + sdst.read(); + + // Store value from SDST to part of the hardware register. + ScalarRegU32 mask = (((1U << size) - 1U) << offset); + hwreg = ((hwreg & ~mask) | ((sdst.rawData() << offset) & mask)); + gpuDynInst->computeUnit()->shader->setHwReg(hwregId, hwreg); + + // set MODE register to control the behavior of single precision + // floating-point numbers: denormal mode or round mode + if (hwregId==1 && size==2 + && (offset==4 || offset==0)) { + warn_once("Be cautious that s_setreg_b32 has no real effect " + "on FP modes: %s\n", gpuDynInst->disassemble()); + return; + } + + // panic if not changing MODE of floating-point numbers + panicUnimplemented(); + } // execute + // --- Inst_SOPK__S_SETREG_IMM32_B32 class methods --- + + Inst_SOPK__S_SETREG_IMM32_B32::Inst_SOPK__S_SETREG_IMM32_B32( + InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_setreg_imm32_b32") + { + setFlag(ALU); + } // Inst_SOPK__S_SETREG_IMM32_B32 + + Inst_SOPK__S_SETREG_IMM32_B32::~Inst_SOPK__S_SETREG_IMM32_B32() + { + } // ~Inst_SOPK__S_SETREG_IMM32_B32 + + // --- description from .arch file --- + // Write some or all of the LSBs of IMM32 into a hardware register; this + // --- instruction requires a 32-bit literal constant. + // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size + // is 1..32. + void + Inst_SOPK__S_SETREG_IMM32_B32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI16 simm16 = instData.SIMM16; + ScalarRegU32 hwregId = simm16 & 0x3f; + ScalarRegU32 offset = (simm16 >> 6) & 31; + ScalarRegU32 size = ((simm16 >> 11) & 31) + 1; + + ScalarRegU32 hwreg = + gpuDynInst->computeUnit()->shader->getHwReg(hwregId); + ScalarRegI32 simm32 = extData.imm_u32; + + // Store value from SIMM32 to part of the hardware register. + ScalarRegU32 mask = (((1U << size) - 1U) << offset); + hwreg = ((hwreg & ~mask) | ((simm32 << offset) & mask)); + gpuDynInst->computeUnit()->shader->setHwReg(hwregId, hwreg); + + // set MODE register to control the behavior of single precision + // floating-point numbers: denormal mode or round mode + if (hwregId==HW_REG_MODE && size==2 + && (offset==4 || offset==0)) { + warn_once("Be cautious that s_setreg_imm32_b32 has no real effect " + "on FP modes: %s\n", gpuDynInst->disassemble()); + return; + } + + // panic if not changing modes of single-precision FPs + panicUnimplemented(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/sopp.cc b/src/arch/amdgpu/vega/insts/sopp.cc new file mode 100644 index 0000000000..df5cdbf681 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/sopp.cc @@ -0,0 +1,900 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/instructions.hh" +#include "debug/GPUSync.hh" +#include "gpu-compute/shader.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_SOPP__S_NOP class methods --- + + Inst_SOPP__S_NOP::Inst_SOPP__S_NOP(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_nop") + { + setFlag(Nop); + } // Inst_SOPP__S_NOP + + Inst_SOPP__S_NOP::~Inst_SOPP__S_NOP() + { + } // ~Inst_SOPP__S_NOP + + // --- description from .arch file --- + // Do nothing. Repeat NOP 1..8 times based on SIMM16[2:0] -- 0 = 1 time, + // 7 = 8 times. + // This instruction may be used to introduce wait states to resolve + // hazards; see the shader programming guide for details. Compare with + // S_SLEEP. + void + Inst_SOPP__S_NOP::execute(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_SOPP__S_ENDPGM class methods --- + + Inst_SOPP__S_ENDPGM::Inst_SOPP__S_ENDPGM(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_endpgm") + { + setFlag(EndOfKernel); + } // Inst_SOPP__S_ENDPGM + + Inst_SOPP__S_ENDPGM::~Inst_SOPP__S_ENDPGM() + { + } // ~Inst_SOPP__S_ENDPGM + + // --- description from .arch file --- + // End of program; terminate wavefront. + // The hardware implicitly executes S_WAITCNT 0 before executing this + // --- instruction. + // See S_ENDPGM_SAVED for the context-switch version of this instruction. + void + Inst_SOPP__S_ENDPGM::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ComputeUnit *cu = gpuDynInst->computeUnit(); + + // delete extra instructions fetched for completed work-items + wf->instructionBuffer.erase(wf->instructionBuffer.begin() + 1, + wf->instructionBuffer.end()); + + if (wf->pendingFetch) { + wf->dropFetch = true; + } + + wf->computeUnit->fetchStage.fetchUnit(wf->simdId) + .flushBuf(wf->wfSlotId); + wf->setStatus(Wavefront::S_STOPPED); + + int refCount = wf->computeUnit->getLds() + .decreaseRefCounter(wf->dispatchId, wf->wgId); + + /** + * The parent WF of this instruction is exiting, therefore + * it should not participate in this barrier any longer. This + * prevents possible deadlock issues if WFs exit early. 
+ */ + int bar_id = WFBarrier::InvalidID; + if (wf->hasBarrier()) { + assert(wf->getStatus() != Wavefront::S_BARRIER); + bar_id = wf->barrierId(); + assert(bar_id != WFBarrier::InvalidID); + wf->releaseBarrier(); + cu->decMaxBarrierCnt(bar_id); + DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Exiting the " + "program and decrementing max barrier count for " + "barrier Id%d. New max count: %d.\n", cu->cu_id, + wf->simdId, wf->wfSlotId, wf->wfDynId, bar_id, + cu->maxBarrierCnt(bar_id)); + } + + DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n", + wf->computeUnit->cu_id, wf->wgId, refCount); + + wf->computeUnit->registerManager->freeRegisters(wf); + wf->computeUnit->stats.completedWfs++; + wf->computeUnit->activeWaves--; + + panic_if(wf->computeUnit->activeWaves < 0, "CU[%d] Active waves less " + "than zero\n", wf->computeUnit->cu_id); + + DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n", + wf->computeUnit->cu_id, wf->simdId, wf->wfSlotId, wf->wfDynId); + + for (int i = 0; i < wf->vecReads.size(); i++) { + if (wf->rawDist.find(i) != wf->rawDist.end()) { + wf->stats.readsPerWrite.sample(wf->vecReads.at(i)); + } + } + wf->vecReads.clear(); + wf->rawDist.clear(); + wf->lastInstExec = 0; + + if (!refCount) { + /** + * If all WFs have finished, and hence the WG has finished, + * then we can free up the barrier belonging to the parent + * WG, but only if we actually used a barrier (i.e., more + * than one WF in the WG). + */ + if (bar_id != WFBarrier::InvalidID) { + DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - All waves are " + "now complete. Releasing barrier Id%d.\n", cu->cu_id, + wf->simdId, wf->wfSlotId, wf->wfDynId, + wf->barrierId()); + cu->releaseBarrier(bar_id); + } + + /** + * Last wavefront of the workgroup has executed return. If the + * workgroup is not the final one in the kernel, then simply + * retire it; however, if it is the final one, i.e., indicating + * the kernel end, then release operation (i.e., GL2 WB) is + * needed + */ + + //check whether the workgroup is indicating the kernel end, i.e., + //the last workgroup in the kernel + bool kernelEnd = + wf->computeUnit->shader->dispatcher().isReachingKernelEnd(wf); + + bool relNeeded = + wf->computeUnit->shader->impl_kern_end_rel; + + //if it is not a kernel end, then retire the workgroup directly + if (!kernelEnd || !relNeeded) { + wf->computeUnit->shader->dispatcher().notifyWgCompl(wf); + wf->setStatus(Wavefront::S_STOPPED); + wf->computeUnit->stats.completedWGs++; + + return; + } + + /** + * if it is a kernel end, inject a memory sync, i.e., GL2 WB, and + * retire the workgroup after receving response. + * note that GL0V and GL1 are read only, and they just forward GL2 + * WB request. 
When forwarding, GL1 send the request to all GL2 in + * the complex + */ + setFlag(MemSync); + setFlag(GlobalSegment); + // Notify Memory System of Kernel Completion + // Kernel End = isKernel + isMemSync + wf->setStatus(Wavefront::S_RETURNING); + gpuDynInst->simdId = wf->simdId; + gpuDynInst->wfSlotId = wf->wfSlotId; + gpuDynInst->wfDynId = wf->wfDynId; + + DPRINTF(GPUExec, "inject global memory fence for CU%d: " + "WF[%d][%d][%d]\n", wf->computeUnit->cu_id, + wf->simdId, wf->wfSlotId, wf->wfDynId); + + // call shader to prepare the flush operations + wf->computeUnit->shader->prepareFlush(gpuDynInst); + + wf->computeUnit->stats.completedWGs++; + } else { + wf->computeUnit->shader->dispatcher().scheduleDispatch(); + } + } // execute + + // --- Inst_SOPP__S_BRANCH class methods --- + + Inst_SOPP__S_BRANCH::Inst_SOPP__S_BRANCH(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_branch") + { + setFlag(Branch); + } // Inst_SOPP__S_BRANCH + + Inst_SOPP__S_BRANCH::~Inst_SOPP__S_BRANCH() + { + } // ~Inst_SOPP__S_BRANCH + + // --- description from .arch file --- + // PC = PC + signext(SIMM16 * 4) + 4 (short jump). + // For a long jump, use S_SETPC. + void + Inst_SOPP__S_BRANCH::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + Addr pc = gpuDynInst->pc(); + ScalarRegI16 simm16 = instData.SIMM16; + + pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; + + wf->pc(pc); + } // execute + // --- Inst_SOPP__S_WAKEUP class methods --- + + Inst_SOPP__S_WAKEUP::Inst_SOPP__S_WAKEUP(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_wakeup") + { + } // Inst_SOPP__S_WAKEUP + + Inst_SOPP__S_WAKEUP::~Inst_SOPP__S_WAKEUP() + { + } // ~Inst_SOPP__S_WAKEUP + + // --- description from .arch file --- + // Allow a wave to 'ping' all the other waves in its threadgroup to force + // them to wake up immediately from an S_SLEEP instruction. The ping is + // ignored if the waves are not sleeping. + // This allows for more efficient polling on a memory location. The waves + // which are polling can sit in a long S_SLEEP between memory reads, but + // the wave which writes the value can tell them all to wake up early now + // that the data is available. This is useful for fBarrier implementations + // (speedup). + // This method is also safe from races because if any wave misses the ping, + // everything still works fine (whoever missed it just completes their + // normal S_SLEEP). + void + Inst_SOPP__S_WAKEUP::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPP__S_CBRANCH_SCC0 class methods --- + + Inst_SOPP__S_CBRANCH_SCC0::Inst_SOPP__S_CBRANCH_SCC0(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_cbranch_scc0") + { + setFlag(Branch); + } // Inst_SOPP__S_CBRANCH_SCC0 + + Inst_SOPP__S_CBRANCH_SCC0::~Inst_SOPP__S_CBRANCH_SCC0() + { + } // ~Inst_SOPP__S_CBRANCH_SCC0 + + // --- description from .arch file --- + // if (SCC == 0) then PC = PC + signext(SIMM16 * 4) + 4; + // else NOP. 
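All of the SOPP conditional branches below compute the same target: the 16-bit immediate is a signed count of dwords relative to the instruction after the branch. A minimal sketch of that arithmetic:

    #include <cstdint>

    // PC = PC + signext(SIMM16) * 4 + 4 when the branch condition holds.
    static uint64_t branchTarget(uint64_t pc, int16_t simm16)
    {
        return pc + static_cast<int64_t>(simm16) * 4 + 4;
    }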
+ void + Inst_SOPP__S_CBRANCH_SCC0::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + Addr pc = gpuDynInst->pc(); + ScalarRegI16 simm16 = instData.SIMM16; + ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); + + scc.read(); + + if (!scc.rawData()) { + pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; + } + + wf->pc(pc); + } // execute + // --- Inst_SOPP__S_CBRANCH_SCC1 class methods --- + + Inst_SOPP__S_CBRANCH_SCC1::Inst_SOPP__S_CBRANCH_SCC1(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_cbranch_scc1") + { + setFlag(Branch); + } // Inst_SOPP__S_CBRANCH_SCC1 + + Inst_SOPP__S_CBRANCH_SCC1::~Inst_SOPP__S_CBRANCH_SCC1() + { + } // ~Inst_SOPP__S_CBRANCH_SCC1 + + // --- description from .arch file --- + // if (SCC == 1) then PC = PC + signext(SIMM16 * 4) + 4; + // else NOP. + void + Inst_SOPP__S_CBRANCH_SCC1::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + Addr pc = gpuDynInst->pc(); + ScalarRegI16 simm16 = instData.SIMM16; + ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); + + scc.read(); + + if (scc.rawData()) { + pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; + } + + wf->pc(pc); + } // execute + // --- Inst_SOPP__S_CBRANCH_VCCZ class methods --- + + Inst_SOPP__S_CBRANCH_VCCZ::Inst_SOPP__S_CBRANCH_VCCZ(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_cbranch_vccz") + { + setFlag(Branch); + setFlag(ReadsVCC); + } // Inst_SOPP__S_CBRANCH_VCCZ + + Inst_SOPP__S_CBRANCH_VCCZ::~Inst_SOPP__S_CBRANCH_VCCZ() + { + } // ~Inst_SOPP__S_CBRANCH_VCCZ + + // --- description from .arch file --- + // if (VCC == 0) then PC = PC + signext(SIMM16 * 4) + 4; + // else NOP. + void + Inst_SOPP__S_CBRANCH_VCCZ::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + Addr pc = gpuDynInst->pc(); + ScalarRegI16 simm16 = instData.SIMM16; + + vcc.read(); + + if (!vcc.rawData()) { + pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; + } + + wf->pc(pc); + } // execute + // --- Inst_SOPP__S_CBRANCH_VCCNZ class methods --- + + Inst_SOPP__S_CBRANCH_VCCNZ::Inst_SOPP__S_CBRANCH_VCCNZ(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_cbranch_vccnz") + { + setFlag(Branch); + setFlag(ReadsVCC); + } // Inst_SOPP__S_CBRANCH_VCCNZ + + Inst_SOPP__S_CBRANCH_VCCNZ::~Inst_SOPP__S_CBRANCH_VCCNZ() + { + } // ~Inst_SOPP__S_CBRANCH_VCCNZ + + // --- description from .arch file --- + // if (VCC != 0) then PC = PC + signext(SIMM16 * 4) + 4; + // else NOP. + void + Inst_SOPP__S_CBRANCH_VCCNZ::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + vcc.read(); + + if (vcc.rawData()) { + Addr pc = gpuDynInst->pc(); + ScalarRegI16 simm16 = instData.SIMM16; + pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; + wf->pc(pc); + } + } // execute + // --- Inst_SOPP__S_CBRANCH_EXECZ class methods --- + + Inst_SOPP__S_CBRANCH_EXECZ::Inst_SOPP__S_CBRANCH_EXECZ(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_cbranch_execz") + { + setFlag(Branch); + setFlag(ReadsEXEC); + } // Inst_SOPP__S_CBRANCH_EXECZ + + Inst_SOPP__S_CBRANCH_EXECZ::~Inst_SOPP__S_CBRANCH_EXECZ() + { + } // ~Inst_SOPP__S_CBRANCH_EXECZ + + // --- description from .arch file --- + // if (EXEC == 0) then PC = PC + signext(SIMM16 * 4) + 4; + // else NOP. 
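s_cbranch_execz and s_cbranch_execnz differ from the SCC/VCC branches only in the condition they sample: whether any lane of the 64-bit EXEC mask is set. A sketch using std::bitset as a stand-in for the wavefront's execution mask:

    #include <bitset>

    static bool execz(const std::bitset<64> &exec)  { return exec.none(); }
    static bool execnz(const std::bitset<64> &exec) { return exec.any(); }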
+ void + Inst_SOPP__S_CBRANCH_EXECZ::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (wf->execMask().none()) { + Addr pc = gpuDynInst->pc(); + ScalarRegI16 simm16 = instData.SIMM16; + pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; + wf->pc(pc); + } + } // execute + // --- Inst_SOPP__S_CBRANCH_EXECNZ class methods --- + + Inst_SOPP__S_CBRANCH_EXECNZ::Inst_SOPP__S_CBRANCH_EXECNZ(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_cbranch_execnz") + { + setFlag(Branch); + setFlag(ReadsEXEC); + } // Inst_SOPP__S_CBRANCH_EXECNZ + + Inst_SOPP__S_CBRANCH_EXECNZ::~Inst_SOPP__S_CBRANCH_EXECNZ() + { + } // ~Inst_SOPP__S_CBRANCH_EXECNZ + + // --- description from .arch file --- + // if (EXEC != 0) then PC = PC + signext(SIMM16 * 4) + 4; + // else NOP. + void + Inst_SOPP__S_CBRANCH_EXECNZ::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (wf->execMask().any()) { + Addr pc = gpuDynInst->pc(); + ScalarRegI16 simm16 = instData.SIMM16; + pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; + wf->pc(pc); + } + } // execute + // --- Inst_SOPP__S_BARRIER class methods --- + + Inst_SOPP__S_BARRIER::Inst_SOPP__S_BARRIER(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_barrier") + { + setFlag(MemBarrier); + } // Inst_SOPP__S_BARRIER + + Inst_SOPP__S_BARRIER::~Inst_SOPP__S_BARRIER() + { + } // ~Inst_SOPP__S_BARRIER + + // --- description from .arch file --- + // Synchronize waves within a threadgroup. + // If not all waves of the threadgroup have been created yet, waits for + // entire group before proceeding. + // If some waves in the threadgroup have already terminated, this waits on + // only the surviving waves. + // Barriers are legal inside trap handlers. + void + Inst_SOPP__S_BARRIER::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ComputeUnit *cu = gpuDynInst->computeUnit(); + + if (wf->hasBarrier()) { + int bar_id = wf->barrierId(); + assert(wf->getStatus() == Wavefront::S_BARRIER); + cu->incNumAtBarrier(bar_id); + DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Stalling at " + "barrier Id%d. %d waves now at barrier, %d waves " + "remain.\n", cu->cu_id, wf->simdId, wf->wfSlotId, + wf->wfDynId, bar_id, cu->numAtBarrier(bar_id), + cu->numYetToReachBarrier(bar_id)); + } + } // execute + // --- Inst_SOPP__S_SETKILL class methods --- + + Inst_SOPP__S_SETKILL::Inst_SOPP__S_SETKILL(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_setkill") + { + } // Inst_SOPP__S_SETKILL + + Inst_SOPP__S_SETKILL::~Inst_SOPP__S_SETKILL() + { + } // ~Inst_SOPP__S_SETKILL + + // --- description from .arch file --- + // set KILL bit to value of SIMM16[0]. + // Used primarily for debugging kill wave host command behavior. + void + Inst_SOPP__S_SETKILL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPP__S_WAITCNT class methods --- + + Inst_SOPP__S_WAITCNT::Inst_SOPP__S_WAITCNT(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_waitcnt") + { + setFlag(ALU); + setFlag(Waitcnt); + } // Inst_SOPP__S_WAITCNT + + Inst_SOPP__S_WAITCNT::~Inst_SOPP__S_WAITCNT() + { + } // ~Inst_SOPP__S_WAITCNT + + // --- description from .arch file --- + // Wait for the counts of outstanding lds, vector-memory and + // --- export/vmem-write-data to be at or below the specified levels. + // SIMM16[3:0] = vmcount (vector memory operations), + // SIMM16[6:4] = export/mem-write-data count, + // SIMM16[12:8] = LGKM_cnt (scalar-mem/GDS/LDS count). 
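The s_waitcnt immediate packs three independent counters, as listed above. A standalone decode of that packing, using the same bit ranges the execute() body below extracts:

    #include <cstdint>

    struct WaitCnts
    {
        uint32_t vmCnt;     // SIMM16[3:0]  - outstanding vector-memory ops
        uint32_t expCnt;    // SIMM16[6:4]  - export / mem-write-data count
        uint32_t lgkmCnt;   // SIMM16[12:8] - scalar-mem / GDS / LDS count
    };

    static WaitCnts decodeWaitcnt(uint16_t simm16)
    {
        return { simm16 & 0xfu, (simm16 >> 4) & 0x7u, (simm16 >> 8) & 0x1fu };
    }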
+ void + Inst_SOPP__S_WAITCNT::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI32 vm_cnt = 0; + ScalarRegI32 exp_cnt = 0; + ScalarRegI32 lgkm_cnt = 0; + vm_cnt = bits(instData.SIMM16, 3, 0); + exp_cnt = bits(instData.SIMM16, 6, 4); + lgkm_cnt = bits(instData.SIMM16, 12, 8); + gpuDynInst->wavefront()->setStatus(Wavefront::S_WAITCNT); + gpuDynInst->wavefront()->setWaitCnts(vm_cnt, exp_cnt, lgkm_cnt); + } // execute + // --- Inst_SOPP__S_SETHALT class methods --- + + Inst_SOPP__S_SETHALT::Inst_SOPP__S_SETHALT(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_sethalt") + { + } // Inst_SOPP__S_SETHALT + + Inst_SOPP__S_SETHALT::~Inst_SOPP__S_SETHALT() + { + } // ~Inst_SOPP__S_SETHALT + + // --- description from .arch file --- + // Set HALT bit to value of SIMM16[0]; 1 = halt, 0 = resume. + // The halt flag is ignored while PRIV == 1 (inside trap handlers) but the + // shader will halt immediately after the handler returns if HALT is still + // set at that time. + void + Inst_SOPP__S_SETHALT::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPP__S_SLEEP class methods --- + + Inst_SOPP__S_SLEEP::Inst_SOPP__S_SLEEP(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_sleep") + { + setFlag(ALU); + setFlag(Sleep); + } // Inst_SOPP__S_SLEEP + + Inst_SOPP__S_SLEEP::~Inst_SOPP__S_SLEEP() + { + } // ~Inst_SOPP__S_SLEEP + + // --- description from .arch file --- + // Cause a wave to sleep for (64 * SIMM16[2:0] + 1..64) clocks. + // The exact amount of delay is approximate. Compare with S_NOP. + void + Inst_SOPP__S_SLEEP::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; + gpuDynInst->wavefront()->setStatus(Wavefront::S_STALLED_SLEEP); + // sleep duration is specified in multiples of 64 cycles + gpuDynInst->wavefront()->setSleepTime(64 * simm16); + } // execute + // --- Inst_SOPP__S_SETPRIO class methods --- + + Inst_SOPP__S_SETPRIO::Inst_SOPP__S_SETPRIO(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_setprio") + { + setFlag(ALU); + } // Inst_SOPP__S_SETPRIO + + Inst_SOPP__S_SETPRIO::~Inst_SOPP__S_SETPRIO() + { + } // ~Inst_SOPP__S_SETPRIO + + // --- description from .arch file --- + // User settable wave priority is set to SIMM16[1:0]. 0 = lowest, + // 3 = highest. + // The overall wave priority is {SPIPrio[1:0] + UserPrio[1:0], + // WaveAge[3:0]}. + void + Inst_SOPP__S_SETPRIO::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegU16 simm16 = instData.SIMM16; + ScalarRegU32 userPrio = simm16 & 0x3; + + warn_once("S_SETPRIO ignored -- Requested priority %d\n", userPrio); + } // execute + // --- Inst_SOPP__S_SENDMSG class methods --- + + Inst_SOPP__S_SENDMSG::Inst_SOPP__S_SENDMSG(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_sendmsg") + { + } // Inst_SOPP__S_SENDMSG + + Inst_SOPP__S_SENDMSG::~Inst_SOPP__S_SENDMSG() + { + } // ~Inst_SOPP__S_SENDMSG + + // --- description from .arch file --- + // Send a message upstream to VGT or the interrupt handler. + // SIMM16[9:0] contains the message type and is documented in the shader + // --- programming guide. 
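A related decode for s_sleep, whose execute() appears a few instructions above: the .arch text scales only SIMM16[2:0] by 64 clocks, which agrees with the 64 * simm16 in the patch for the well-formed encodings 0..7. A minimal sketch under that assumption:

    #include <cstdint>

    // Sleep duration in cycles, per the .arch description (SIMM16[2:0] * 64).
    static uint32_t sleepCycles(uint16_t simm16)
    {
        return 64u * (simm16 & 0x7u);
    }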
+ void + Inst_SOPP__S_SENDMSG::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPP__S_SENDMSGHALT class methods --- + + Inst_SOPP__S_SENDMSGHALT::Inst_SOPP__S_SENDMSGHALT(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_sendmsghalt") + { + } // Inst_SOPP__S_SENDMSGHALT + + Inst_SOPP__S_SENDMSGHALT::~Inst_SOPP__S_SENDMSGHALT() + { + } // ~Inst_SOPP__S_SENDMSGHALT + + // --- description from .arch file --- + // Send a message and then HALT the wavefront; see S_SENDMSG for details. + void + Inst_SOPP__S_SENDMSGHALT::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPP__S_TRAP class methods --- + + Inst_SOPP__S_TRAP::Inst_SOPP__S_TRAP(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_trap") + { + } // Inst_SOPP__S_TRAP + + Inst_SOPP__S_TRAP::~Inst_SOPP__S_TRAP() + { + } // ~Inst_SOPP__S_TRAP + + // --- description from .arch file --- + // TrapID = SIMM16[7:0]; + // Wait for all instructions to complete; + // set {TTMP1, TTMP0} = {3'h0, PCRewind[3:0], HT[0], TrapID[7:0], + // PC[47:0]}; + // PC = TBA (trap base address); + // PRIV = 1. + // Enter the trap handler. This instruction may be generated internally as + // well in response to a host trap (HT = 1) or an exception. + // TrapID 0 is reserved for hardware use and should not be used in a + // shader-generated trap. + void + Inst_SOPP__S_TRAP::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPP__S_ICACHE_INV class methods --- + + Inst_SOPP__S_ICACHE_INV::Inst_SOPP__S_ICACHE_INV(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_icache_inv") + { + } // Inst_SOPP__S_ICACHE_INV + + Inst_SOPP__S_ICACHE_INV::~Inst_SOPP__S_ICACHE_INV() + { + } // ~Inst_SOPP__S_ICACHE_INV + + // --- description from .arch file --- + // Invalidate entire L1 instruction cache. + // You must have 12 separate S_NOP instructions or a jump/branch + // instruction after this instruction + // to ensure the SQ instruction buffer is purged. + void + Inst_SOPP__S_ICACHE_INV::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPP__S_INCPERFLEVEL class methods --- + + Inst_SOPP__S_INCPERFLEVEL::Inst_SOPP__S_INCPERFLEVEL(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_incperflevel") + { + } // Inst_SOPP__S_INCPERFLEVEL + + Inst_SOPP__S_INCPERFLEVEL::~Inst_SOPP__S_INCPERFLEVEL() + { + } // ~Inst_SOPP__S_INCPERFLEVEL + + // --- description from .arch file --- + // Increment performance counter specified in SIMM16[3:0] by 1. + void + Inst_SOPP__S_INCPERFLEVEL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPP__S_DECPERFLEVEL class methods --- + + Inst_SOPP__S_DECPERFLEVEL::Inst_SOPP__S_DECPERFLEVEL(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_decperflevel") + { + } // Inst_SOPP__S_DECPERFLEVEL + + Inst_SOPP__S_DECPERFLEVEL::~Inst_SOPP__S_DECPERFLEVEL() + { + } // ~Inst_SOPP__S_DECPERFLEVEL + + // --- description from .arch file --- + // Decrement performance counter specified in SIMM16[3:0] by 1. + void + Inst_SOPP__S_DECPERFLEVEL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPP__S_TTRACEDATA class methods --- + + Inst_SOPP__S_TTRACEDATA::Inst_SOPP__S_TTRACEDATA(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_ttracedata") + { + } // Inst_SOPP__S_TTRACEDATA + + Inst_SOPP__S_TTRACEDATA::~Inst_SOPP__S_TTRACEDATA() + { + } // ~Inst_SOPP__S_TTRACEDATA + + // --- description from .arch file --- + // Send M0 as user data to the thread trace stream. 
+ void + Inst_SOPP__S_TTRACEDATA::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPP__S_CBRANCH_CDBGSYS class methods --- + + Inst_SOPP__S_CBRANCH_CDBGSYS::Inst_SOPP__S_CBRANCH_CDBGSYS( + InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_cbranch_cdbgsys") + { + setFlag(Branch); + } // Inst_SOPP__S_CBRANCH_CDBGSYS + + Inst_SOPP__S_CBRANCH_CDBGSYS::~Inst_SOPP__S_CBRANCH_CDBGSYS() + { + } // ~Inst_SOPP__S_CBRANCH_CDBGSYS + + // --- description from .arch file --- + // if (conditional_debug_system != 0) then PC = PC + signext(SIMM16 * 4) + // + 4; + // else NOP. + void + Inst_SOPP__S_CBRANCH_CDBGSYS::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPP__S_CBRANCH_CDBGUSER class methods --- + + Inst_SOPP__S_CBRANCH_CDBGUSER::Inst_SOPP__S_CBRANCH_CDBGUSER( + InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_cbranch_cdbguser") + { + setFlag(Branch); + } // Inst_SOPP__S_CBRANCH_CDBGUSER + + Inst_SOPP__S_CBRANCH_CDBGUSER::~Inst_SOPP__S_CBRANCH_CDBGUSER() + { + } // ~Inst_SOPP__S_CBRANCH_CDBGUSER + + // --- description from .arch file --- + // if (conditional_debug_user != 0) then PC = PC + signext(SIMM16 * 4) + 4; + // else NOP. + void + Inst_SOPP__S_CBRANCH_CDBGUSER::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER class methods --- + + Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER( + InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_or_user") + { + setFlag(Branch); + } // Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER + + Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER:: + ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER() + { + } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER + + // --- description from .arch file --- + // if (conditional_debug_system || conditional_debug_user) then PC = PC + + // --- signext(SIMM16 * 4) + 4; + // else NOP. + void + Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER class methods --- + + Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER:: + Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_and_user") + { + setFlag(Branch); + } // Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER + + Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER:: + ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER() + { + } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER + + // --- description from .arch file --- + // if (conditional_debug_system && conditional_debug_user) then PC = PC + + // --- signext(SIMM16 * 4) + 4; + // else NOP. + void + Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPP__S_ENDPGM_SAVED class methods --- + + Inst_SOPP__S_ENDPGM_SAVED::Inst_SOPP__S_ENDPGM_SAVED(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_endpgm_saved") + { + } // Inst_SOPP__S_ENDPGM_SAVED + + Inst_SOPP__S_ENDPGM_SAVED::~Inst_SOPP__S_ENDPGM_SAVED() + { + } // ~Inst_SOPP__S_ENDPGM_SAVED + + // --- description from .arch file --- + // End of program; signal that a wave has been saved by the context-switch + // trap handler and terminate wavefront. + // The hardware implicitly executes S_WAITCNT 0 before executing this + // instruction. + // Use S_ENDPGM in all cases unless you are executing the context-switch + // save handler. 
+ void + Inst_SOPP__S_ENDPGM_SAVED::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPP__S_SET_GPR_IDX_OFF class methods --- + + Inst_SOPP__S_SET_GPR_IDX_OFF::Inst_SOPP__S_SET_GPR_IDX_OFF( + InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_set_gpr_idx_off") + { + } // Inst_SOPP__S_SET_GPR_IDX_OFF + + Inst_SOPP__S_SET_GPR_IDX_OFF::~Inst_SOPP__S_SET_GPR_IDX_OFF() + { + } // ~Inst_SOPP__S_SET_GPR_IDX_OFF + + // --- description from .arch file --- + // MODE.gpr_idx_en = 0. + // Clear GPR indexing mode. Vector operations after this will not perform + // --- relative GPR addressing regardless of the contents of M0. This + // --- instruction does not modify M0. + void + Inst_SOPP__S_SET_GPR_IDX_OFF::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPP__S_SET_GPR_IDX_MODE class methods --- + + Inst_SOPP__S_SET_GPR_IDX_MODE::Inst_SOPP__S_SET_GPR_IDX_MODE( + InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_set_gpr_idx_mode") + { + } // Inst_SOPP__S_SET_GPR_IDX_MODE + + Inst_SOPP__S_SET_GPR_IDX_MODE::~Inst_SOPP__S_SET_GPR_IDX_MODE() + { + } // ~Inst_SOPP__S_SET_GPR_IDX_MODE + + // --- description from .arch file --- + // M0[15:12] = SIMM4. + // Modify the mode used for vector GPR indexing. + // The raw contents of the source field are read and used to set the enable + // bits. SIMM4[0] = VSRC0_REL, SIMM4[1] = VSRC1_REL, SIMM4[2] = VSRC2_REL + // and SIMM4[3] = VDST_REL. + void + Inst_SOPP__S_SET_GPR_IDX_MODE::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/vinterp.cc b/src/arch/amdgpu/vega/insts/vinterp.cc new file mode 100644 index 0000000000..784f6f2eb2 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/vinterp.cc @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_VINTRP__V_INTERP_P1_F32 class methods --- + + Inst_VINTRP__V_INTERP_P1_F32::Inst_VINTRP__V_INTERP_P1_F32( + InFmt_VINTRP *iFmt) + : Inst_VINTRP(iFmt, "v_interp_p1_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VINTRP__V_INTERP_P1_F32 + + Inst_VINTRP__V_INTERP_P1_F32::~Inst_VINTRP__V_INTERP_P1_F32() + { + } // ~Inst_VINTRP__V_INTERP_P1_F32 + + // --- description from .arch file --- + // D.f = P10 * S.f + P0; parameter interpolation (SQ translates to + // V_MAD_F32 for SP). + // CAUTION: when in HALF_LDS mode, D must not be the same GPR as S; + // if D == S then data corruption will occur. + // NOTE: In textual representations the I/J VGPR is the first source and + // the attribute is the second source; however in the VOP3 encoding the + // attribute is stored in the src0 field and the VGPR is stored in the + // src1 field. + void + Inst_VINTRP__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VINTRP__V_INTERP_P2_F32 class methods --- + + Inst_VINTRP__V_INTERP_P2_F32::Inst_VINTRP__V_INTERP_P2_F32( + InFmt_VINTRP *iFmt) + : Inst_VINTRP(iFmt, "v_interp_p2_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VINTRP__V_INTERP_P2_F32 + + Inst_VINTRP__V_INTERP_P2_F32::~Inst_VINTRP__V_INTERP_P2_F32() + { + } // ~Inst_VINTRP__V_INTERP_P2_F32 + + // --- description from .arch file --- + // D.f = P20 * S.f + D.f; parameter interpolation (SQ translates to + // V_MAD_F32 for SP). + // NOTE: In textual representations the I/J VGPR is the first source and + // the attribute is the second source; however in the VOP3 encoding the + // attribute is stored in the src0 field and the VGPR is stored in the + // src1 field. + void + Inst_VINTRP__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VINTRP__V_INTERP_MOV_F32 class methods --- + + Inst_VINTRP__V_INTERP_MOV_F32::Inst_VINTRP__V_INTERP_MOV_F32( + InFmt_VINTRP *iFmt) + : Inst_VINTRP(iFmt, "v_interp_mov_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VINTRP__V_INTERP_MOV_F32 + + Inst_VINTRP__V_INTERP_MOV_F32::~Inst_VINTRP__V_INTERP_MOV_F32() + { + } // ~Inst_VINTRP__V_INTERP_MOV_F32 + + // --- description from .arch file --- + // D.f = {P10,P20,P0}[S.u]; parameter load. + void + Inst_VINTRP__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/vop1.cc b/src/arch/amdgpu/vega/insts/vop1.cc new file mode 100644 index 0000000000..fc41c0ae78 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/vop1.cc @@ -0,0 +1,2340 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. 
Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/inst_util.hh" +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_VOP1__V_NOP class methods --- + + Inst_VOP1__V_NOP::Inst_VOP1__V_NOP(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_nop") + { + setFlag(Nop); + setFlag(ALU); + } // Inst_VOP1__V_NOP + + Inst_VOP1__V_NOP::~Inst_VOP1__V_NOP() + { + } // ~Inst_VOP1__V_NOP + + // --- description from .arch file --- + // Do nothing. + void + Inst_VOP1__V_NOP::execute(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_VOP1__V_MOV_B32 class methods --- + + Inst_VOP1__V_MOV_B32::Inst_VOP1__V_MOV_B32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_mov_b32") + { + setFlag(ALU); + } // Inst_VOP1__V_MOV_B32 + + Inst_VOP1__V_MOV_B32::~Inst_VOP1__V_MOV_B32() + { + } // ~Inst_VOP1__V_MOV_B32 + + // --- description from .arch file --- + // D.u = S0.u. + // Input and output modifiers not supported; this is an untyped operation. + void + Inst_VOP1__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, instData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (isDPPInst()) { + VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); + src_dpp.read(); + + DPRINTF(VEGA, "Handling V_MOV_B32 SRC DPP. 
SRC0: register v[%d], " + "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, " + "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, " + "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0, + extData.iFmt_VOP_DPP.DPP_CTRL, + extData.iFmt_VOP_DPP.SRC0_ABS, + extData.iFmt_VOP_DPP.SRC0_NEG, + extData.iFmt_VOP_DPP.SRC1_ABS, + extData.iFmt_VOP_DPP.SRC1_NEG, + extData.iFmt_VOP_DPP.BC, + extData.iFmt_VOP_DPP.BANK_MASK, + extData.iFmt_VOP_DPP.ROW_MASK); + + // NOTE: For VOP1, there is no SRC1, so make sure we're not trying + // to negate it or take the absolute value of it + assert(!extData.iFmt_VOP_DPP.SRC1_ABS); + assert(!extData.iFmt_VOP_DPP.SRC1_NEG); + processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src_dpp); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src_dpp[lane]; + } + } + } else { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_READFIRSTLANE_B32 class methods --- + + Inst_VOP1__V_READFIRSTLANE_B32::Inst_VOP1__V_READFIRSTLANE_B32( + InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_readfirstlane_b32") + { + setFlag(ALU); + } // Inst_VOP1__V_READFIRSTLANE_B32 + + Inst_VOP1__V_READFIRSTLANE_B32::~Inst_VOP1__V_READFIRSTLANE_B32() + { + } // ~Inst_VOP1__V_READFIRSTLANE_B32 + + // --- description from .arch file --- + // Copy one VGPR value to one SGPR. D = SGPR destination, S0 = source data + // (VGPR# or M0 for lds direct access), Lane# = FindFirst1fromLSB(exec) + // (Lane# = 0 if exec is zero). Ignores exec mask for the access. SQ + // translates to V_READLANE_B32. + // Input and output modifiers not supported; this is an untyped operation. + void + Inst_VOP1__V_READFIRSTLANE_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarRegI32 src_lane(0); + ScalarRegU64 exec_mask = wf->execMask().to_ullong(); + ConstVecOperandU32 src(gpuDynInst, instData.SRC0); + ScalarOperandU32 sdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (exec_mask) { + src_lane = findLsbSet(exec_mask); + } + + sdst = src[src_lane]; + + sdst.write(); + } // execute + // --- Inst_VOP1__V_CVT_I32_F64 class methods --- + + Inst_VOP1__V_CVT_I32_F64::Inst_VOP1__V_CVT_I32_F64(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_i32_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP1__V_CVT_I32_F64 + + Inst_VOP1__V_CVT_I32_F64::~Inst_VOP1__V_CVT_I32_F64() + { + } // ~Inst_VOP1__V_CVT_I32_F64 + + // --- description from .arch file --- + // D.i = (int)S0.d. + // Out-of-range floating point values (including infinity) saturate. NaN is + // --- converted to 0. 
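The float-to-integer conversions in this family all follow the saturation rule stated above: NaN becomes 0, and anything outside the destination range (including infinities) clamps to the type's minimum or maximum. A minimal standalone sketch of that rule for the signed 32-bit case, in plain C++ rather than the gem5 operand classes (the helper name is made up for illustration):

#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>

// Saturating double -> int32 conversion as described above (sketch only):
// NaN becomes 0; out-of-range values, including +/-inf, clamp to the
// int32 limits before the truncating cast.
static int32_t
cvtI32SatF64(double s)
{
    if (std::isnan(s)) {
        return 0;
    }
    if (s >= 2147483648.0) {                        // >= 2^31 cannot fit
        return std::numeric_limits<int32_t>::max();
    }
    if (s < -2147483648.0) {                        // below -2^31 cannot fit
        return std::numeric_limits<int32_t>::min();
    }
    return static_cast<int32_t>(s);                 // truncates toward zero
}

int main()
{
    std::cout << cvtI32SatF64(3.9) << '\n';          // 3
    std::cout << cvtI32SatF64(-1.0e12) << '\n';      // -2147483648
    std::cout << cvtI32SatF64(std::nan("")) << '\n'; // 0
}

The execute() implementations that follow reach the same decision by inspecting the exponent returned by std::frexp rather than comparing against the bounds directly.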
+ void + Inst_VOP1__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, instData.SRC0); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + int exp; + std::frexp(src[lane],&exp); + if (std::isnan(src[lane])) { + vdst[lane] = 0; + } else if (std::isinf(src[lane]) || exp > 30) { + if (std::signbit(src[lane])) { + vdst[lane] = INT_MIN; + } else { + vdst[lane] = INT_MAX; + } + } else { + vdst[lane] = (VecElemI32)src[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_CVT_F64_I32 class methods --- + + Inst_VOP1__V_CVT_F64_I32::Inst_VOP1__V_CVT_F64_I32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_f64_i32") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP1__V_CVT_F64_I32 + + Inst_VOP1__V_CVT_F64_I32::~Inst_VOP1__V_CVT_F64_I32() + { + } // ~Inst_VOP1__V_CVT_F64_I32 + + // --- description from .arch file --- + // D.d = (double)S0.i. + void + Inst_VOP1__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src(gpuDynInst, instData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF64)src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_CVT_F32_I32 class methods --- + + Inst_VOP1__V_CVT_F32_I32::Inst_VOP1__V_CVT_F32_I32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_f32_i32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_CVT_F32_I32 + + Inst_VOP1__V_CVT_F32_I32::~Inst_VOP1__V_CVT_F32_I32() + { + } // ~Inst_VOP1__V_CVT_F32_I32 + + // --- description from .arch file --- + // D.f = (float)S0.i. + void + Inst_VOP1__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_CVT_F32_U32 class methods --- + + Inst_VOP1__V_CVT_F32_U32::Inst_VOP1__V_CVT_F32_U32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_f32_u32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_CVT_F32_U32 + + Inst_VOP1__V_CVT_F32_U32::~Inst_VOP1__V_CVT_F32_U32() + { + } // ~Inst_VOP1__V_CVT_F32_U32 + + // --- description from .arch file --- + // D.f = (float)S0.u. + void + Inst_VOP1__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_CVT_U32_F32 class methods --- + + Inst_VOP1__V_CVT_U32_F32::Inst_VOP1__V_CVT_U32_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_u32_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_CVT_U32_F32 + + Inst_VOP1__V_CVT_U32_F32::~Inst_VOP1__V_CVT_U32_F32() + { + } // ~Inst_VOP1__V_CVT_U32_F32 + + // --- description from .arch file --- + // D.u = (unsigned)S0.f. + // Out-of-range floating point values (including infinity) saturate. NaN is + // --- converted to 0. 
+ void + Inst_VOP1__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + int exp; + std::frexp(src[lane],&exp); + if (std::isnan(src[lane])) { + vdst[lane] = 0; + } else if (std::isinf(src[lane])) { + if (std::signbit(src[lane])) { + vdst[lane] = 0; + } else { + vdst[lane] = UINT_MAX; + } + } else if (exp > 31) { + vdst[lane] = UINT_MAX; + } else { + vdst[lane] = (VecElemU32)src[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_CVT_I32_F32 class methods --- + + Inst_VOP1__V_CVT_I32_F32::Inst_VOP1__V_CVT_I32_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_i32_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_CVT_I32_F32 + + Inst_VOP1__V_CVT_I32_F32::~Inst_VOP1__V_CVT_I32_F32() + { + } // ~Inst_VOP1__V_CVT_I32_F32 + + // --- description from .arch file --- + // D.i = (int)S0.f. + // Out-of-range floating point values (including infinity) saturate. NaN is + // --- converted to 0. + void + Inst_VOP1__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + int exp; + std::frexp(src[lane],&exp); + if (std::isnan(src[lane])) { + vdst[lane] = 0; + } else if (std::isinf(src[lane]) || exp > 30) { + if (std::signbit(src[lane])) { + vdst[lane] = INT_MIN; + } else { + vdst[lane] = INT_MAX; + } + } else { + vdst[lane] = (VecElemI32)src[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_MOV_FED_B32 class methods --- + + Inst_VOP1__V_MOV_FED_B32::Inst_VOP1__V_MOV_FED_B32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_mov_fed_b32") + { + setFlag(ALU); + } // Inst_VOP1__V_MOV_FED_B32 + + Inst_VOP1__V_MOV_FED_B32::~Inst_VOP1__V_MOV_FED_B32() + { + } // ~Inst_VOP1__V_MOV_FED_B32 + + // --- description from .arch file --- + // D.u = S0.u; + // Introduce EDC double error upon write to dest vgpr without causing an + // --- exception. + // Input and output modifiers not supported; this is an untyped operation. + void + Inst_VOP1__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_CVT_F16_F32 class methods --- + + Inst_VOP1__V_CVT_F16_F32::Inst_VOP1__V_CVT_F16_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_f16_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_CVT_F16_F32 + + Inst_VOP1__V_CVT_F16_F32::~Inst_VOP1__V_CVT_F16_F32() + { + } // ~Inst_VOP1__V_CVT_F16_F32 + + // --- description from .arch file --- + // D.f16 = flt32_to_flt16(S0.f). + // Supports input modifiers and creates FP16 denormals when appropriate. + void + Inst_VOP1__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_CVT_F32_F16 class methods --- + + Inst_VOP1__V_CVT_F32_F16::Inst_VOP1__V_CVT_F32_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_f32_f16") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_CVT_F32_F16 + + Inst_VOP1__V_CVT_F32_F16::~Inst_VOP1__V_CVT_F32_F16() + { + } // ~Inst_VOP1__V_CVT_F32_F16 + + // --- description from .arch file --- + // D.f = flt16_to_flt32(S0.f16). + // FP16 denormal inputs are always accepted. 
+ void + Inst_VOP1__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_CVT_RPI_I32_F32 class methods --- + + Inst_VOP1__V_CVT_RPI_I32_F32::Inst_VOP1__V_CVT_RPI_I32_F32( + InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_rpi_i32_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_CVT_RPI_I32_F32 + + Inst_VOP1__V_CVT_RPI_I32_F32::~Inst_VOP1__V_CVT_RPI_I32_F32() + { + } // ~Inst_VOP1__V_CVT_RPI_I32_F32 + + // --- description from .arch file --- + // D.i = (int)floor(S0.f + 0.5). + void + Inst_VOP1__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_CVT_FLR_I32_F32 class methods --- + + Inst_VOP1__V_CVT_FLR_I32_F32::Inst_VOP1__V_CVT_FLR_I32_F32( + InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_flr_i32_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_CVT_FLR_I32_F32 + + Inst_VOP1__V_CVT_FLR_I32_F32::~Inst_VOP1__V_CVT_FLR_I32_F32() + { + } // ~Inst_VOP1__V_CVT_FLR_I32_F32 + + // --- description from .arch file --- + // D.i = (int)floor(S0.f). + void + Inst_VOP1__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemI32)std::floor(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_CVT_OFF_F32_I4 class methods --- + + Inst_VOP1__V_CVT_OFF_F32_I4::Inst_VOP1__V_CVT_OFF_F32_I4(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_off_f32_i4") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_CVT_OFF_F32_I4 + + Inst_VOP1__V_CVT_OFF_F32_I4::~Inst_VOP1__V_CVT_OFF_F32_I4() + { + } // ~Inst_VOP1__V_CVT_OFF_F32_I4 + + // --- description from .arch file --- + // 4-bit signed int to 32-bit float. Used for interpolation in shader. + void + Inst_VOP1__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst) + { + // Could not parse sq_uc.arch desc field + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_CVT_F32_F64 class methods --- + + Inst_VOP1__V_CVT_F32_F64::Inst_VOP1__V_CVT_F32_F64(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_f32_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP1__V_CVT_F32_F64 + + Inst_VOP1__V_CVT_F32_F64::~Inst_VOP1__V_CVT_F32_F64() + { + } // ~Inst_VOP1__V_CVT_F32_F64 + + // --- description from .arch file --- + // D.f = (float)S0.d. 
+ void + Inst_VOP1__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_CVT_F64_F32 class methods --- + + Inst_VOP1__V_CVT_F64_F32::Inst_VOP1__V_CVT_F64_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_f64_f32") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP1__V_CVT_F64_F32 + + Inst_VOP1__V_CVT_F64_F32::~Inst_VOP1__V_CVT_F64_F32() + { + } // ~Inst_VOP1__V_CVT_F64_F32 + + // --- description from .arch file --- + // D.d = (double)S0.f. + void + Inst_VOP1__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF64)src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_CVT_F32_UBYTE0 class methods --- + + Inst_VOP1__V_CVT_F32_UBYTE0::Inst_VOP1__V_CVT_F32_UBYTE0(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_f32_ubyte0") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_CVT_F32_UBYTE0 + + Inst_VOP1__V_CVT_F32_UBYTE0::~Inst_VOP1__V_CVT_F32_UBYTE0() + { + } // ~Inst_VOP1__V_CVT_F32_UBYTE0 + + // --- description from .arch file --- + // D.f = (float)(S0.u[7:0]). + void + Inst_VOP1__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0)); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_CVT_F32_UBYTE1 class methods --- + + Inst_VOP1__V_CVT_F32_UBYTE1::Inst_VOP1__V_CVT_F32_UBYTE1(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_f32_ubyte1") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_CVT_F32_UBYTE1 + + Inst_VOP1__V_CVT_F32_UBYTE1::~Inst_VOP1__V_CVT_F32_UBYTE1() + { + } // ~Inst_VOP1__V_CVT_F32_UBYTE1 + + // --- description from .arch file --- + // D.f = (float)(S0.u[15:8]). + void + Inst_VOP1__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8)); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_CVT_F32_UBYTE2 class methods --- + + Inst_VOP1__V_CVT_F32_UBYTE2::Inst_VOP1__V_CVT_F32_UBYTE2(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_f32_ubyte2") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_CVT_F32_UBYTE2 + + Inst_VOP1__V_CVT_F32_UBYTE2::~Inst_VOP1__V_CVT_F32_UBYTE2() + { + } // ~Inst_VOP1__V_CVT_F32_UBYTE2 + + // --- description from .arch file --- + // D.f = (float)(S0.u[23:16]). 
+ void + Inst_VOP1__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16)); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_CVT_F32_UBYTE3 class methods --- + + Inst_VOP1__V_CVT_F32_UBYTE3::Inst_VOP1__V_CVT_F32_UBYTE3(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_f32_ubyte3") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_CVT_F32_UBYTE3 + + Inst_VOP1__V_CVT_F32_UBYTE3::~Inst_VOP1__V_CVT_F32_UBYTE3() + { + } // ~Inst_VOP1__V_CVT_F32_UBYTE3 + + // --- description from .arch file --- + // D.f = (float)(S0.u[31:24]). + void + Inst_VOP1__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24)); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_CVT_U32_F64 class methods --- + + Inst_VOP1__V_CVT_U32_F64::Inst_VOP1__V_CVT_U32_F64(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_u32_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP1__V_CVT_U32_F64 + + Inst_VOP1__V_CVT_U32_F64::~Inst_VOP1__V_CVT_U32_F64() + { + } // ~Inst_VOP1__V_CVT_U32_F64 + + // --- description from .arch file --- + // D.u = (unsigned)S0.d. + // Out-of-range floating point values (including infinity) saturate. NaN is + // --- converted to 0. + void + Inst_VOP1__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, instData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + int exp; + std::frexp(src[lane],&exp); + if (std::isnan(src[lane])) { + vdst[lane] = 0; + } else if (std::isinf(src[lane])) { + if (std::signbit(src[lane])) { + vdst[lane] = 0; + } else { + vdst[lane] = UINT_MAX; + } + } else if (exp > 31) { + vdst[lane] = UINT_MAX; + } else { + vdst[lane] = (VecElemU32)src[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_CVT_F64_U32 class methods --- + + Inst_VOP1__V_CVT_F64_U32::Inst_VOP1__V_CVT_F64_U32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_f64_u32") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP1__V_CVT_F64_U32 + + Inst_VOP1__V_CVT_F64_U32::~Inst_VOP1__V_CVT_F64_U32() + { + } // ~Inst_VOP1__V_CVT_F64_U32 + + // --- description from .arch file --- + // D.d = (double)S0.u. 
+ void + Inst_VOP1__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, instData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF64)src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_TRUNC_F64 class methods --- + + Inst_VOP1__V_TRUNC_F64::Inst_VOP1__V_TRUNC_F64(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_trunc_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP1__V_TRUNC_F64 + + Inst_VOP1__V_TRUNC_F64::~Inst_VOP1__V_TRUNC_F64() + { + } // ~Inst_VOP1__V_TRUNC_F64 + + // --- description from .arch file --- + // D.d = trunc(S0.d), return integer part of S0.d. + void + Inst_VOP1__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, instData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::trunc(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_CEIL_F64 class methods --- + + Inst_VOP1__V_CEIL_F64::Inst_VOP1__V_CEIL_F64(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_ceil_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP1__V_CEIL_F64 + + Inst_VOP1__V_CEIL_F64::~Inst_VOP1__V_CEIL_F64() + { + } // ~Inst_VOP1__V_CEIL_F64 + + // --- description from .arch file --- + // D.d = trunc(S0.d); + // if (S0.d > 0.0 && S0.d != D.d) then D.d += 1.0. + void + Inst_VOP1__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, instData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::ceil(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_RNDNE_F64 class methods --- + + Inst_VOP1__V_RNDNE_F64::Inst_VOP1__V_RNDNE_F64(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_rndne_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP1__V_RNDNE_F64 + + Inst_VOP1__V_RNDNE_F64::~Inst_VOP1__V_RNDNE_F64() + { + } // ~Inst_VOP1__V_RNDNE_F64 + + // --- description from .arch file --- + // D.d = round_nearest_even(S0.d). + void + Inst_VOP1__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, instData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = roundNearestEven(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_FLOOR_F64 class methods --- + + Inst_VOP1__V_FLOOR_F64::Inst_VOP1__V_FLOOR_F64(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_floor_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP1__V_FLOOR_F64 + + Inst_VOP1__V_FLOOR_F64::~Inst_VOP1__V_FLOOR_F64() + { + } // ~Inst_VOP1__V_FLOOR_F64 + + // --- description from .arch file --- + // D.d = trunc(S0.d); + // if (S0.d < 0.0 && S0.d != D.d) then D.d += -1.0. 
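The floor and ceiling descriptions are written as trunc-and-adjust pseudo-code; a small standalone check (illustrative only, not part of the patch; the helper name is invented) that the floor form agrees with the std::floor call the implementations use:

#include <cassert>
#include <cmath>

// The .arch pseudo-code above: truncate toward zero, then step down by one
// for negative values that were not already integral (sketch only).
static double
floorViaTrunc(double s)
{
    double d = std::trunc(s);
    if (s < 0.0 && s != d) {
        d += -1.0;
    }
    return d;
}

int main()
{
    // Matches the std::floor call the execute() methods use directly.
    for (double x : {-2.5, -2.0, -0.1, 0.0, 0.1, 2.5}) {
        assert(floorViaTrunc(x) == std::floor(x));
    }
}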
+ void + Inst_VOP1__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, instData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::floor(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_FRACT_F32 class methods --- + + Inst_VOP1__V_FRACT_F32::Inst_VOP1__V_FRACT_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_fract_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_FRACT_F32 + + Inst_VOP1__V_FRACT_F32::~Inst_VOP1__V_FRACT_F32() + { + } // ~Inst_VOP1__V_FRACT_F32 + + // --- description from .arch file --- + // D.f = S0.f - floor(S0.f). + void + Inst_VOP1__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemF32 int_part(0.0); + vdst[lane] = std::modf(src[lane], &int_part); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_TRUNC_F32 class methods --- + + Inst_VOP1__V_TRUNC_F32::Inst_VOP1__V_TRUNC_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_trunc_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_TRUNC_F32 + + Inst_VOP1__V_TRUNC_F32::~Inst_VOP1__V_TRUNC_F32() + { + } // ~Inst_VOP1__V_TRUNC_F32 + + // --- description from .arch file --- + // D.f = trunc(S0.f), return integer part of S0.f. + void + Inst_VOP1__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst (gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::trunc(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_CEIL_F32 class methods --- + + Inst_VOP1__V_CEIL_F32::Inst_VOP1__V_CEIL_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_ceil_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_CEIL_F32 + + Inst_VOP1__V_CEIL_F32::~Inst_VOP1__V_CEIL_F32() + { + } // ~Inst_VOP1__V_CEIL_F32 + + // --- description from .arch file --- + // D.f = trunc(S0.f); + // if (S0.f > 0.0 && S0.f != D.f) then D.f += 1.0. + void + Inst_VOP1__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::ceil(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_RNDNE_F32 class methods --- + + Inst_VOP1__V_RNDNE_F32::Inst_VOP1__V_RNDNE_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_rndne_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_RNDNE_F32 + + Inst_VOP1__V_RNDNE_F32::~Inst_VOP1__V_RNDNE_F32() + { + } // ~Inst_VOP1__V_RNDNE_F32 + + // --- description from .arch file --- + // D.f = round_nearest_even(S0.f). 
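The round-to-nearest-even instructions call a roundNearestEven() helper; as a standalone illustration of the semantics (assuming the default IEEE rounding mode is in effect), the same tie-to-even behaviour can be observed with std::nearbyint:

#include <cfenv>
#include <cmath>
#include <iostream>

int main()
{
    // Under FE_TONEAREST, std::nearbyint rounds halfway cases to the even
    // neighbour -- the behaviour the descriptions call round_nearest_even.
    std::fesetround(FE_TONEAREST);
    std::cout << std::nearbyint(0.5)  << '\n';   // 0
    std::cout << std::nearbyint(1.5)  << '\n';   // 2
    std::cout << std::nearbyint(2.5)  << '\n';   // 2
    std::cout << std::nearbyint(-1.5) << '\n';   // -2
}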
+ void + Inst_VOP1__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = roundNearestEven(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_FLOOR_F32 class methods --- + + Inst_VOP1__V_FLOOR_F32::Inst_VOP1__V_FLOOR_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_floor_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_FLOOR_F32 + + Inst_VOP1__V_FLOOR_F32::~Inst_VOP1__V_FLOOR_F32() + { + } // ~Inst_VOP1__V_FLOOR_F32 + + // --- description from .arch file --- + // D.f = trunc(S0.f); + // if (S0.f < 0.0 && S0.f != D.f) then D.f += -1.0. + void + Inst_VOP1__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::floor(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_EXP_F32 class methods --- + + Inst_VOP1__V_EXP_F32::Inst_VOP1__V_EXP_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_exp_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_EXP_F32 + + Inst_VOP1__V_EXP_F32::~Inst_VOP1__V_EXP_F32() + { + } // ~Inst_VOP1__V_EXP_F32 + + // --- description from .arch file --- + // D.f = pow(2.0, S0.f). + void + Inst_VOP1__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::pow(2.0, src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_LOG_F32 class methods --- + + Inst_VOP1__V_LOG_F32::Inst_VOP1__V_LOG_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_log_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_LOG_F32 + + Inst_VOP1__V_LOG_F32::~Inst_VOP1__V_LOG_F32() + { + } // ~Inst_VOP1__V_LOG_F32 + + // --- description from .arch file --- + // D.f = log2(S0.f). Base 2 logarithm. + void + Inst_VOP1__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::log2(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_RCP_F32 class methods --- + + Inst_VOP1__V_RCP_F32::Inst_VOP1__V_RCP_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_rcp_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_RCP_F32 + + Inst_VOP1__V_RCP_F32::~Inst_VOP1__V_RCP_F32() + { + } // ~Inst_VOP1__V_RCP_F32 + + // --- description from .arch file --- + // D.f = 1.0 / S0.f. Reciprocal with IEEE rules and < 1ulp error. 
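A quick standalone check, assuming IEC 60559 (IEEE-754) floats, of what plain division yields for the reciprocal's corner cases; the unbranched F32 implementation below relies on this, while the F64 variant later in the file handles zero, NaN and infinity with explicit branches:

#include <cmath>
#include <iostream>

int main()
{
    // Corner cases of 1.0f / x under IEEE-754 arithmetic (sketch only;
    // volatile just keeps the divisions from being folded at compile time).
    volatile float zero = 0.0f;
    volatile float inf  = INFINITY;
    volatile float qnan = std::nanf("");

    std::cout << 1.0f / zero  << '\n';   // inf
    std::cout << 1.0f / -zero << '\n';   // -inf
    std::cout << 1.0f / inf   << '\n';   // 0
    std::cout << 1.0f / qnan  << '\n';   // nan
}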
+ void + Inst_VOP1__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = 1.0 / src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_RCP_IFLAG_F32 class methods --- + + Inst_VOP1__V_RCP_IFLAG_F32::Inst_VOP1__V_RCP_IFLAG_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_rcp_iflag_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_RCP_IFLAG_F32 + + Inst_VOP1__V_RCP_IFLAG_F32::~Inst_VOP1__V_RCP_IFLAG_F32() + { + } // ~Inst_VOP1__V_RCP_IFLAG_F32 + + // --- description from .arch file --- + // D.f = 1.0 / S0.f. Reciprocal intended for integer division, can raise + // --- integer DIV_BY_ZERO exception but cannot raise floating-point + // --- exceptions. + void + Inst_VOP1__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = 1.0 / src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_RSQ_F32 class methods --- + + Inst_VOP1__V_RSQ_F32::Inst_VOP1__V_RSQ_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_rsq_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_RSQ_F32 + + Inst_VOP1__V_RSQ_F32::~Inst_VOP1__V_RSQ_F32() + { + } // ~Inst_VOP1__V_RSQ_F32 + + // --- description from .arch file --- + // D.f = 1.0 / sqrt(S0.f). Reciprocal square root with IEEE rules. + void + Inst_VOP1__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = 1.0 / std::sqrt(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_RCP_F64 class methods --- + + Inst_VOP1__V_RCP_F64::Inst_VOP1__V_RCP_F64(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_rcp_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP1__V_RCP_F64 + + Inst_VOP1__V_RCP_F64::~Inst_VOP1__V_RCP_F64() + { + } // ~Inst_VOP1__V_RCP_F64 + + // --- description from .arch file --- + // D.d = 1.0 / S0.d. + void + Inst_VOP1__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, instData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::fpclassify(src[lane]) == FP_ZERO) { + vdst[lane] = +INFINITY; + } else if (std::isnan(src[lane])) { + vdst[lane] = NAN; + } else if (std::isinf(src[lane])) { + if (std::signbit(src[lane])) { + vdst[lane] = -0.0; + } else { + vdst[lane] = 0.0; + } + } else { + vdst[lane] = 1.0 / src[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_RSQ_F64 class methods --- + + Inst_VOP1__V_RSQ_F64::Inst_VOP1__V_RSQ_F64(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_rsq_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP1__V_RSQ_F64 + + Inst_VOP1__V_RSQ_F64::~Inst_VOP1__V_RSQ_F64() + { + } // ~Inst_VOP1__V_RSQ_F64 + + // --- description from .arch file --- + // D.d = 1.0 / sqrt(S0.d). 
See V_RSQ_F32. + void + Inst_VOP1__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, instData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::fpclassify(src[lane]) == FP_ZERO) { + vdst[lane] = +INFINITY; + } else if (std::isnan(src[lane])) { + vdst[lane] = NAN; + } else if (std::isinf(src[lane]) + && !std::signbit(src[lane])) { + vdst[lane] = 0.0; + } else if (std::signbit(src[lane])) { + vdst[lane] = NAN; + } else { + vdst[lane] = 1.0 / std::sqrt(src[lane]); + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_SQRT_F32 class methods --- + + Inst_VOP1__V_SQRT_F32::Inst_VOP1__V_SQRT_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_sqrt_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_SQRT_F32 + + Inst_VOP1__V_SQRT_F32::~Inst_VOP1__V_SQRT_F32() + { + } // ~Inst_VOP1__V_SQRT_F32 + + // --- description from .arch file --- + // D.f = sqrt(S0.f). + void + Inst_VOP1__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::sqrt(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_SQRT_F64 class methods --- + + Inst_VOP1__V_SQRT_F64::Inst_VOP1__V_SQRT_F64(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_sqrt_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP1__V_SQRT_F64 + + Inst_VOP1__V_SQRT_F64::~Inst_VOP1__V_SQRT_F64() + { + } // ~Inst_VOP1__V_SQRT_F64 + + // --- description from .arch file --- + // D.d = sqrt(S0.d). + void + Inst_VOP1__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, instData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::sqrt(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_SIN_F32 class methods --- + + Inst_VOP1__V_SIN_F32::Inst_VOP1__V_SIN_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_sin_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_SIN_F32 + + Inst_VOP1__V_SIN_F32::~Inst_VOP1__V_SIN_F32() + { + } // ~Inst_VOP1__V_SIN_F32 + + // --- description from .arch file --- + // D.f = sin(S0.f * 2 * PI). + // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in + // float 0.0. 
+ void + Inst_VOP1__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + ConstScalarOperandF32 pi(gpuDynInst, REG_PI); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + pi.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (src[lane] < -256.0 || src[lane] > 256.0) { + vdst[lane] = 0.0; + } else { + vdst[lane] = std::sin(src[lane] * 2.0 * pi.rawData()); + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_COS_F32 class methods --- + + Inst_VOP1__V_COS_F32::Inst_VOP1__V_COS_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cos_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_COS_F32 + + Inst_VOP1__V_COS_F32::~Inst_VOP1__V_COS_F32() + { + } // ~Inst_VOP1__V_COS_F32 + + // --- description from .arch file --- + // D.f = cos(S0.f * 2 * PI). + // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in + // float 1.0. + void + Inst_VOP1__V_COS_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + ConstScalarOperandF32 pi(gpuDynInst, REG_PI); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + pi.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (src[lane] < -256.0 || src[lane] > 256.0) { + vdst[lane] = 0.0; + } else { + vdst[lane] = std::cos(src[lane] * 2.0 * pi.rawData()); + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_NOT_B32 class methods --- + + Inst_VOP1__V_NOT_B32::Inst_VOP1__V_NOT_B32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_not_b32") + { + setFlag(ALU); + } // Inst_VOP1__V_NOT_B32 + + Inst_VOP1__V_NOT_B32::~Inst_VOP1__V_NOT_B32() + { + } // ~Inst_VOP1__V_NOT_B32 + + // --- description from .arch file --- + // D.u = ~S0.u. + // Input and output modifiers not supported. + void + Inst_VOP1__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, instData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = ~src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_BFREV_B32 class methods --- + + Inst_VOP1__V_BFREV_B32::Inst_VOP1__V_BFREV_B32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_bfrev_b32") + { + setFlag(ALU); + } // Inst_VOP1__V_BFREV_B32 + + Inst_VOP1__V_BFREV_B32::~Inst_VOP1__V_BFREV_B32() + { + } // ~Inst_VOP1__V_BFREV_B32 + + // --- description from .arch file --- + // D.u[31:0] = S0.u[0:31], bitfield reverse. + // Input and output modifiers not supported. 
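The bitfield-reverse implementation delegates to a reverseBits() helper; a minimal standalone sketch of the D.u[31:0] = S0.u[0:31] operation described above (the function name here is illustrative, not gem5's):

#include <cstdint>
#include <cstdio>

// Reverse the bit order of a 32-bit value: bit i of the input becomes
// bit 31-i of the result (sketch only).
static uint32_t
bfrev32(uint32_t v)
{
    uint32_t r = 0;
    for (int i = 0; i < 32; ++i) {
        r = (r << 1) | ((v >> i) & 1u);
    }
    return r;
}

int main()
{
    std::printf("0x%08x\n", static_cast<unsigned>(bfrev32(0x00000001u)));
    // prints 0x80000000
    std::printf("0x%08x\n", static_cast<unsigned>(bfrev32(0x12345678u)));
    // prints 0x1e6a2c48
}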
+ void + Inst_VOP1__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, instData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = reverseBits(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_FFBH_U32 class methods --- + + Inst_VOP1__V_FFBH_U32::Inst_VOP1__V_FFBH_U32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_ffbh_u32") + { + setFlag(ALU); + } // Inst_VOP1__V_FFBH_U32 + + Inst_VOP1__V_FFBH_U32::~Inst_VOP1__V_FFBH_U32() + { + } // ~Inst_VOP1__V_FFBH_U32 + + // --- description from .arch file --- + // D.u = position of first 1 in S0.u from MSB; + // D.u = 0xffffffff if S0.u == 0. + void + Inst_VOP1__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, instData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = findFirstOneMsb(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_FFBL_B32 class methods --- + + Inst_VOP1__V_FFBL_B32::Inst_VOP1__V_FFBL_B32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_ffbl_b32") + { + setFlag(ALU); + } // Inst_VOP1__V_FFBL_B32 + + Inst_VOP1__V_FFBL_B32::~Inst_VOP1__V_FFBL_B32() + { + } // ~Inst_VOP1__V_FFBL_B32 + + // --- description from .arch file --- + // D.u = position of first 1 in S0.u from LSB; + // D.u = 0xffffffff if S0.u == 0. + void + Inst_VOP1__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, instData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = findFirstOne(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_FFBH_I32 class methods --- + + Inst_VOP1__V_FFBH_I32::Inst_VOP1__V_FFBH_I32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_ffbh_i32") + { + setFlag(ALU); + } // Inst_VOP1__V_FFBH_I32 + + Inst_VOP1__V_FFBH_I32::~Inst_VOP1__V_FFBH_I32() + { + } // ~Inst_VOP1__V_FFBH_I32 + + // --- description from .arch file --- + // D.u = position of first bit different from sign bit in S0.i from MSB; + // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff. + void + Inst_VOP1__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src(gpuDynInst, instData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = firstOppositeSignBit(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_FREXP_EXP_I32_F64 class methods --- + + Inst_VOP1__V_FREXP_EXP_I32_F64::Inst_VOP1__V_FREXP_EXP_I32_F64( + InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_frexp_exp_i32_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP1__V_FREXP_EXP_I32_F64 + + Inst_VOP1__V_FREXP_EXP_I32_F64::~Inst_VOP1__V_FREXP_EXP_I32_F64() + { + } // ~Inst_VOP1__V_FREXP_EXP_I32_F64 + + // --- description from .arch file --- + // See V_FREXP_EXP_I32_F32. 
+ void + Inst_VOP1__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, instData.SRC0); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::isinf(src[lane]) || std::isnan(src[lane])) { + vdst[lane] = 0; + } else { + VecElemI32 exp = 0; + std::frexp(src[lane], &exp); + vdst[lane] = exp; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_FREXP_MANT_F64 class methods --- + + Inst_VOP1__V_FREXP_MANT_F64::Inst_VOP1__V_FREXP_MANT_F64(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_frexp_mant_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP1__V_FREXP_MANT_F64 + + Inst_VOP1__V_FREXP_MANT_F64::~Inst_VOP1__V_FREXP_MANT_F64() + { + } // ~Inst_VOP1__V_FREXP_MANT_F64 + + // --- description from .arch file --- + // See V_FREXP_MANT_F32. + void + Inst_VOP1__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, instData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::isinf(src[lane]) || std::isnan(src[lane])) { + vdst[lane] = src[lane]; + } else { + VecElemI32 exp(0); + vdst[lane] = std::frexp(src[lane], &exp); + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_FRACT_F64 class methods --- + + Inst_VOP1__V_FRACT_F64::Inst_VOP1__V_FRACT_F64(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_fract_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP1__V_FRACT_F64 + + Inst_VOP1__V_FRACT_F64::~Inst_VOP1__V_FRACT_F64() + { + } // ~Inst_VOP1__V_FRACT_F64 + + // --- description from .arch file --- + // See V_FRACT_F32. + void + Inst_VOP1__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, instData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemF64 int_part(0.0); + vdst[lane] = std::modf(src[lane], &int_part); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_FREXP_EXP_I32_F32 class methods --- + + Inst_VOP1__V_FREXP_EXP_I32_F32::Inst_VOP1__V_FREXP_EXP_I32_F32( + InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_frexp_exp_i32_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_FREXP_EXP_I32_F32 + + Inst_VOP1__V_FREXP_EXP_I32_F32::~Inst_VOP1__V_FREXP_EXP_I32_F32() + { + } // ~Inst_VOP1__V_FREXP_EXP_I32_F32 + + // --- description from .arch file --- + // if (S0.f == INF || S0.f == NAN) then D.i = 0; + // else D.i = TwosComplement(Exponent(S0.f) - 127 + 1). + // Returns exponent of single precision float input, such that S0.f = + // significand * (2 ** exponent). See also FREXP_MANT_F32, which returns + // the significand. 
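The description above is the C frexp() contract expressed in hardware terms; a short standalone example of the significand/exponent split that V_FREXP_MANT_F32 returns one half of and this opcode the other:

#include <cmath>
#include <cstdio>

int main()
{
    // std::frexp splits x into  x = mant * 2^exp  with |mant| in [0.5, 1),
    // which matches the (exponent - 127 + 1) convention in the description.
    float x = 192.0f;                 // 0.75 * 2^8
    int exp = 0;
    float mant = std::frexp(x, &exp);
    std::printf("%g = %g * 2^%d\n", x, mant, exp);   // 192 = 0.75 * 2^8
    std::printf("%g\n", std::ldexp(mant, exp));      // reconstructs 192
}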
+ void + Inst_VOP1__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::isinf(src[lane]) || std::isnan(src[lane])) { + vdst[lane] = 0; + } else { + VecElemI32 exp(0); + std::frexp(src[lane], &exp); + vdst[lane] = exp; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_FREXP_MANT_F32 class methods --- + + Inst_VOP1__V_FREXP_MANT_F32::Inst_VOP1__V_FREXP_MANT_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_frexp_mant_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_FREXP_MANT_F32 + + Inst_VOP1__V_FREXP_MANT_F32::~Inst_VOP1__V_FREXP_MANT_F32() + { + } // ~Inst_VOP1__V_FREXP_MANT_F32 + + // --- description from .arch file --- + // if (S0.f == INF || S0.f == NAN) then D.f = S0.f; + // else D.f = Mantissa(S0.f). + // Result range is in (-1.0,-0.5][0.5,1.0) in normal cases. Returns binary + // --- significand of single precision float input, such that S0.f = + // --- significand * (2 ** exponent). See also FREXP_EXP_I32_F32, which + // --- returns integer exponent. + void + Inst_VOP1__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::isinf(src[lane]) || std::isnan(src[lane])) { + vdst[lane] = src[lane]; + } else { + VecElemI32 exp(0); + vdst[lane] = std::frexp(src[lane], &exp); + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_CLREXCP class methods --- + + Inst_VOP1__V_CLREXCP::Inst_VOP1__V_CLREXCP(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_clrexcp") + { + setFlag(ALU); + } // Inst_VOP1__V_CLREXCP + + Inst_VOP1__V_CLREXCP::~Inst_VOP1__V_CLREXCP() + { + } // ~Inst_VOP1__V_CLREXCP + + // --- description from .arch file --- + // Clear wave's exception state in SIMD (SP). + void + Inst_VOP1__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_CVT_F16_U16 class methods --- + + Inst_VOP1__V_CVT_F16_U16::Inst_VOP1__V_CVT_F16_U16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_f16_u16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_CVT_F16_U16 + + Inst_VOP1__V_CVT_F16_U16::~Inst_VOP1__V_CVT_F16_U16() + { + } // ~Inst_VOP1__V_CVT_F16_U16 + + // --- description from .arch file --- + // D.f16 = uint16_to_flt16(S.u16). + // Supports denormals, rounding, exception flags and saturation. + void + Inst_VOP1__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_CVT_F16_I16 class methods --- + + Inst_VOP1__V_CVT_F16_I16::Inst_VOP1__V_CVT_F16_I16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_f16_i16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_CVT_F16_I16 + + Inst_VOP1__V_CVT_F16_I16::~Inst_VOP1__V_CVT_F16_I16() + { + } // ~Inst_VOP1__V_CVT_F16_I16 + + // --- description from .arch file --- + // D.f16 = int16_to_flt16(S.i16). + // Supports denormals, rounding, exception flags and saturation. 
+ void + Inst_VOP1__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_CVT_U16_F16 class methods --- + + Inst_VOP1__V_CVT_U16_F16::Inst_VOP1__V_CVT_U16_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_u16_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_CVT_U16_F16 + + Inst_VOP1__V_CVT_U16_F16::~Inst_VOP1__V_CVT_U16_F16() + { + } // ~Inst_VOP1__V_CVT_U16_F16 + + // --- description from .arch file --- + // D.u16 = flt16_to_uint16(S.f16). + // Supports rounding, exception flags and saturation. + void + Inst_VOP1__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_CVT_I16_F16 class methods --- + + Inst_VOP1__V_CVT_I16_F16::Inst_VOP1__V_CVT_I16_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cvt_i16_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_CVT_I16_F16 + + Inst_VOP1__V_CVT_I16_F16::~Inst_VOP1__V_CVT_I16_F16() + { + } // ~Inst_VOP1__V_CVT_I16_F16 + + // --- description from .arch file --- + // D.i16 = flt16_to_int16(S.f16). + // Supports rounding, exception flags and saturation. + void + Inst_VOP1__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_RCP_F16 class methods --- + + Inst_VOP1__V_RCP_F16::Inst_VOP1__V_RCP_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_rcp_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_RCP_F16 + + Inst_VOP1__V_RCP_F16::~Inst_VOP1__V_RCP_F16() + { + } // ~Inst_VOP1__V_RCP_F16 + + // --- description from .arch file --- + // if (S0.f16 == 1.0f) + // D.f16 = 1.0f; + // else + // D.f16 = ApproximateRecip(S0.f16). + void + Inst_VOP1__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_SQRT_F16 class methods --- + + Inst_VOP1__V_SQRT_F16::Inst_VOP1__V_SQRT_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_sqrt_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_SQRT_F16 + + Inst_VOP1__V_SQRT_F16::~Inst_VOP1__V_SQRT_F16() + { + } // ~Inst_VOP1__V_SQRT_F16 + + // --- description from .arch file --- + // if (S0.f16 == 1.0f) + // D.f16 = 1.0f; + // else + // D.f16 = ApproximateSqrt(S0.f16). + void + Inst_VOP1__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_RSQ_F16 class methods --- + + Inst_VOP1__V_RSQ_F16::Inst_VOP1__V_RSQ_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_rsq_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_RSQ_F16 + + Inst_VOP1__V_RSQ_F16::~Inst_VOP1__V_RSQ_F16() + { + } // ~Inst_VOP1__V_RSQ_F16 + + // --- description from .arch file --- + // if (S0.f16 == 1.0f) + // D.f16 = 1.0f; + // else + // D.f16 = ApproximateRecipSqrt(S0.f16). + void + Inst_VOP1__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_LOG_F16 class methods --- + + Inst_VOP1__V_LOG_F16::Inst_VOP1__V_LOG_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_log_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_LOG_F16 + + Inst_VOP1__V_LOG_F16::~Inst_VOP1__V_LOG_F16() + { + } // ~Inst_VOP1__V_LOG_F16 + + // --- description from .arch file --- + // if (S0.f16 == 1.0f) + // D.f16 = 0.0f; + // else + // D.f16 = ApproximateLog2(S0.f16). 
+ void + Inst_VOP1__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_EXP_F16 class methods --- + + Inst_VOP1__V_EXP_F16::Inst_VOP1__V_EXP_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_exp_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_EXP_F16 + + Inst_VOP1__V_EXP_F16::~Inst_VOP1__V_EXP_F16() + { + } // ~Inst_VOP1__V_EXP_F16 + + // --- description from .arch file --- + // if (S0.f16 == 0.0f) + // D.f16 = 1.0f; + // else + // D.f16 = Approximate2ToX(S0.f16). + void + Inst_VOP1__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_FREXP_MANT_F16 class methods --- + + Inst_VOP1__V_FREXP_MANT_F16::Inst_VOP1__V_FREXP_MANT_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_frexp_mant_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_FREXP_MANT_F16 + + Inst_VOP1__V_FREXP_MANT_F16::~Inst_VOP1__V_FREXP_MANT_F16() + { + } // ~Inst_VOP1__V_FREXP_MANT_F16 + + // --- description from .arch file --- + // if (S0.f16 == +-INF || S0.f16 == NAN) + // D.f16 = S0.f16; + // else + // D.f16 = mantissa(S0.f16). + // Result range is (-1.0,-0.5][0.5,1.0). + // C math library frexp function. + // Returns binary significand of half precision float input, such that the + // original single float = significand * (2 ** exponent). + void + Inst_VOP1__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_FREXP_EXP_I16_F16 class methods --- + + Inst_VOP1__V_FREXP_EXP_I16_F16::Inst_VOP1__V_FREXP_EXP_I16_F16( + InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_frexp_exp_i16_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_FREXP_EXP_I16_F16 + + Inst_VOP1__V_FREXP_EXP_I16_F16::~Inst_VOP1__V_FREXP_EXP_I16_F16() + { + } // ~Inst_VOP1__V_FREXP_EXP_I16_F16 + + // --- description from .arch file --- + // if (S0.f16 == +-INF || S0.f16 == NAN) + // D.i16 = 0; + // else + // D.i16 = 2s_complement(exponent(S0.f16) - 15 + 1). + // C math library frexp function. + // Returns exponent of half precision float input, such that the + // original single float = significand * (2 ** exponent). + void + Inst_VOP1__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_FLOOR_F16 class methods --- + + Inst_VOP1__V_FLOOR_F16::Inst_VOP1__V_FLOOR_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_floor_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_FLOOR_F16 + + Inst_VOP1__V_FLOOR_F16::~Inst_VOP1__V_FLOOR_F16() + { + } // ~Inst_VOP1__V_FLOOR_F16 + + // --- description from .arch file --- + // D.f16 = trunc(S0.f16); + // if (S0.f16 < 0.0f && S0.f16 != D.f16) then D.f16 -= 1.0f. + void + Inst_VOP1__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_CEIL_F16 class methods --- + + Inst_VOP1__V_CEIL_F16::Inst_VOP1__V_CEIL_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_ceil_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_CEIL_F16 + + Inst_VOP1__V_CEIL_F16::~Inst_VOP1__V_CEIL_F16() + { + } // ~Inst_VOP1__V_CEIL_F16 + + // --- description from .arch file --- + // D.f16 = trunc(S0.f16); + // if (S0.f16 > 0.0f && S0.f16 != D.f16) then D.f16 += 1.0f. 
+ void + Inst_VOP1__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_TRUNC_F16 class methods --- + + Inst_VOP1__V_TRUNC_F16::Inst_VOP1__V_TRUNC_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_trunc_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_TRUNC_F16 + + Inst_VOP1__V_TRUNC_F16::~Inst_VOP1__V_TRUNC_F16() + { + } // ~Inst_VOP1__V_TRUNC_F16 + + // --- description from .arch file --- + // D.f16 = trunc(S0.f16). + // Round-to-zero semantics. + void + Inst_VOP1__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_RNDNE_F16 class methods --- + + Inst_VOP1__V_RNDNE_F16::Inst_VOP1__V_RNDNE_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_rndne_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_RNDNE_F16 + + Inst_VOP1__V_RNDNE_F16::~Inst_VOP1__V_RNDNE_F16() + { + } // ~Inst_VOP1__V_RNDNE_F16 + + // --- description from .arch file --- + // D.f16 = FLOOR(S0.f16 + 0.5f); + // if (floor(S0.f16) is even && fract(S0.f16) == 0.5f) then D.f16 -= 1.0f. + // Round-to-nearest-even semantics. + void + Inst_VOP1__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_FRACT_F16 class methods --- + + Inst_VOP1__V_FRACT_F16::Inst_VOP1__V_FRACT_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_fract_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_FRACT_F16 + + Inst_VOP1__V_FRACT_F16::~Inst_VOP1__V_FRACT_F16() + { + } // ~Inst_VOP1__V_FRACT_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 + -floor(S0.f16). + void + Inst_VOP1__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_SIN_F16 class methods --- + + Inst_VOP1__V_SIN_F16::Inst_VOP1__V_SIN_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_sin_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_SIN_F16 + + Inst_VOP1__V_SIN_F16::~Inst_VOP1__V_SIN_F16() + { + } // ~Inst_VOP1__V_SIN_F16 + + // --- description from .arch file --- + // D.f16 = sin(S0.f16 * 2 * PI). + void + Inst_VOP1__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_COS_F16 class methods --- + + Inst_VOP1__V_COS_F16::Inst_VOP1__V_COS_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cos_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_COS_F16 + + Inst_VOP1__V_COS_F16::~Inst_VOP1__V_COS_F16() + { + } // ~Inst_VOP1__V_COS_F16 + + // --- description from .arch file --- + // D.f16 = cos(S0.f16 * 2 * PI). + void + Inst_VOP1__V_COS_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_EXP_LEGACY_F32 class methods --- + + Inst_VOP1__V_EXP_LEGACY_F32::Inst_VOP1__V_EXP_LEGACY_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_exp_legacy_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_EXP_LEGACY_F32 + + Inst_VOP1__V_EXP_LEGACY_F32::~Inst_VOP1__V_EXP_LEGACY_F32() + { + } // ~Inst_VOP1__V_EXP_LEGACY_F32 + + // --- description from .arch file --- + // D.f = pow(2.0, S0.f) with legacy semantics. 
+ void + Inst_VOP1__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::pow(2.0, src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP1__V_LOG_LEGACY_F32 class methods --- + + Inst_VOP1__V_LOG_LEGACY_F32::Inst_VOP1__V_LOG_LEGACY_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_log_legacy_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_LOG_LEGACY_F32 + + Inst_VOP1__V_LOG_LEGACY_F32::~Inst_VOP1__V_LOG_LEGACY_F32() + { + } // ~Inst_VOP1__V_LOG_LEGACY_F32 + + // --- description from .arch file --- + // D.f = log2(S0.f). Base 2 logarithm with legacy semantics. + void + Inst_VOP1__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, instData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::log2(src[lane]); + } + } + + vdst.write(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/vop2.cc b/src/arch/amdgpu/vega/insts/vop2.cc new file mode 100644 index 0000000000..ddd77e27da --- /dev/null +++ b/src/arch/amdgpu/vega/insts/vop2.cc @@ -0,0 +1,2187 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "arch/amdgpu/vega/insts/inst_util.hh" +#include "arch/amdgpu/vega/insts/instructions.hh" +#include "debug/VEGA.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_VOP2__V_CNDMASK_B32 class methods --- + + Inst_VOP2__V_CNDMASK_B32::Inst_VOP2__V_CNDMASK_B32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_cndmask_b32") + { + setFlag(ALU); + setFlag(ReadsVCC); + } // Inst_VOP2__V_CNDMASK_B32 + + Inst_VOP2__V_CNDMASK_B32::~Inst_VOP2__V_CNDMASK_B32() + { + } // ~Inst_VOP2__V_CNDMASK_B32 + + // --- description from .arch file --- + // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC + // as a scalar GPR in S2. + void + Inst_VOP2__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + vcc.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] + = bits(vcc.rawData(), lane) ? src1[lane] : src0[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_ADD_F32 class methods --- + + Inst_VOP2__V_ADD_F32::Inst_VOP2__V_ADD_F32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_add_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP2__V_ADD_F32 + + Inst_VOP2__V_ADD_F32::~Inst_VOP2__V_ADD_F32() + { + } // ~Inst_VOP2__V_ADD_F32 + + // --- description from .arch file --- + // D.f = S0.f + S1.f. + void + Inst_VOP2__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + VecOperandF32 src1(gpuDynInst, instData.VSRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + if (isDPPInst()) { + VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); + src0_dpp.read(); + + DPRINTF(VEGA, "Handling V_ADD_F32 SRC DPP. SRC0: register v[%d], " + "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, " + "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, " + "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0, + extData.iFmt_VOP_DPP.DPP_CTRL, + extData.iFmt_VOP_DPP.SRC0_ABS, + extData.iFmt_VOP_DPP.SRC0_NEG, + extData.iFmt_VOP_DPP.SRC1_ABS, + extData.iFmt_VOP_DPP.SRC1_NEG, + extData.iFmt_VOP_DPP.BC, + extData.iFmt_VOP_DPP.BANK_MASK, + extData.iFmt_VOP_DPP.ROW_MASK); + + processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0_dpp[lane] + src1[lane]; + } + } + } else { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] + src1[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_SUB_F32 class methods --- + + Inst_VOP2__V_SUB_F32::Inst_VOP2__V_SUB_F32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_sub_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP2__V_SUB_F32 + + Inst_VOP2__V_SUB_F32::~Inst_VOP2__V_SUB_F32() + { + } // ~Inst_VOP2__V_SUB_F32 + + // --- description from .arch file --- + // D.f = S0.f - S1.f. + // SQ translates to V_ADD_F32. 
+ void + Inst_VOP2__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] - src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_SUBREV_F32 class methods --- + + Inst_VOP2__V_SUBREV_F32::Inst_VOP2__V_SUBREV_F32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_subrev_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP2__V_SUBREV_F32 + + Inst_VOP2__V_SUBREV_F32::~Inst_VOP2__V_SUBREV_F32() + { + } // ~Inst_VOP2__V_SUBREV_F32 + + // --- description from .arch file --- + // D.f = S1.f - S0.f. + // SQ translates to V_ADD_F32. + void + Inst_VOP2__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] - src0[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MUL_LEGACY_F32 class methods --- + + Inst_VOP2__V_MUL_LEGACY_F32::Inst_VOP2__V_MUL_LEGACY_F32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_mul_legacy_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP2__V_MUL_LEGACY_F32 + + Inst_VOP2__V_MUL_LEGACY_F32::~Inst_VOP2__V_MUL_LEGACY_F32() + { + } // ~Inst_VOP2__V_MUL_LEGACY_F32 + + // --- description from .arch file --- + // D.f = S0.f * S1.f (DX9 rules, 0.0*x = 0.0). + void + Inst_VOP2__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] * src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MUL_F32 class methods --- + + Inst_VOP2__V_MUL_F32::Inst_VOP2__V_MUL_F32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_mul_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP2__V_MUL_F32 + + Inst_VOP2__V_MUL_F32::~Inst_VOP2__V_MUL_F32() + { + } // ~Inst_VOP2__V_MUL_F32 + + // --- description from .arch file --- + // D.f = S0.f * S1.f. 
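+ // --- illustrative note (not from the .arch file) ---
+ // The loop below spells out the IEEE-754 special cases instead of relying
+ // on the host multiply alone: a NaN operand yields NaN, 0 * +/-INF yields
+ // NaN, and zero or infinity results take the sign of the product (e.g.
+ // -0.0f * 4.0f = -0.0f and -INF * 2.0f = -INF). Finite, nonzero operands
+ // fall through to the ordinary src0 * src1 multiply.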
+ void + Inst_VOP2__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::isnan(src0[lane]) || + std::isnan(src1[lane])) { + vdst[lane] = NAN; + } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || + std::fpclassify(src0[lane]) == FP_ZERO) && + !std::signbit(src0[lane])) { + if (std::isinf(src1[lane])) { + vdst[lane] = NAN; + } else if (!std::signbit(src1[lane])) { + vdst[lane] = +0.0; + } else { + vdst[lane] = -0.0; + } + } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || + std::fpclassify(src0[lane]) == FP_ZERO) && + std::signbit(src0[lane])) { + if (std::isinf(src1[lane])) { + vdst[lane] = NAN; + } else if (std::signbit(src1[lane])) { + vdst[lane] = +0.0; + } else { + vdst[lane] = -0.0; + } + } else if (std::isinf(src0[lane]) && + !std::signbit(src0[lane])) { + if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || + std::fpclassify(src1[lane]) == FP_ZERO) { + vdst[lane] = NAN; + } else if (!std::signbit(src1[lane])) { + vdst[lane] = +INFINITY; + } else { + vdst[lane] = -INFINITY; + } + } else if (std::isinf(src0[lane]) && + std::signbit(src0[lane])) { + if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || + std::fpclassify(src1[lane]) == FP_ZERO) { + vdst[lane] = NAN; + } else if (std::signbit(src1[lane])) { + vdst[lane] = +INFINITY; + } else { + vdst[lane] = -INFINITY; + } + } else { + vdst[lane] = src0[lane] * src1[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MUL_I32_I24 class methods --- + + Inst_VOP2__V_MUL_I32_I24::Inst_VOP2__V_MUL_I32_I24(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_mul_i32_i24") + { + setFlag(ALU); + } // Inst_VOP2__V_MUL_I32_I24 + + Inst_VOP2__V_MUL_I32_I24::~Inst_VOP2__V_MUL_I32_I24() + { + } // ~Inst_VOP2__V_MUL_I32_I24 + + // --- description from .arch file --- + // D.i = S0.i[23:0] * S1.i[23:0]. + void + Inst_VOP2__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = sext<24>(bits(src0[lane], 23, 0)) + * sext<24>(bits(src1[lane], 23, 0)); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MUL_HI_I32_I24 class methods --- + + Inst_VOP2__V_MUL_HI_I32_I24::Inst_VOP2__V_MUL_HI_I32_I24(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_mul_hi_i32_i24") + { + setFlag(ALU); + } // Inst_VOP2__V_MUL_HI_I32_I24 + + Inst_VOP2__V_MUL_HI_I32_I24::~Inst_VOP2__V_MUL_HI_I32_I24() + { + } // ~Inst_VOP2__V_MUL_HI_I32_I24 + + // --- description from .arch file --- + // D.i = (S0.i[23:0] * S1.i[23:0])>>32. 
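+ // --- illustrative note (not from the .arch file) ---
+ // Both 24-bit fields are sign-extended to 64 bits before multiplying, so
+ // the product is an exact 48-bit value and bits [63:32] form the result.
+ // Example: S0[23:0] = 0x800000 (-8388608) and S1[23:0] = 0x000002 give
+ // -16777216 = 0xFFFFFFFFFF000000, so D.i = 0xFFFFFFFF (-1).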
+ void + Inst_VOP2__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemI64 tmp_src0 + = (VecElemI64)sext<24>(bits(src0[lane], 23, 0)); + VecElemI64 tmp_src1 + = (VecElemI64)sext<24>(bits(src1[lane], 23, 0)); + + vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MUL_U32_U24 class methods --- + + Inst_VOP2__V_MUL_U32_U24::Inst_VOP2__V_MUL_U32_U24(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_mul_u32_u24") + { + setFlag(ALU); + } // Inst_VOP2__V_MUL_U32_U24 + + Inst_VOP2__V_MUL_U32_U24::~Inst_VOP2__V_MUL_U32_U24() + { + } // ~Inst_VOP2__V_MUL_U32_U24 + + // --- description from .arch file --- + // D.u = S0.u[23:0] * S1.u[23:0]. + void + Inst_VOP2__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst) + { + auto opImpl = [](VecOperandU32& src0, VecOperandU32& src1, + VecOperandU32& vdst, Wavefront* wf) { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = bits(src0[lane], 23, 0) * + bits(src1[lane], 23, 0); + } + } + }; + + vop2Helper(gpuDynInst, opImpl); + } // execute + // --- Inst_VOP2__V_MUL_HI_U32_U24 class methods --- + + Inst_VOP2__V_MUL_HI_U32_U24::Inst_VOP2__V_MUL_HI_U32_U24(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_mul_hi_u32_u24") + { + setFlag(ALU); + } // Inst_VOP2__V_MUL_HI_U32_U24 + + Inst_VOP2__V_MUL_HI_U32_U24::~Inst_VOP2__V_MUL_HI_U32_U24() + { + } // ~Inst_VOP2__V_MUL_HI_U32_U24 + + // --- description from .arch file --- + // D.i = (S0.u[23:0] * S1.u[23:0])>>32. + void + Inst_VOP2__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0); + VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0); + vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MIN_F32 class methods --- + + Inst_VOP2__V_MIN_F32::Inst_VOP2__V_MIN_F32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_min_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP2__V_MIN_F32 + + Inst_VOP2__V_MIN_F32::~Inst_VOP2__V_MIN_F32() + { + } // ~Inst_VOP2__V_MIN_F32 + + // --- description from .arch file --- + // D.f = (S0.f < S1.f ? S0.f : S1.f). 
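+ // --- illustrative note (not from the .arch file) ---
+ // The implementation below uses std::fmin, which returns the non-NaN
+ // operand when exactly one input is NaN (e.g. fmin(2.0f, NAN) = 2.0f),
+ // whereas a literal reading of the comparison above would return S1 in
+ // that case. For ordinary inputs the two agree: fmin(2.0f, -3.0f) = -3.0f.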
+ void + Inst_VOP2__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fmin(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MAX_F32 class methods --- + + Inst_VOP2__V_MAX_F32::Inst_VOP2__V_MAX_F32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_max_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP2__V_MAX_F32 + + Inst_VOP2__V_MAX_F32::~Inst_VOP2__V_MAX_F32() + { + } // ~Inst_VOP2__V_MAX_F32 + + // --- description from .arch file --- + // D.f = (S0.f >= S1.f ? S0.f : S1.f). + void + Inst_VOP2__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fmax(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MIN_I32 class methods --- + + Inst_VOP2__V_MIN_I32::Inst_VOP2__V_MIN_I32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_min_i32") + { + setFlag(ALU); + } // Inst_VOP2__V_MIN_I32 + + Inst_VOP2__V_MIN_I32::~Inst_VOP2__V_MIN_I32() + { + } // ~Inst_VOP2__V_MIN_I32 + + // --- description from .arch file --- + // D.i = min(S0.i, S1.i). + void + Inst_VOP2__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::min(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MAX_I32 class methods --- + + Inst_VOP2__V_MAX_I32::Inst_VOP2__V_MAX_I32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_max_i32") + { + setFlag(ALU); + } // Inst_VOP2__V_MAX_I32 + + Inst_VOP2__V_MAX_I32::~Inst_VOP2__V_MAX_I32() + { + } // ~Inst_VOP2__V_MAX_I32 + + // --- description from .arch file --- + // D.i = max(S0.i, S1.i). + void + Inst_VOP2__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::max(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MIN_U32 class methods --- + + Inst_VOP2__V_MIN_U32::Inst_VOP2__V_MIN_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_min_u32") + { + setFlag(ALU); + } // Inst_VOP2__V_MIN_U32 + + Inst_VOP2__V_MIN_U32::~Inst_VOP2__V_MIN_U32() + { + } // ~Inst_VOP2__V_MIN_U32 + + // --- description from .arch file --- + // D.u = min(S0.u, S1.u). 
+ void + Inst_VOP2__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::min(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MAX_U32 class methods --- + + Inst_VOP2__V_MAX_U32::Inst_VOP2__V_MAX_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_max_u32") + { + setFlag(ALU); + } // Inst_VOP2__V_MAX_U32 + + Inst_VOP2__V_MAX_U32::~Inst_VOP2__V_MAX_U32() + { + } // ~Inst_VOP2__V_MAX_U32 + + // --- description from .arch file --- + // D.u = max(S0.u, S1.u). + void + Inst_VOP2__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::max(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_LSHRREV_B32 class methods --- + + Inst_VOP2__V_LSHRREV_B32::Inst_VOP2__V_LSHRREV_B32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_lshrrev_b32") + { + setFlag(ALU); + } // Inst_VOP2__V_LSHRREV_B32 + + Inst_VOP2__V_LSHRREV_B32::~Inst_VOP2__V_LSHRREV_B32() + { + } // ~Inst_VOP2__V_LSHRREV_B32 + + // --- description from .arch file --- + // D.u = S1.u >> S0.u[4:0]. + // The vacated bits are set to zero. + // SQ translates this to an internal SP opcode. + void + Inst_VOP2__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_ASHRREV_I32 class methods --- + + Inst_VOP2__V_ASHRREV_I32::Inst_VOP2__V_ASHRREV_I32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_ashrrev_i32") + { + setFlag(ALU); + } // Inst_VOP2__V_ASHRREV_I32 + + Inst_VOP2__V_ASHRREV_I32::~Inst_VOP2__V_ASHRREV_I32() + { + } // ~Inst_VOP2__V_ASHRREV_I32 + + // --- description from .arch file --- + // D.i = signext(S1.i) >> S0.i[4:0]. + // The vacated bits are set to the sign bit of the input value. + // SQ translates this to an internal SP opcode. 
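+ // --- illustrative note (not from the .arch file) ---
+ // Reversed operand order: the shift amount comes from S0[4:0] and the
+ // value being shifted from S1. Since src1 is held in a signed 32-bit
+ // operand, '>>' is an arithmetic shift on the usual host targets, e.g.
+ // S1 = 0xFFFFFFF8 (-8) shifted right by S0 = 1 gives 0xFFFFFFFC (-4).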
+ void + Inst_VOP2__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_LSHLREV_B32 class methods --- + + Inst_VOP2__V_LSHLREV_B32::Inst_VOP2__V_LSHLREV_B32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_lshlrev_b32") + { + setFlag(ALU); + } // Inst_VOP2__V_LSHLREV_B32 + + Inst_VOP2__V_LSHLREV_B32::~Inst_VOP2__V_LSHLREV_B32() + { + } // ~Inst_VOP2__V_LSHLREV_B32 + + // --- description from .arch file --- + // D.u = S1.u << S0.u[4:0]. + // SQ translates this to an internal SP opcode. + void + Inst_VOP2__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + VecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + if (isSDWAInst()) { + VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); + // use copies of original src0, src1, and vdst during selecting + VecOperandU32 origSrc0_sdwa(gpuDynInst, + extData.iFmt_VOP_SDWA.SRC0); + VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1); + VecOperandU32 origVdst(gpuDynInst, instData.VDST); + + src0_sdwa.read(); + origSrc0_sdwa.read(); + origSrc1.read(); + + DPRINTF(VEGA, "Handling V_LSHLREV_B32 SRC SDWA. SRC0: register " + "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: " + "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: " + "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", + extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, + extData.iFmt_VOP_SDWA.DST_U, + extData.iFmt_VOP_SDWA.CLMP, + extData.iFmt_VOP_SDWA.SRC0_SEL, + extData.iFmt_VOP_SDWA.SRC0_SEXT, + extData.iFmt_VOP_SDWA.SRC0_NEG, + extData.iFmt_VOP_SDWA.SRC0_ABS, + extData.iFmt_VOP_SDWA.SRC1_SEL, + extData.iFmt_VOP_SDWA.SRC1_SEXT, + extData.iFmt_VOP_SDWA.SRC1_NEG, + extData.iFmt_VOP_SDWA.SRC1_ABS); + + processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa, + src1, origSrc1); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] << bits(src0_sdwa[lane], 4, 0); + origVdst[lane] = vdst[lane]; // keep copy consistent + } + } + + processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst); + } else { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] << bits(src0[lane], 4, 0); + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_AND_B32 class methods --- + + Inst_VOP2__V_AND_B32::Inst_VOP2__V_AND_B32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_and_b32") + { + setFlag(ALU); + } // Inst_VOP2__V_AND_B32 + + Inst_VOP2__V_AND_B32::~Inst_VOP2__V_AND_B32() + { + } // ~Inst_VOP2__V_AND_B32 + + // --- description from .arch file --- + // D.u = S0.u & S1.u. + // Input and output modifiers not supported. 
+ void + Inst_VOP2__V_AND_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + VecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + if (isDPPInst()) { + VecOperandU32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); + src0_dpp.read(); + + DPRINTF(VEGA, "Handling V_AND_B32 SRC DPP. SRC0: register v[%d], " + "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, " + "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, " + "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0, + extData.iFmt_VOP_DPP.DPP_CTRL, + extData.iFmt_VOP_DPP.SRC0_ABS, + extData.iFmt_VOP_DPP.SRC0_NEG, + extData.iFmt_VOP_DPP.SRC1_ABS, + extData.iFmt_VOP_DPP.SRC1_NEG, + extData.iFmt_VOP_DPP.BC, + extData.iFmt_VOP_DPP.BANK_MASK, + extData.iFmt_VOP_DPP.ROW_MASK); + + processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0_dpp[lane] & src1[lane]; + } + } + } else { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] & src1[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_OR_B32 class methods --- + + Inst_VOP2__V_OR_B32::Inst_VOP2__V_OR_B32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_or_b32") + { + setFlag(ALU); + } // Inst_VOP2__V_OR_B32 + + Inst_VOP2__V_OR_B32::~Inst_VOP2__V_OR_B32() + { + } // ~Inst_VOP2__V_OR_B32 + + // --- description from .arch file --- + // D.u = S0.u | S1.u. + // Input and output modifiers not supported. + void + Inst_VOP2__V_OR_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + VecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + if (isSDWAInst()) { + VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); + // use copies of original src0, src1, and dest during selecting + VecOperandU32 origSrc0_sdwa(gpuDynInst, + extData.iFmt_VOP_SDWA.SRC0); + VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1); + VecOperandU32 origVdst(gpuDynInst, instData.VDST); + + src0_sdwa.read(); + origSrc0_sdwa.read(); + origSrc1.read(); + + DPRINTF(VEGA, "Handling V_OR_B32 SRC SDWA. 
SRC0: register v[%d], " + "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, " + "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, " + "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", + extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, + extData.iFmt_VOP_SDWA.DST_U, + extData.iFmt_VOP_SDWA.CLMP, + extData.iFmt_VOP_SDWA.SRC0_SEL, + extData.iFmt_VOP_SDWA.SRC0_SEXT, + extData.iFmt_VOP_SDWA.SRC0_NEG, + extData.iFmt_VOP_SDWA.SRC0_ABS, + extData.iFmt_VOP_SDWA.SRC1_SEL, + extData.iFmt_VOP_SDWA.SRC1_SEXT, + extData.iFmt_VOP_SDWA.SRC1_NEG, + extData.iFmt_VOP_SDWA.SRC1_ABS); + + processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa, + src1, origSrc1); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0_sdwa[lane] | src1[lane]; + origVdst[lane] = vdst[lane]; // keep copy consistent + } + } + + processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst); + } else { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] | src1[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_XOR_B32 class methods --- + + Inst_VOP2__V_XOR_B32::Inst_VOP2__V_XOR_B32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_xor_b32") + { + setFlag(ALU); + } // Inst_VOP2__V_XOR_B32 + + Inst_VOP2__V_XOR_B32::~Inst_VOP2__V_XOR_B32() + { + } // ~Inst_VOP2__V_XOR_B32 + + // --- description from .arch file --- + // D.u = S0.u ^ S1.u. + // Input and output modifiers not supported. + void + Inst_VOP2__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] ^ src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MAC_F32 class methods --- + + Inst_VOP2__V_MAC_F32::Inst_VOP2__V_MAC_F32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_mac_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(MAC); + } // Inst_VOP2__V_MAC_F32 + + Inst_VOP2__V_MAC_F32::~Inst_VOP2__V_MAC_F32() + { + } // ~Inst_VOP2__V_MAC_F32 + + // --- description from .arch file --- + // D.f = S0.f * S1.f + D.f. + // SQ translates to V_MAD_F32. + void + Inst_VOP2__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + VecOperandF32 src1(gpuDynInst, instData.VSRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + vdst.read(); + + if (isDPPInst()) { + VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); + src0_dpp.read(); + + DPRINTF(VEGA, "Handling V_MAC_F32 SRC DPP. 
SRC0: register v[%d], " + "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, " + "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, " + "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0, + extData.iFmt_VOP_DPP.DPP_CTRL, + extData.iFmt_VOP_DPP.SRC0_ABS, + extData.iFmt_VOP_DPP.SRC0_NEG, + extData.iFmt_VOP_DPP.SRC1_ABS, + extData.iFmt_VOP_DPP.SRC1_NEG, + extData.iFmt_VOP_DPP.BC, + extData.iFmt_VOP_DPP.BANK_MASK, + extData.iFmt_VOP_DPP.ROW_MASK); + + processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fma(src0_dpp[lane], src1[lane], + vdst[lane]); + } + } + } else { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]); + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MADMK_F32 class methods --- + + Inst_VOP2__V_MADMK_F32::Inst_VOP2__V_MADMK_F32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_madmk_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(MAD); + } // Inst_VOP2__V_MADMK_F32 + + Inst_VOP2__V_MADMK_F32::~Inst_VOP2__V_MADMK_F32() + { + } // ~Inst_VOP2__V_MADMK_F32 + + // --- description from .arch file --- + // D.f = S0.f * K + S1.f; K is a 32-bit inline constant. + // This opcode cannot use the VOP3 encoding and cannot use input/output + // --- modifiers. + // SQ translates to V_MAD_F32. + void + Inst_VOP2__V_MADMK_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + VecElemF32 k = extData.imm_f32; + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fma(src0[lane], k, src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MADAK_F32 class methods --- + + Inst_VOP2__V_MADAK_F32::Inst_VOP2__V_MADAK_F32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_madak_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(MAD); + } // Inst_VOP2__V_MADAK_F32 + + Inst_VOP2__V_MADAK_F32::~Inst_VOP2__V_MADAK_F32() + { + } // ~Inst_VOP2__V_MADAK_F32 + + // --- description from .arch file --- + // D.f = S0.f * S1.f + K; K is a 32-bit inline constant. + // This opcode cannot use the VOP3 encoding and cannot use input/output + // --- modifiers. + // SQ translates to V_MAD_F32. + void + Inst_VOP2__V_MADAK_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + VecElemF32 k = extData.imm_f32; + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fma(src0[lane], src1[lane], k); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_ADD_CO_U32 class methods --- + + Inst_VOP2__V_ADD_CO_U32::Inst_VOP2__V_ADD_CO_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_add_co_u32") + { + setFlag(ALU); + setFlag(WritesVCC); + } // Inst_VOP2__V_ADD_CO_U32 + + Inst_VOP2__V_ADD_CO_U32::~Inst_VOP2__V_ADD_CO_U32() + { + } // ~Inst_VOP2__V_ADD_CO_U32 + + // --- description from .arch file --- + // D.u = S0.u + S1.u; + // VCC[threadId] = (S0.u + S1.u >= 0x800000000ULL ? 1 : 0) is an UNSIGNED + // --- overflow or carry-out for V_ADDC_U32. 
+ // In VOP3 the VCC destination may be an arbitrary SGPR-pair. + void + Inst_VOP2__V_ADD_CO_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + VecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + if (isSDWAInst()) { + VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); + // use copies of original src0, src1, and dest during selecting + VecOperandU32 origSrc0_sdwa(gpuDynInst, + extData.iFmt_VOP_SDWA.SRC0); + VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1); + VecOperandU32 origVdst(gpuDynInst, instData.VDST); + + src0_sdwa.read(); + origSrc0_sdwa.read(); + origSrc1.read(); + + DPRINTF(VEGA, "Handling V_ADD_CO_U32 SRC SDWA. SRC0: register " + "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, " + "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, " + "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", + extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, + extData.iFmt_VOP_SDWA.DST_U, + extData.iFmt_VOP_SDWA.CLMP, + extData.iFmt_VOP_SDWA.SRC0_SEL, + extData.iFmt_VOP_SDWA.SRC0_SEXT, + extData.iFmt_VOP_SDWA.SRC0_NEG, + extData.iFmt_VOP_SDWA.SRC0_ABS, + extData.iFmt_VOP_SDWA.SRC1_SEL, + extData.iFmt_VOP_SDWA.SRC1_SEXT, + extData.iFmt_VOP_SDWA.SRC1_NEG, + extData.iFmt_VOP_SDWA.SRC1_ABS); + + processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa, + src1, origSrc1); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0_sdwa[lane] + src1[lane]; + origVdst[lane] = vdst[lane]; // keep copy consistent + vcc.setBit(lane, ((VecElemU64)src0_sdwa[lane] + + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0); + } + } + + processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst); + } else { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] + src1[lane]; + vcc.setBit(lane, ((VecElemU64)src0[lane] + + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0); + } + } + } + + vcc.write(); + vdst.write(); + } // execute + // --- Inst_VOP2__V_SUB_CO_U32 class methods --- + + Inst_VOP2__V_SUB_CO_U32::Inst_VOP2__V_SUB_CO_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_sub_co_u32") + { + setFlag(ALU); + setFlag(WritesVCC); + } // Inst_VOP2__V_SUB_CO_U32 + + Inst_VOP2__V_SUB_CO_U32::~Inst_VOP2__V_SUB_CO_U32() + { + } // ~Inst_VOP2__V_SUB_CO_U32 + + // --- description from .arch file --- + // D.u = S0.u - S1.u; + // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or + // carry-out for V_SUBB_U32. + // In VOP3 the VCC destination may be an arbitrary SGPR-pair. + void + Inst_VOP2__V_SUB_CO_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] - src1[lane]; + vcc.setBit(lane, src1[lane] > src0[lane] ? 
1 : 0); + } + } + + vdst.write(); + vcc.write(); + } // execute + // --- Inst_VOP2__V_SUBREV_CO_U32 class methods --- + + Inst_VOP2__V_SUBREV_CO_U32::Inst_VOP2__V_SUBREV_CO_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_subrev_co_u32") + { + setFlag(ALU); + setFlag(WritesVCC); + } // Inst_VOP2__V_SUBREV_CO_U32 + + Inst_VOP2__V_SUBREV_CO_U32::~Inst_VOP2__V_SUBREV_CO_U32() + { + } // ~Inst_VOP2__V_SUBREV_CO_U32 + + // --- description from .arch file --- + // D.u = S1.u - S0.u; + // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or + // carry-out for V_SUBB_U32. + // In VOP3 the VCC destination may be an arbitrary SGPR-pair. + void + Inst_VOP2__V_SUBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] - src0[lane]; + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + vdst.write(); + vcc.write(); + } // execute + // --- Inst_VOP2__V_ADDC_CO_U32 class methods --- + + Inst_VOP2__V_ADDC_CO_U32::Inst_VOP2__V_ADDC_CO_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_addc_co_u32") + { + setFlag(ALU); + setFlag(WritesVCC); + setFlag(ReadsVCC); + } // Inst_VOP2__V_ADDC_CO_U32 + + Inst_VOP2__V_ADDC_CO_U32::~Inst_VOP2__V_ADDC_CO_U32() + { + } // ~Inst_VOP2__V_ADDC_CO_U32 + + // --- description from .arch file --- + // D.u = S0.u + S1.u + VCC[threadId]; + // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x800000000ULL ? 1 : 0) + // is an UNSIGNED overflow. + // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC + // source comes from the SGPR-pair at S2.u. + void + Inst_VOP2__V_ADDC_CO_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + vcc.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] + src1[lane] + + bits(vcc.rawData(), lane); + vcc.setBit(lane, ((VecElemU64)src0[lane] + + (VecElemU64)src1[lane] + + (VecElemU64)bits(vcc.rawData(), lane, lane)) + >= 0x100000000 ? 1 : 0); + } + } + + vdst.write(); + vcc.write(); + } // execute + // --- Inst_VOP2__V_SUBB_CO_U32 class methods --- + + Inst_VOP2__V_SUBB_CO_U32::Inst_VOP2__V_SUBB_CO_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_subb_co_u32") + { + setFlag(ALU); + setFlag(WritesVCC); + setFlag(ReadsVCC); + } // Inst_VOP2__V_SUBB_CO_U32 + + Inst_VOP2__V_SUBB_CO_U32::~Inst_VOP2__V_SUBB_CO_U32() + { + } // ~Inst_VOP2__V_SUBB_CO_U32 + + // --- description from .arch file --- + // D.u = S0.u - S1.u - VCC[threadId]; + // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED + // --- overflow. + // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC + // --- source comes from the SGPR-pair at S2.u. 
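+ // --- illustrative note (not from the .arch file) ---
+ // Subtract-with-borrow building block for wide subtraction: the incoming
+ // VCC bit is the borrow from the lower word and the outgoing VCC bit
+ // flags a new borrow. Example: S0 = 0x0, S1 = 0x1, VCC in = 0 gives
+ // D = 0xFFFFFFFF and VCC out = 1, which the next higher word of a 64-bit
+ // subtraction consumes through another V_SUBB_CO_U32.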
+ void + Inst_VOP2__V_SUBB_CO_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + vcc.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] + = src0[lane] - src1[lane] - bits(vcc.rawData(), lane); + vcc.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane)) + > src0[lane] ? 1 : 0); + } + } + + vdst.write(); + vcc.write(); + } // execute + // --- Inst_VOP2__V_SUBBREV_CO_U32 class methods --- + + Inst_VOP2__V_SUBBREV_CO_U32::Inst_VOP2__V_SUBBREV_CO_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_subbrev_co_u32") + { + setFlag(ALU); + setFlag(WritesVCC); + setFlag(ReadsVCC); + } // Inst_VOP2__V_SUBBREV_CO_U32 + + Inst_VOP2__V_SUBBREV_CO_U32::~Inst_VOP2__V_SUBBREV_CO_U32() + { + } // ~Inst_VOP2__V_SUBBREV_CO_U32 + + // --- description from .arch file --- + // D.u = S1.u - S0.u - VCC[threadId]; + // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED + // overflow. + // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC + // source comes from the SGPR-pair at S2.u. SQ translates to V_SUBB_U32. + // SQ translates this to V_SUBREV_U32 with reversed operands. + void + Inst_VOP2__V_SUBBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + vcc.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] + = src1[lane] - src0[lane] - bits(vcc.rawData(), lane); + vcc.setBit(lane, (src0[lane] + bits(vcc.rawData(), lane)) + > src1[lane] ? 1 : 0); + } + } + + vdst.write(); + vcc.write(); + } // execute + // --- Inst_VOP2__V_ADD_F16 class methods --- + + Inst_VOP2__V_ADD_F16::Inst_VOP2__V_ADD_F16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_add_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP2__V_ADD_F16 + + Inst_VOP2__V_ADD_F16::~Inst_VOP2__V_ADD_F16() + { + } // ~Inst_VOP2__V_ADD_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 + S1.f16. + // Supports denormals, round mode, exception flags, saturation. + void + Inst_VOP2__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP2__V_SUB_F16 class methods --- + + Inst_VOP2__V_SUB_F16::Inst_VOP2__V_SUB_F16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_sub_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP2__V_SUB_F16 + + Inst_VOP2__V_SUB_F16::~Inst_VOP2__V_SUB_F16() + { + } // ~Inst_VOP2__V_SUB_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 - S1.f16. + // Supports denormals, round mode, exception flags, saturation. + // SQ translates to V_ADD_F16. 
+ void + Inst_VOP2__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP2__V_SUBREV_F16 class methods --- + + Inst_VOP2__V_SUBREV_F16::Inst_VOP2__V_SUBREV_F16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_subrev_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP2__V_SUBREV_F16 + + Inst_VOP2__V_SUBREV_F16::~Inst_VOP2__V_SUBREV_F16() + { + } // ~Inst_VOP2__V_SUBREV_F16 + + // --- description from .arch file --- + // D.f16 = S1.f16 - S0.f16. + // Supports denormals, round mode, exception flags, saturation. + // SQ translates to V_ADD_F16. + void + Inst_VOP2__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP2__V_MUL_F16 class methods --- + + Inst_VOP2__V_MUL_F16::Inst_VOP2__V_MUL_F16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_mul_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP2__V_MUL_F16 + + Inst_VOP2__V_MUL_F16::~Inst_VOP2__V_MUL_F16() + { + } // ~Inst_VOP2__V_MUL_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 * S1.f16. + // Supports denormals, round mode, exception flags, saturation. + void + Inst_VOP2__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP2__V_MAC_F16 class methods --- + + Inst_VOP2__V_MAC_F16::Inst_VOP2__V_MAC_F16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_mac_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(MAC); + } // Inst_VOP2__V_MAC_F16 + + Inst_VOP2__V_MAC_F16::~Inst_VOP2__V_MAC_F16() + { + } // ~Inst_VOP2__V_MAC_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 * S1.f16 + D.f16. + // Supports round mode, exception flags, saturation. + // SQ translates this to V_MAD_F16. + void + Inst_VOP2__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP2__V_MADMK_F16 class methods --- + + Inst_VOP2__V_MADMK_F16::Inst_VOP2__V_MADMK_F16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_madmk_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(MAD); + } // Inst_VOP2__V_MADMK_F16 + + Inst_VOP2__V_MADMK_F16::~Inst_VOP2__V_MADMK_F16() + { + } // ~Inst_VOP2__V_MADMK_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 * K.f16 + S1.f16; K is a 16-bit inline constant stored + // in the following literal DWORD. + // This opcode cannot use the VOP3 encoding and cannot use input/output + // modifiers. Supports round mode, exception flags, saturation. + // SQ translates this to V_MAD_F16. + void + Inst_VOP2__V_MADMK_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP2__V_MADAK_F16 class methods --- + + Inst_VOP2__V_MADAK_F16::Inst_VOP2__V_MADAK_F16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_madak_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(MAD); + } // Inst_VOP2__V_MADAK_F16 + + Inst_VOP2__V_MADAK_F16::~Inst_VOP2__V_MADAK_F16() + { + } // ~Inst_VOP2__V_MADAK_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 * S1.f16 + K.f16; K is a 16-bit inline constant stored + // in the following literal DWORD. + // This opcode cannot use the VOP3 encoding and cannot use input/output + // modifiers. Supports round mode, exception flags, saturation. + // SQ translates this to V_MAD_F16. 
+ void + Inst_VOP2__V_MADAK_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP2__V_ADD_U16 class methods --- + + Inst_VOP2__V_ADD_U16::Inst_VOP2__V_ADD_U16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_add_u16") + { + setFlag(ALU); + } // Inst_VOP2__V_ADD_U16 + + Inst_VOP2__V_ADD_U16::~Inst_VOP2__V_ADD_U16() + { + } // ~Inst_VOP2__V_ADD_U16 + + // --- description from .arch file --- + // D.u16 = S0.u16 + S1.u16. + // Supports saturation (unsigned 16-bit integer domain). + void + Inst_VOP2__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] + src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_SUB_U16 class methods --- + + Inst_VOP2__V_SUB_U16::Inst_VOP2__V_SUB_U16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_sub_u16") + { + setFlag(ALU); + } // Inst_VOP2__V_SUB_U16 + + Inst_VOP2__V_SUB_U16::~Inst_VOP2__V_SUB_U16() + { + } // ~Inst_VOP2__V_SUB_U16 + + // --- description from .arch file --- + // D.u16 = S0.u16 - S1.u16. + // Supports saturation (unsigned 16-bit integer domain). + void + Inst_VOP2__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] - src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_SUBREV_U16 class methods --- + + Inst_VOP2__V_SUBREV_U16::Inst_VOP2__V_SUBREV_U16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_subrev_u16") + { + setFlag(ALU); + } // Inst_VOP2__V_SUBREV_U16 + + Inst_VOP2__V_SUBREV_U16::~Inst_VOP2__V_SUBREV_U16() + { + } // ~Inst_VOP2__V_SUBREV_U16 + + // --- description from .arch file --- + // D.u16 = S1.u16 - S0.u16. + // Supports saturation (unsigned 16-bit integer domain). + // SQ translates this to V_SUB_U16 with reversed operands. + void + Inst_VOP2__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] - src0[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MUL_LO_U16 class methods --- + + Inst_VOP2__V_MUL_LO_U16::Inst_VOP2__V_MUL_LO_U16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_mul_lo_u16") + { + setFlag(ALU); + } // Inst_VOP2__V_MUL_LO_U16 + + Inst_VOP2__V_MUL_LO_U16::~Inst_VOP2__V_MUL_LO_U16() + { + } // ~Inst_VOP2__V_MUL_LO_U16 + + // --- description from .arch file --- + // D.u16 = S0.u16 * S1.u16. + // Supports saturation (unsigned 16-bit integer domain). 
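+ // --- illustrative note (not from the .arch file) ---
+ // Only the low 16 bits of the product are kept, so the result wraps on
+ // overflow: 0x0200 * 0x0300 = 0x60000, which truncates to D.u16 = 0x0000.
+ // Saturation (clamping) is not applied by the plain multiply below.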
+ void + Inst_VOP2__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] * src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_LSHLREV_B16 class methods --- + + Inst_VOP2__V_LSHLREV_B16::Inst_VOP2__V_LSHLREV_B16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_lshlrev_b16") + { + setFlag(ALU); + } // Inst_VOP2__V_LSHLREV_B16 + + Inst_VOP2__V_LSHLREV_B16::~Inst_VOP2__V_LSHLREV_B16() + { + } // ~Inst_VOP2__V_LSHLREV_B16 + + // --- description from .arch file --- + // D.u[15:0] = S1.u[15:0] << S0.u[3:0]. + // SQ translates this to an internal SP opcode. + void + Inst_VOP2__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] << bits(src0[lane], 3, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_LSHRREV_B16 class methods --- + + Inst_VOP2__V_LSHRREV_B16::Inst_VOP2__V_LSHRREV_B16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_lshrrev_b16") + { + setFlag(ALU); + } // Inst_VOP2__V_LSHRREV_B16 + + Inst_VOP2__V_LSHRREV_B16::~Inst_VOP2__V_LSHRREV_B16() + { + } // ~Inst_VOP2__V_LSHRREV_B16 + + // --- description from .arch file --- + // D.u[15:0] = S1.u[15:0] >> S0.u[3:0]. + // The vacated bits are set to zero. + // SQ translates this to an internal SP opcode. + void + Inst_VOP2__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] >> src0[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_ASHRREV_I16 class methods --- + + Inst_VOP2__V_ASHRREV_I16::Inst_VOP2__V_ASHRREV_I16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_ashrrev_i16") + { + setFlag(ALU); + } // Inst_VOP2__V_ASHRREV_I16 + + Inst_VOP2__V_ASHRREV_I16::~Inst_VOP2__V_ASHRREV_I16() + { + } // ~Inst_VOP2__V_ASHRREV_I16 + + // --- description from .arch file --- + // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0]. + // The vacated bits are set to the sign bit of the input value. + // SQ translates this to an internal SP opcode. 
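+ // --- illustrative note (not from the .arch file) ---
+ // 16-bit arithmetic shift right with reversed operands, mirroring
+ // V_ASHRREV_I32 above: e.g. S1 = 0xFFF0 (-16) shifted by S0 = 2 gives
+ // 0xFFFC (-4); the sign bit of S1 fills the vacated bit positions.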
+ void + Inst_VOP2__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + VecOperandI16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] >> src0[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MAX_F16 class methods --- + + Inst_VOP2__V_MAX_F16::Inst_VOP2__V_MAX_F16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_max_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP2__V_MAX_F16 + + Inst_VOP2__V_MAX_F16::~Inst_VOP2__V_MAX_F16() + { + } // ~Inst_VOP2__V_MAX_F16 + + // --- description from .arch file --- + // D.f16 = max(S0.f16, S1.f16). + // IEEE compliant. Supports denormals, round mode, exception flags, + // saturation. + void + Inst_VOP2__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP2__V_MIN_F16 class methods --- + + Inst_VOP2__V_MIN_F16::Inst_VOP2__V_MIN_F16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_min_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP2__V_MIN_F16 + + Inst_VOP2__V_MIN_F16::~Inst_VOP2__V_MIN_F16() + { + } // ~Inst_VOP2__V_MIN_F16 + + // --- description from .arch file --- + // D.f16 = min(S0.f16, S1.f16). + // IEEE compliant. Supports denormals, round mode, exception flags, + // saturation. + void + Inst_VOP2__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP2__V_MAX_U16 class methods --- + + Inst_VOP2__V_MAX_U16::Inst_VOP2__V_MAX_U16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_max_u16") + { + setFlag(ALU); + } // Inst_VOP2__V_MAX_U16 + + Inst_VOP2__V_MAX_U16::~Inst_VOP2__V_MAX_U16() + { + } // ~Inst_VOP2__V_MAX_U16 + + // --- description from .arch file --- + // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]). + void + Inst_VOP2__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::max(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MAX_I16 class methods --- + + Inst_VOP2__V_MAX_I16::Inst_VOP2__V_MAX_I16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_max_i16") + { + setFlag(ALU); + } // Inst_VOP2__V_MAX_I16 + + Inst_VOP2__V_MAX_I16::~Inst_VOP2__V_MAX_I16() + { + } // ~Inst_VOP2__V_MAX_I16 + + // --- description from .arch file --- + // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]). 
+ void + Inst_VOP2__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + VecOperandI16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::max(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MIN_U16 class methods --- + + Inst_VOP2__V_MIN_U16::Inst_VOP2__V_MIN_U16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_min_u16") + { + setFlag(ALU); + } // Inst_VOP2__V_MIN_U16 + + Inst_VOP2__V_MIN_U16::~Inst_VOP2__V_MIN_U16() + { + } // ~Inst_VOP2__V_MIN_U16 + + // --- description from .arch file --- + // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]). + void + Inst_VOP2__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::min(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MIN_I16 class methods --- + + Inst_VOP2__V_MIN_I16::Inst_VOP2__V_MIN_I16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_min_i16") + { + setFlag(ALU); + } // Inst_VOP2__V_MIN_I16 + + Inst_VOP2__V_MIN_I16::~Inst_VOP2__V_MIN_I16() + { + } // ~Inst_VOP2__V_MIN_I16 + + // --- description from .arch file --- + // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]). + void + Inst_VOP2__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + VecOperandI16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::min(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_LDEXP_F16 class methods --- + + Inst_VOP2__V_LDEXP_F16::Inst_VOP2__V_LDEXP_F16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_ldexp_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP2__V_LDEXP_F16 + + Inst_VOP2__V_LDEXP_F16::~Inst_VOP2__V_LDEXP_F16() + { + } // ~Inst_VOP2__V_LDEXP_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 * (2 ** S1.i16). 
+ void + Inst_VOP2__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP2__V_ADD_U32 class methods --- + + Inst_VOP2__V_ADD_U32::Inst_VOP2__V_ADD_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_add_u32") + { + setFlag(ALU); + } // Inst_VOP2__V_ADD_U32 + + Inst_VOP2__V_ADD_U32::~Inst_VOP2__V_ADD_U32() + { + } // ~Inst_VOP2__V_ADD_U32 + + // --- description from .arch file --- + // D.u = S0.u + S1.u; + void + Inst_VOP2__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + VecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + if (isSDWAInst()) { + VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); + // use copies of original src0, src1, and dest during selecting + VecOperandU32 origSrc0_sdwa(gpuDynInst, + extData.iFmt_VOP_SDWA.SRC0); + VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1); + VecOperandU32 origVdst(gpuDynInst, instData.VDST); + + src0_sdwa.read(); + origSrc0_sdwa.read(); + origSrc1.read(); + + DPRINTF(VEGA, "Handling V_ADD_U32 SRC SDWA. SRC0: register v[%d], " + "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, " + "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, " + "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", + extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, + extData.iFmt_VOP_SDWA.DST_U, + extData.iFmt_VOP_SDWA.CLMP, + extData.iFmt_VOP_SDWA.SRC0_SEL, + extData.iFmt_VOP_SDWA.SRC0_SEXT, + extData.iFmt_VOP_SDWA.SRC0_NEG, + extData.iFmt_VOP_SDWA.SRC0_ABS, + extData.iFmt_VOP_SDWA.SRC1_SEL, + extData.iFmt_VOP_SDWA.SRC1_SEXT, + extData.iFmt_VOP_SDWA.SRC1_NEG, + extData.iFmt_VOP_SDWA.SRC1_ABS); + + processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa, + src1, origSrc1); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0_sdwa[lane] + src1[lane]; + origVdst[lane] = vdst[lane]; // keep copy consistent + } + } + + processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst); + } else { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] + src1[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_SUB_U32 class methods --- + + Inst_VOP2__V_SUB_U32::Inst_VOP2__V_SUB_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_sub_u32") + { + setFlag(ALU); + } // Inst_VOP2__V_SUB_U32 + + Inst_VOP2__V_SUB_U32::~Inst_VOP2__V_SUB_U32() + { + } // ~Inst_VOP2__V_SUB_U32 + + // --- description from .arch file --- + // D.u = S0.u - S1.u; + void + Inst_VOP2__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] - src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_SUBREV_U32 class methods --- + + Inst_VOP2__V_SUBREV_U32::Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_subrev_u32") + { + setFlag(ALU); + } // Inst_VOP2__V_SUBREV_U32 + + Inst_VOP2__V_SUBREV_U32::~Inst_VOP2__V_SUBREV_U32() + { + } // ~Inst_VOP2__V_SUBREV_U32 + + // --- description from .arch file --- + // D.u = S1.u - S0.u; + void + 
Inst_VOP2__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
+        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
+        VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.read();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = src1[lane] - src0[lane];
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP2__V_FMAC_F32 class methods ---
+
+    Inst_VOP2__V_FMAC_F32::Inst_VOP2__V_FMAC_F32(InFmt_VOP2 *iFmt)
+        : Inst_VOP2(iFmt, "v_fmac_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP2__V_FMAC_F32
+
+    Inst_VOP2__V_FMAC_F32::~Inst_VOP2__V_FMAC_F32()
+    {
+    } // ~Inst_VOP2__V_FMAC_F32
+
+    // --- description from .arch file ---
+    // D.f = S0.f * S1.f + D.f.
+    void
+    Inst_VOP2__V_FMAC_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
+        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.read();
+        vdst.read();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+} // namespace VegaISA
+} // namespace gem5
diff --git a/src/arch/amdgpu/vega/insts/vop3.cc b/src/arch/amdgpu/vega/insts/vop3.cc
new file mode 100644
index 0000000000..8f6794c9c2
--- /dev/null
+++ b/src/arch/amdgpu/vega/insts/vop3.cc
@@ -0,0 +1,8906 @@
+/*
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */ + +#include "arch/amdgpu/vega/insts/inst_util.hh" +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_VOP3__V_CNDMASK_B32 class methods --- + + Inst_VOP3__V_CNDMASK_B32::Inst_VOP3__V_CNDMASK_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cndmask_b32", false) + { + setFlag(ALU); + setFlag(ReadsVCC); + } // Inst_VOP3__V_CNDMASK_B32 + + Inst_VOP3__V_CNDMASK_B32::~Inst_VOP3__V_CNDMASK_B32() + { + } // ~Inst_VOP3__V_CNDMASK_B32 + + // --- description from .arch file --- + // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC + // as a scalar GPR in S2. + void + Inst_VOP3__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + vcc.read(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = bits(vcc.rawData(), lane) + ? src1[lane] : src0[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_ADD_F32 class methods --- + + Inst_VOP3__V_ADD_F32::Inst_VOP3__V_ADD_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_add_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_ADD_F32 + + Inst_VOP3__V_ADD_F32::~Inst_VOP3__V_ADD_F32() + { + } // ~Inst_VOP3__V_ADD_F32 + + // --- description from .arch file --- + // D.f = S0.f + S1.f. + void + Inst_VOP3__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] + src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SUB_F32 class methods --- + + Inst_VOP3__V_SUB_F32::Inst_VOP3__V_SUB_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sub_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_SUB_F32 + + Inst_VOP3__V_SUB_F32::~Inst_VOP3__V_SUB_F32() + { + } // ~Inst_VOP3__V_SUB_F32 + + // --- description from .arch file --- + // D.f = S0.f - S1.f. + // SQ translates to V_ADD_F32. 
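+    // Note: in these VOP3 FP implementations, instData.ABS bit 0 and
+    // extData.NEG bit 0 apply the absolute-value and negation input
+    // modifiers to SRC0, bit 1 applies them to SRC1, and bit 2 (SRC2)
+    // must stay clear for two-operand instructions, which the asserts
+    // below enforce.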
+ void + Inst_VOP3__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] - src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SUBREV_F32 class methods --- + + Inst_VOP3__V_SUBREV_F32::Inst_VOP3__V_SUBREV_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_subrev_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_SUBREV_F32 + + Inst_VOP3__V_SUBREV_F32::~Inst_VOP3__V_SUBREV_F32() + { + } // ~Inst_VOP3__V_SUBREV_F32 + + // --- description from .arch file --- + // D.f = S1.f - S0.f. + // SQ translates to V_ADD_F32. + void + Inst_VOP3__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] - src0[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MUL_LEGACY_F32 class methods --- + + Inst_VOP3__V_MUL_LEGACY_F32::Inst_VOP3__V_MUL_LEGACY_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mul_legacy_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_MUL_LEGACY_F32 + + Inst_VOP3__V_MUL_LEGACY_F32::~Inst_VOP3__V_MUL_LEGACY_F32() + { + } // ~Inst_VOP3__V_MUL_LEGACY_F32 + + // --- description from .arch file --- + // D.f = S0.f * S1.f (DX9 rules, 0.0*x = 0.0). 
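+    // Note: the lane loop below spells out the DX9 corner cases: NaN
+    // inputs propagate NaN, zero (or subnormal) times infinity yields NaN,
+    // a zero/subnormal first source times a finite value yields a signed
+    // zero, and an infinite first source times a finite non-zero value
+    // yields a signed infinity; every remaining case is an ordinary
+    // multiply.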
+ void + Inst_VOP3__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::isnan(src0[lane]) || + std::isnan(src1[lane])) { + vdst[lane] = NAN; + } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || + std::fpclassify(src0[lane]) == FP_ZERO) && + !std::signbit(src0[lane])) { + if (std::isinf(src1[lane])) { + vdst[lane] = NAN; + } else if (!std::signbit(src1[lane])) { + vdst[lane] = +0.0; + } else { + vdst[lane] = -0.0; + } + } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || + std::fpclassify(src0[lane]) == FP_ZERO) && + std::signbit(src0[lane])) { + if (std::isinf(src1[lane])) { + vdst[lane] = NAN; + } else if (std::signbit(src1[lane])) { + vdst[lane] = +0.0; + } else { + vdst[lane] = -0.0; + } + } else if (std::isinf(src0[lane]) && + !std::signbit(src0[lane])) { + if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || + std::fpclassify(src1[lane]) == FP_ZERO) { + vdst[lane] = NAN; + } else if (!std::signbit(src1[lane])) { + vdst[lane] = +INFINITY; + } else { + vdst[lane] = -INFINITY; + } + } else if (std::isinf(src0[lane]) && + std::signbit(src0[lane])) { + if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || + std::fpclassify(src1[lane]) == FP_ZERO) { + vdst[lane] = NAN; + } else if (std::signbit(src1[lane])) { + vdst[lane] = +INFINITY; + } else { + vdst[lane] = -INFINITY; + } + } else { + vdst[lane] = src0[lane] * src1[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MUL_F32 class methods --- + + Inst_VOP3__V_MUL_F32::Inst_VOP3__V_MUL_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mul_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_MUL_F32 + + Inst_VOP3__V_MUL_F32::~Inst_VOP3__V_MUL_F32() + { + } // ~Inst_VOP3__V_MUL_F32 + + // --- description from .arch file --- + // D.f = S0.f * S1.f. 
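+    // Note: v_mul_f32 applies the same explicit NaN, zero/subnormal, and
+    // infinity special-casing as v_mul_legacy_f32 above; only the
+    // well-defined finite cases fall through to the plain multiply.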
+ void + Inst_VOP3__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::isnan(src0[lane]) || + std::isnan(src1[lane])) { + vdst[lane] = NAN; + } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || + std::fpclassify(src0[lane]) == FP_ZERO) && + !std::signbit(src0[lane])) { + if (std::isinf(src1[lane])) { + vdst[lane] = NAN; + } else if (!std::signbit(src1[lane])) { + vdst[lane] = +0.0; + } else { + vdst[lane] = -0.0; + } + } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || + std::fpclassify(src0[lane]) == FP_ZERO) && + std::signbit(src0[lane])) { + if (std::isinf(src1[lane])) { + vdst[lane] = NAN; + } else if (std::signbit(src1[lane])) { + vdst[lane] = +0.0; + } else { + vdst[lane] = -0.0; + } + } else if (std::isinf(src0[lane]) && + !std::signbit(src0[lane])) { + if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || + std::fpclassify(src1[lane]) == FP_ZERO) { + vdst[lane] = NAN; + } else if (!std::signbit(src1[lane])) { + vdst[lane] = +INFINITY; + } else { + vdst[lane] = -INFINITY; + } + } else if (std::isinf(src0[lane]) && + std::signbit(src0[lane])) { + if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || + std::fpclassify(src1[lane]) == FP_ZERO) { + vdst[lane] = NAN; + } else if (std::signbit(src1[lane])) { + vdst[lane] = +INFINITY; + } else { + vdst[lane] = -INFINITY; + } + } else { + vdst[lane] = src0[lane] * src1[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MUL_I32_I24 class methods --- + + Inst_VOP3__V_MUL_I32_I24::Inst_VOP3__V_MUL_I32_I24(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mul_i32_i24", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MUL_I32_I24 + + Inst_VOP3__V_MUL_I32_I24::~Inst_VOP3__V_MUL_I32_I24() + { + } // ~Inst_VOP3__V_MUL_I32_I24 + + // --- description from .arch file --- + // D.i = S0.i[23:0] * S1.i[23:0]. 
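+    // Note: only the low 24 bits of each source participate;
+    // bits(..., 23, 0) extracts them and sext<24>() sign-extends before
+    // the 32-bit product is formed.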
+ void + Inst_VOP3__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = sext<24>(bits(src0[lane], 23, 0)) + * sext<24>(bits(src1[lane], 23, 0)); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MUL_HI_I32_I24 class methods --- + + Inst_VOP3__V_MUL_HI_I32_I24::Inst_VOP3__V_MUL_HI_I32_I24(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mul_hi_i32_i24", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MUL_HI_I32_I24 + + Inst_VOP3__V_MUL_HI_I32_I24::~Inst_VOP3__V_MUL_HI_I32_I24() + { + } // ~Inst_VOP3__V_MUL_HI_I32_I24 + + // --- description from .arch file --- + // D.i = (S0.i[23:0] * S1.i[23:0])>>32. + void + Inst_VOP3__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemI64 tmp_src0 + = (VecElemI64)sext<24>(bits(src0[lane], 23, 0)); + VecElemI64 tmp_src1 + = (VecElemI64)sext<24>(bits(src1[lane], 23, 0)); + + vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MUL_U32_U24 class methods --- + + Inst_VOP3__V_MUL_U32_U24::Inst_VOP3__V_MUL_U32_U24(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mul_u32_u24", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MUL_U32_U24 + + Inst_VOP3__V_MUL_U32_U24::~Inst_VOP3__V_MUL_U32_U24() + { + } // ~Inst_VOP3__V_MUL_U32_U24 + + // --- description from .arch file --- + // D.u = S0.u[23:0] * S1.u[23:0]. 
+ void + Inst_VOP3__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MUL_HI_U32_U24 class methods --- + + Inst_VOP3__V_MUL_HI_U32_U24::Inst_VOP3__V_MUL_HI_U32_U24(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mul_hi_u32_u24", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MUL_HI_U32_U24 + + Inst_VOP3__V_MUL_HI_U32_U24::~Inst_VOP3__V_MUL_HI_U32_U24() + { + } // ~Inst_VOP3__V_MUL_HI_U32_U24 + + // --- description from .arch file --- + // D.i = (S0.u[23:0] * S1.u[23:0])>>32. + void + Inst_VOP3__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0); + VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0); + vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MIN_F32 class methods --- + + Inst_VOP3__V_MIN_F32::Inst_VOP3__V_MIN_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_min_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_MIN_F32 + + Inst_VOP3__V_MIN_F32::~Inst_VOP3__V_MIN_F32() + { + } // ~Inst_VOP3__V_MIN_F32 + + // --- description from .arch file --- + // D.f = (S0.f < S1.f ? S0.f : S1.f). 
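+    // Note: the FP min/max below use std::fmin/std::fmax, which return
+    // the non-NaN operand when exactly one input is NaN, rather than the
+    // plain comparison given in the .arch description.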
+ void + Inst_VOP3__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fmin(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAX_F32 class methods --- + + Inst_VOP3__V_MAX_F32::Inst_VOP3__V_MAX_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_max_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_MAX_F32 + + Inst_VOP3__V_MAX_F32::~Inst_VOP3__V_MAX_F32() + { + } // ~Inst_VOP3__V_MAX_F32 + + // --- description from .arch file --- + // D.f = (S0.f >= S1.f ? S0.f : S1.f). + void + Inst_VOP3__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fmax(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MIN_I32 class methods --- + + Inst_VOP3__V_MIN_I32::Inst_VOP3__V_MIN_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_min_i32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MIN_I32 + + Inst_VOP3__V_MIN_I32::~Inst_VOP3__V_MIN_I32() + { + } // ~Inst_VOP3__V_MIN_I32 + + // --- description from .arch file --- + // D.i = min(S0.i, S1.i). + void + Inst_VOP3__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::min(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAX_I32 class methods --- + + Inst_VOP3__V_MAX_I32::Inst_VOP3__V_MAX_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_max_i32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MAX_I32 + + Inst_VOP3__V_MAX_I32::~Inst_VOP3__V_MAX_I32() + { + } // ~Inst_VOP3__V_MAX_I32 + + // --- description from .arch file --- + // D.i = max(S0.i, S1.i). 
+ void + Inst_VOP3__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::max(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MIN_U32 class methods --- + + Inst_VOP3__V_MIN_U32::Inst_VOP3__V_MIN_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_min_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MIN_U32 + + Inst_VOP3__V_MIN_U32::~Inst_VOP3__V_MIN_U32() + { + } // ~Inst_VOP3__V_MIN_U32 + + // --- description from .arch file --- + // D.u = min(S0.u, S1.u). + void + Inst_VOP3__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::min(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAX_U32 class methods --- + + Inst_VOP3__V_MAX_U32::Inst_VOP3__V_MAX_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_max_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MAX_U32 + + Inst_VOP3__V_MAX_U32::~Inst_VOP3__V_MAX_U32() + { + } // ~Inst_VOP3__V_MAX_U32 + + // --- description from .arch file --- + // D.u = max(S0.u, S1.u). + void + Inst_VOP3__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::max(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LSHRREV_B32 class methods --- + + Inst_VOP3__V_LSHRREV_B32::Inst_VOP3__V_LSHRREV_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_lshrrev_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_LSHRREV_B32 + + Inst_VOP3__V_LSHRREV_B32::~Inst_VOP3__V_LSHRREV_B32() + { + } // ~Inst_VOP3__V_LSHRREV_B32 + + // --- description from .arch file --- + // D.u = S1.u >> S0.u[4:0]. + // The vacated bits are set to zero. + // SQ translates this to an internal SP opcode. 
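+    // Note: as with the other 32-bit shifts, only the low five bits of S0
+    // (bits(src0, 4, 0)) select the shift amount applied to S1.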
+ void + Inst_VOP3__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_ASHRREV_I32 class methods --- + + Inst_VOP3__V_ASHRREV_I32::Inst_VOP3__V_ASHRREV_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ashrrev_i32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_ASHRREV_I32 + + Inst_VOP3__V_ASHRREV_I32::~Inst_VOP3__V_ASHRREV_I32() + { + } // ~Inst_VOP3__V_ASHRREV_I32 + + // --- description from .arch file --- + // D.i = signext(S1.i) >> S0.i[4:0]. + // The vacated bits are set to the sign bit of the input value. + // SQ translates this to an internal SP opcode. + void + Inst_VOP3__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LSHLREV_B32 class methods --- + + Inst_VOP3__V_LSHLREV_B32::Inst_VOP3__V_LSHLREV_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_lshlrev_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_LSHLREV_B32 + + Inst_VOP3__V_LSHLREV_B32::~Inst_VOP3__V_LSHLREV_B32() + { + } // ~Inst_VOP3__V_LSHLREV_B32 + + // --- description from .arch file --- + // D.u = S1.u << S0.u[4:0]. + // SQ translates this to an internal SP opcode. 
+ void + Inst_VOP3__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] << bits(src0[lane], 4, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_AND_B32 class methods --- + + Inst_VOP3__V_AND_B32::Inst_VOP3__V_AND_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_and_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_AND_B32 + + Inst_VOP3__V_AND_B32::~Inst_VOP3__V_AND_B32() + { + } // ~Inst_VOP3__V_AND_B32 + + // --- description from .arch file --- + // D.u = S0.u & S1.u. + // Input and output modifiers not supported. + void + Inst_VOP3__V_AND_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] & src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_OR_B32 class methods --- + + Inst_VOP3__V_OR_B32::Inst_VOP3__V_OR_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_or_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_OR_B32 + + Inst_VOP3__V_OR_B32::~Inst_VOP3__V_OR_B32() + { + } // ~Inst_VOP3__V_OR_B32 + + // --- description from .arch file --- + // D.u = S0.u | S1.u. + // Input and output modifiers not supported. + void + Inst_VOP3__V_OR_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] | src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_OR3_B32 class methods --- + + Inst_VOP3__V_OR3_B32::Inst_VOP3__V_OR3_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_or3_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_OR3_B32 + + Inst_VOP3__V_OR3_B32::~Inst_VOP3__V_OR3_B32() + { + } // ~Inst_VOP3__V_OR3_B32 + + // --- description from .arch file --- + // D.u = S0.u | S1.u | S2.u. + // Input and output modifiers not supported. 
+ void + Inst_VOP3__V_OR3_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] | src1[lane] | src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_XOR_B32 class methods --- + + Inst_VOP3__V_XOR_B32::Inst_VOP3__V_XOR_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_xor_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_XOR_B32 + + Inst_VOP3__V_XOR_B32::~Inst_VOP3__V_XOR_B32() + { + } // ~Inst_VOP3__V_XOR_B32 + + // --- description from .arch file --- + // D.u = S0.u ^ S1.u. + // Input and output modifiers not supported. + void + Inst_VOP3__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] ^ src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAC_F32 class methods --- + + Inst_VOP3__V_MAC_F32::Inst_VOP3__V_MAC_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mac_f32", false) + { + setFlag(ALU); + setFlag(F32); + setFlag(MAC); + } // Inst_VOP3__V_MAC_F32 + + Inst_VOP3__V_MAC_F32::~Inst_VOP3__V_MAC_F32() + { + } // ~Inst_VOP3__V_MAC_F32 + + // --- description from .arch file --- + // D.f = S0.f * S1.f + D.f. + // SQ translates to V_MAD_F32. 
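+    // Note: the destination is also a source for MAC, so vdst is read
+    // before the lane loop and each lane performs a fused multiply-add
+    // via std::fma(src0, src1, vdst).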
+ void + Inst_VOP3__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + vdst.read(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_ADD_CO_U32 class methods --- + + Inst_VOP3__V_ADD_CO_U32::Inst_VOP3__V_ADD_CO_U32(InFmt_VOP3B *iFmt) + : Inst_VOP3B(iFmt, "v_add_co_u32") + { + setFlag(ALU); + setFlag(WritesVCC); + } // Inst_VOP3__V_ADD_CO_U32 + + Inst_VOP3__V_ADD_CO_U32::~Inst_VOP3__V_ADD_CO_U32() + { + } // ~Inst_VOP3__V_ADD_CO_U32 + + // --- description from .arch file --- + // D.u = S0.u + S1.u; + // VCC[threadId] = (S0.u + S1.u >= 0x800000000ULL ? 1 : 0) is an UNSIGNED + // --- overflow or carry-out for V_ADDC_U32. + // In VOP3 the VCC destination may be an arbitrary SGPR-pair. + void + Inst_VOP3__V_ADD_CO_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + ScalarOperandU64 vcc(gpuDynInst, instData.SDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] + src1[lane]; + vcc.setBit(lane, ((VecElemU64)src0[lane] + + (VecElemU64)src1[lane]) >= 0x100000000ULL ? 1 : 0); + } + } + + vdst.write(); + vcc.write(); + } // execute + // --- Inst_VOP3__V_SUB_CO_U32 class methods --- + + Inst_VOP3__V_SUB_CO_U32::Inst_VOP3__V_SUB_CO_U32(InFmt_VOP3B *iFmt) + : Inst_VOP3B(iFmt, "v_sub_co_u32") + { + setFlag(ALU); + setFlag(WritesVCC); + } // Inst_VOP3__V_SUB_CO_U32 + + Inst_VOP3__V_SUB_CO_U32::~Inst_VOP3__V_SUB_CO_U32() + { + } // ~Inst_VOP3__V_SUB_CO_U32 + + // --- description from .arch file --- + // D.u = S0.u - S1.u; + // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or + // carry-out for V_SUBB_U32. + // In VOP3 the VCC destination may be an arbitrary SGPR-pair. + void + Inst_VOP3__V_SUB_CO_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + ScalarOperandU64 vcc(gpuDynInst, instData.SDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] - src1[lane]; + vcc.setBit(lane, src1[lane] > src0[lane] ? 
1 : 0); + } + } + + vdst.write(); + vcc.write(); + } // execute + // --- Inst_VOP3__V_SUBREV_CO_U32 class methods --- + + Inst_VOP3__V_SUBREV_CO_U32::Inst_VOP3__V_SUBREV_CO_U32( + InFmt_VOP3B *iFmt) + : Inst_VOP3B(iFmt, "v_subrev_co_u32") + { + setFlag(ALU); + setFlag(WritesVCC); + } // Inst_VOP3__V_SUBREV_CO_U32 + + Inst_VOP3__V_SUBREV_CO_U32::~Inst_VOP3__V_SUBREV_CO_U32() + { + } // ~Inst_VOP3__V_SUBREV_CO_U32 + + // --- description from .arch file --- + // D.u = S1.u - S0.u; + // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or + // carry-out for V_SUBB_U32. + // In VOP3 the VCC destination may be an arbitrary SGPR-pair. + // SQ translates this to V_SUB_U32 with reversed operands. + void + Inst_VOP3__V_SUBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + ScalarOperandU64 vcc(gpuDynInst, instData.SDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] - src0[lane]; + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + vdst.write(); + vcc.write(); + } // execute + // --- Inst_VOP3__V_ADDC_CO_U32 class methods --- + + Inst_VOP3__V_ADDC_CO_U32::Inst_VOP3__V_ADDC_CO_U32(InFmt_VOP3B *iFmt) + : Inst_VOP3B(iFmt, "v_addc_co_u32") + { + setFlag(ALU); + setFlag(WritesVCC); + setFlag(ReadsVCC); + } // Inst_VOP3__V_ADDC_CO_U32 + + Inst_VOP3__V_ADDC_CO_U32::~Inst_VOP3__V_ADDC_CO_U32() + { + } // ~Inst_VOP3__V_ADDC_CO_U32 + + // --- description from .arch file --- + // D.u = S0.u + S1.u + VCC[threadId]; + // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x800000000ULL ? 1 : 0) + // is an UNSIGNED overflow. + // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC + // source comes from the SGPR-pair at S2.u. + void + Inst_VOP3__V_ADDC_CO_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + + src0.readSrc(); + src1.readSrc(); + vcc.read(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] + src1[lane] + + bits(vcc.rawData(), lane); + sdst.setBit(lane, ((VecElemU64)src0[lane] + + (VecElemU64)src1[lane] + + (VecElemU64)bits(vcc.rawData(), lane)) + >= 0x100000000 ? 
1 : 0); + } + } + + vdst.write(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_SUBB_CO_U32 class methods --- + + Inst_VOP3__V_SUBB_CO_U32::Inst_VOP3__V_SUBB_CO_U32(InFmt_VOP3B *iFmt) + : Inst_VOP3B(iFmt, "v_subb_co_u32") + { + setFlag(ALU); + setFlag(WritesVCC); + setFlag(ReadsVCC); + } // Inst_VOP3__V_SUBB_CO_U32 + + Inst_VOP3__V_SUBB_CO_U32::~Inst_VOP3__V_SUBB_CO_U32() + { + } // ~Inst_VOP3__V_SUBB_CO_U32 + + // --- description from .arch file --- + // D.u = S0.u - S1.u - VCC[threadId]; + // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED + // --- overflow. + // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC + // --- source comes from the SGPR-pair at S2.u. + void + Inst_VOP3__V_SUBB_CO_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + vcc.read(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] - src1[lane] + - bits(vcc.rawData(), lane); + sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane)) + > src0[lane] ? 1 : 0); + } + } + + vdst.write(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_SUBBREV_CO_U32 class methods --- + + Inst_VOP3__V_SUBBREV_CO_U32::Inst_VOP3__V_SUBBREV_CO_U32( + InFmt_VOP3B *iFmt) + : Inst_VOP3B(iFmt, "v_subbrev_co_u32") + { + setFlag(ALU); + setFlag(WritesVCC); + setFlag(ReadsVCC); + } // Inst_VOP3__V_SUBBREV_CO_U32 + + Inst_VOP3__V_SUBBREV_CO_U32::~Inst_VOP3__V_SUBBREV_CO_U32() + { + } // ~Inst_VOP3__V_SUBBREV_CO_U32 + + // --- description from .arch file --- + // D.u = S1.u - S0.u - VCC[threadId]; + // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED + // overflow. + // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC + // source comes from the SGPR-pair at S2.u. SQ translates to V_SUBB_U32. + void + Inst_VOP3__V_SUBBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU64 vcc(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + vcc.read(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] - src0[lane] + - bits(vcc.rawData(), lane); + sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane)) + > src0[lane] ? 
1 : 0); + } + } + + vdst.write(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_ADD_F16 class methods --- + + Inst_VOP3__V_ADD_F16::Inst_VOP3__V_ADD_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_add_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_ADD_F16 + + Inst_VOP3__V_ADD_F16::~Inst_VOP3__V_ADD_F16() + { + } // ~Inst_VOP3__V_ADD_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 + S1.f16. + // Supports denormals, round mode, exception flags, saturation. + void + Inst_VOP3__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_SUB_F16 class methods --- + + Inst_VOP3__V_SUB_F16::Inst_VOP3__V_SUB_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sub_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_SUB_F16 + + Inst_VOP3__V_SUB_F16::~Inst_VOP3__V_SUB_F16() + { + } // ~Inst_VOP3__V_SUB_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 - S1.f16. + // Supports denormals, round mode, exception flags, saturation. + // SQ translates to V_ADD_F16. + void + Inst_VOP3__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_SUBREV_F16 class methods --- + + Inst_VOP3__V_SUBREV_F16::Inst_VOP3__V_SUBREV_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_subrev_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_SUBREV_F16 + + Inst_VOP3__V_SUBREV_F16::~Inst_VOP3__V_SUBREV_F16() + { + } // ~Inst_VOP3__V_SUBREV_F16 + + // --- description from .arch file --- + // D.f16 = S1.f16 - S0.f16. + // Supports denormals, round mode, exception flags, saturation. + // SQ translates to V_ADD_F16. + void + Inst_VOP3__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_MUL_F16 class methods --- + + Inst_VOP3__V_MUL_F16::Inst_VOP3__V_MUL_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mul_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_MUL_F16 + + Inst_VOP3__V_MUL_F16::~Inst_VOP3__V_MUL_F16() + { + } // ~Inst_VOP3__V_MUL_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 * S1.f16. + // Supports denormals, round mode, exception flags, saturation. + void + Inst_VOP3__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_MAC_F16 class methods --- + + Inst_VOP3__V_MAC_F16::Inst_VOP3__V_MAC_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mac_f16", false) + { + setFlag(ALU); + setFlag(F16); + setFlag(MAC); + } // Inst_VOP3__V_MAC_F16 + + Inst_VOP3__V_MAC_F16::~Inst_VOP3__V_MAC_F16() + { + } // ~Inst_VOP3__V_MAC_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 * S1.f16 + D.f16. + // Supports round mode, exception flags, saturation. + // SQ translates this to V_MAD_F16. + void + Inst_VOP3__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_ADD_U16 class methods --- + + Inst_VOP3__V_ADD_U16::Inst_VOP3__V_ADD_U16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_add_u16", false) + { + setFlag(ALU); + } // Inst_VOP3__V_ADD_U16 + + Inst_VOP3__V_ADD_U16::~Inst_VOP3__V_ADD_U16() + { + } // ~Inst_VOP3__V_ADD_U16 + + // --- description from .arch file --- + // D.u16 = S0.u16 + S1.u16. + // Supports saturation (unsigned 16-bit integer domain). 
+ void + Inst_VOP3__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] + src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SUB_U16 class methods --- + + Inst_VOP3__V_SUB_U16::Inst_VOP3__V_SUB_U16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sub_u16", false) + { + setFlag(ALU); + } // Inst_VOP3__V_SUB_U16 + + Inst_VOP3__V_SUB_U16::~Inst_VOP3__V_SUB_U16() + { + } // ~Inst_VOP3__V_SUB_U16 + + // --- description from .arch file --- + // D.u16 = S0.u16 - S1.u16. + // Supports saturation (unsigned 16-bit integer domain). + void + Inst_VOP3__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] - src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SUBREV_U16 class methods --- + + Inst_VOP3__V_SUBREV_U16::Inst_VOP3__V_SUBREV_U16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_subrev_u16", false) + { + setFlag(ALU); + } // Inst_VOP3__V_SUBREV_U16 + + Inst_VOP3__V_SUBREV_U16::~Inst_VOP3__V_SUBREV_U16() + { + } // ~Inst_VOP3__V_SUBREV_U16 + + // --- description from .arch file --- + // D.u16 = S1.u16 - S0.u16. + // Supports saturation (unsigned 16-bit integer domain). + // SQ translates this to V_SUB_U16 with reversed operands. + void + Inst_VOP3__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] - src0[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MUL_LO_U16 class methods --- + + Inst_VOP3__V_MUL_LO_U16::Inst_VOP3__V_MUL_LO_U16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mul_lo_u16", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MUL_LO_U16 + + Inst_VOP3__V_MUL_LO_U16::~Inst_VOP3__V_MUL_LO_U16() + { + } // ~Inst_VOP3__V_MUL_LO_U16 + + // --- description from .arch file --- + // D.u16 = S0.u16 * S1.u16. 
+ // Supports saturation (unsigned 16-bit integer domain). + void + Inst_VOP3__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] * src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LSHLREV_B16 class methods --- + + Inst_VOP3__V_LSHLREV_B16::Inst_VOP3__V_LSHLREV_B16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_lshlrev_b16", false) + { + setFlag(ALU); + } // Inst_VOP3__V_LSHLREV_B16 + + Inst_VOP3__V_LSHLREV_B16::~Inst_VOP3__V_LSHLREV_B16() + { + } // ~Inst_VOP3__V_LSHLREV_B16 + + // --- description from .arch file --- + // D.u[15:0] = S1.u[15:0] << S0.u[3:0]. + // SQ translates this to an internal SP opcode. + void + Inst_VOP3__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] << bits(src0[lane], 3, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LSHRREV_B16 class methods --- + + Inst_VOP3__V_LSHRREV_B16::Inst_VOP3__V_LSHRREV_B16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_lshrrev_b16", false) + { + setFlag(ALU); + } // Inst_VOP3__V_LSHRREV_B16 + + Inst_VOP3__V_LSHRREV_B16::~Inst_VOP3__V_LSHRREV_B16() + { + } // ~Inst_VOP3__V_LSHRREV_B16 + + // --- description from .arch file --- + // D.u[15:0] = S1.u[15:0] >> S0.u[3:0]. + // The vacated bits are set to zero. + // SQ translates this to an internal SP opcode. 
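+    // Note: the 16-bit shifts use only the low four bits of S0
+    // (bits(src0, 3, 0)) as the shift amount.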
+ void + Inst_VOP3__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_ASHRREV_I16 class methods --- + + Inst_VOP3__V_ASHRREV_I16::Inst_VOP3__V_ASHRREV_I16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ashrrev_i16", false) + { + setFlag(ALU); + } // Inst_VOP3__V_ASHRREV_I16 + + Inst_VOP3__V_ASHRREV_I16::~Inst_VOP3__V_ASHRREV_I16() + { + } // ~Inst_VOP3__V_ASHRREV_I16 + + // --- description from .arch file --- + // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0]. + // The vacated bits are set to the sign bit of the input value. + // SQ translates this to an internal SP opcode. + void + Inst_VOP3__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + VecOperandI16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAX_F16 class methods --- + + Inst_VOP3__V_MAX_F16::Inst_VOP3__V_MAX_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_max_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_MAX_F16 + + Inst_VOP3__V_MAX_F16::~Inst_VOP3__V_MAX_F16() + { + } // ~Inst_VOP3__V_MAX_F16 + + // --- description from .arch file --- + // D.f16 = max(S0.f16, S1.f16). + // IEEE compliant. Supports denormals, round mode, exception flags, + // saturation. + void + Inst_VOP3__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_MIN_F16 class methods --- + + Inst_VOP3__V_MIN_F16::Inst_VOP3__V_MIN_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_min_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_MIN_F16 + + Inst_VOP3__V_MIN_F16::~Inst_VOP3__V_MIN_F16() + { + } // ~Inst_VOP3__V_MIN_F16 + + // --- description from .arch file --- + // D.f16 = min(S0.f16, S1.f16). + // IEEE compliant. Supports denormals, round mode, exception flags, + // saturation. + void + Inst_VOP3__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_MAX_U16 class methods --- + + Inst_VOP3__V_MAX_U16::Inst_VOP3__V_MAX_U16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_max_u16", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MAX_U16 + + Inst_VOP3__V_MAX_U16::~Inst_VOP3__V_MAX_U16() + { + } // ~Inst_VOP3__V_MAX_U16 + + // --- description from .arch file --- + // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]). 
+ void + Inst_VOP3__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::max(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAX_I16 class methods --- + + Inst_VOP3__V_MAX_I16::Inst_VOP3__V_MAX_I16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_max_i16", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MAX_I16 + + Inst_VOP3__V_MAX_I16::~Inst_VOP3__V_MAX_I16() + { + } // ~Inst_VOP3__V_MAX_I16 + + // --- description from .arch file --- + // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]). + void + Inst_VOP3__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + VecOperandI16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::max(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MIN_U16 class methods --- + + Inst_VOP3__V_MIN_U16::Inst_VOP3__V_MIN_U16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_min_u16", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MIN_U16 + + Inst_VOP3__V_MIN_U16::~Inst_VOP3__V_MIN_U16() + { + } // ~Inst_VOP3__V_MIN_U16 + + // --- description from .arch file --- + // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]). + void + Inst_VOP3__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::min(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MIN_I16 class methods --- + + Inst_VOP3__V_MIN_I16::Inst_VOP3__V_MIN_I16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_min_i16", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MIN_I16 + + Inst_VOP3__V_MIN_I16::~Inst_VOP3__V_MIN_I16() + { + } // ~Inst_VOP3__V_MIN_I16 + + // --- description from .arch file --- + // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]). 
+ void + Inst_VOP3__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + VecOperandI16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::min(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LDEXP_F16 class methods --- + + Inst_VOP3__V_LDEXP_F16::Inst_VOP3__V_LDEXP_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ldexp_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_LDEXP_F16 + + Inst_VOP3__V_LDEXP_F16::~Inst_VOP3__V_LDEXP_F16() + { + } // ~Inst_VOP3__V_LDEXP_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 * (2 ** S1.i16). + void + Inst_VOP3__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_ADD_U32 class methods --- + + Inst_VOP3__V_ADD_U32::Inst_VOP3__V_ADD_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_add_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_ADD_U32 + + Inst_VOP3__V_ADD_U32::~Inst_VOP3__V_ADD_U32() + { + } // ~Inst_VOP3__V_ADD_U32 + + // --- description from .arch file --- + // D.u32 = S0.u32 + S1.u32. + void + Inst_VOP3__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] + src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SUB_U32 class methods --- + + Inst_VOP3__V_SUB_U32::Inst_VOP3__V_SUB_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sub_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_SUB_U32 + + Inst_VOP3__V_SUB_U32::~Inst_VOP3__V_SUB_U32() + { + } // ~Inst_VOP3__V_SUB_U32 + + // --- description from .arch file --- + // D.u32 = S0.u32 - S1.u32. 
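+ // Implementation note: plain per-lane unsigned subtraction; the result
+ // wraps modulo 2^32 and no borrow is recorded here.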
+ void + Inst_VOP3__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] - src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SUBREV_U32 class methods --- + + Inst_VOP3__V_SUBREV_U32::Inst_VOP3__V_SUBREV_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_subrev_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_SUBREV_U32 + + Inst_VOP3__V_SUBREV_U32::~Inst_VOP3__V_SUBREV_U32() + { + } // ~Inst_VOP3__V_SUBREV_U32 + + // --- description from .arch file --- + // D.u32 = S1.u32 - S0.u32. + void + Inst_VOP3__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] - src0[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_NOP class methods --- + + Inst_VOP3__V_NOP::Inst_VOP3__V_NOP(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_nop", false) + { + setFlag(Nop); + setFlag(ALU); + } // Inst_VOP3__V_NOP + + Inst_VOP3__V_NOP::~Inst_VOP3__V_NOP() + { + } // ~Inst_VOP3__V_NOP + + // --- description from .arch file --- + // Do nothing. + void + Inst_VOP3__V_NOP::execute(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_VOP3__V_MOV_B32 class methods --- + + Inst_VOP3__V_MOV_B32::Inst_VOP3__V_MOV_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mov_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MOV_B32 + + Inst_VOP3__V_MOV_B32::~Inst_VOP3__V_MOV_B32() + { + } // ~Inst_VOP3__V_MOV_B32 + + // --- description from .arch file --- + // D.u = S0.u. + // Input and output modifiers not supported; this is an untyped operation. + void + Inst_VOP3__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_I32_F64 class methods --- + + Inst_VOP3__V_CVT_I32_F64::Inst_VOP3__V_CVT_I32_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_i32_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CVT_I32_F64 + + Inst_VOP3__V_CVT_I32_F64::~Inst_VOP3__V_CVT_I32_F64() + { + } // ~Inst_VOP3__V_CVT_I32_F64 + + // --- description from .arch file --- + // D.i = (int)S0.d. + // Out-of-range floating point values (including infinity) saturate. 
NaN is + // --- converted to 0. + void + Inst_VOP3__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + int exp; + std::frexp(src[lane],&exp); + if (std::isnan(src[lane])) { + vdst[lane] = 0; + } else if (std::isinf(src[lane]) || exp > 30) { + if (std::signbit(src[lane])) { + vdst[lane] = INT_MIN; + } else { + vdst[lane] = INT_MAX; + } + } else { + vdst[lane] = (VecElemI32)src[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_F64_I32 class methods --- + + Inst_VOP3__V_CVT_F64_I32::Inst_VOP3__V_CVT_F64_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f64_i32", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CVT_F64_I32 + + Inst_VOP3__V_CVT_F64_I32::~Inst_VOP3__V_CVT_F64_I32() + { + } // ~Inst_VOP3__V_CVT_F64_I32 + + // --- description from .arch file --- + // D.d = (double)S0.i. + void + Inst_VOP3__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF64)src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_F32_I32 class methods --- + + Inst_VOP3__V_CVT_F32_I32::Inst_VOP3__V_CVT_F32_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f32_i32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_F32_I32 + + Inst_VOP3__V_CVT_F32_I32::~Inst_VOP3__V_CVT_F32_I32() + { + } // ~Inst_VOP3__V_CVT_F32_I32 + + // --- description from .arch file --- + // D.f = (float)S0.i. + void + Inst_VOP3__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + VecOperandI32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_F32_U32 class methods --- + + Inst_VOP3__V_CVT_F32_U32::Inst_VOP3__V_CVT_F32_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f32_u32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_F32_U32 + + Inst_VOP3__V_CVT_F32_U32::~Inst_VOP3__V_CVT_F32_U32() + { + } // ~Inst_VOP3__V_CVT_F32_U32 + + // --- description from .arch file --- + // D.f = (float)S0.u. 
+ void + Inst_VOP3__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_U32_F32 class methods --- + + Inst_VOP3__V_CVT_U32_F32::Inst_VOP3__V_CVT_U32_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_u32_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_U32_F32 + + Inst_VOP3__V_CVT_U32_F32::~Inst_VOP3__V_CVT_U32_F32() + { + } // ~Inst_VOP3__V_CVT_U32_F32 + + // --- description from .arch file --- + // D.u = (unsigned)S0.f. + // Out-of-range floating point values (including infinity) saturate. NaN is + // --- converted to 0. + void + Inst_VOP3__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + int exp; + std::frexp(src[lane],&exp); + if (std::isnan(src[lane])) { + vdst[lane] = 0; + } else if (std::isinf(src[lane])) { + if (std::signbit(src[lane])) { + vdst[lane] = 0; + } else { + vdst[lane] = UINT_MAX; + } + } else if (exp > 31) { + vdst[lane] = UINT_MAX; + } else { + vdst[lane] = (VecElemU32)src[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_I32_F32 class methods --- + + Inst_VOP3__V_CVT_I32_F32::Inst_VOP3__V_CVT_I32_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_i32_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_I32_F32 + + Inst_VOP3__V_CVT_I32_F32::~Inst_VOP3__V_CVT_I32_F32() + { + } // ~Inst_VOP3__V_CVT_I32_F32 + + // --- description from .arch file --- + // D.i = (int)S0.f. + // Out-of-range floating point values (including infinity) saturate. NaN is + // --- converted to 0. 
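+ // Implementation note: NaN inputs produce 0. std::frexp supplies the
+ // binary exponent, and inputs with an exponent above 30 (or +/-infinity)
+ // clamp to INT_MAX or INT_MIN depending on sign.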
+ void + Inst_VOP3__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + int exp; + std::frexp(src[lane],&exp); + if (std::isnan(src[lane])) { + vdst[lane] = 0; + } else if (std::isinf(src[lane]) || exp > 30) { + if (std::signbit(src[lane])) { + vdst[lane] = INT_MIN; + } else { + vdst[lane] = INT_MAX; + } + } else { + vdst[lane] = (VecElemI32)src[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MOV_FED_B32 class methods --- + + Inst_VOP3__V_MOV_FED_B32::Inst_VOP3__V_MOV_FED_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mov_fed_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MOV_FED_B32 + + Inst_VOP3__V_MOV_FED_B32::~Inst_VOP3__V_MOV_FED_B32() + { + } // ~Inst_VOP3__V_MOV_FED_B32 + + // --- description from .arch file --- + // D.u = S0.u; + // Introduce EDC double error upon write to dest vgpr without causing an + // --- exception. + // Input and output modifiers not supported; this is an untyped operation. + void + Inst_VOP3__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_F16_F32 class methods --- + + Inst_VOP3__V_CVT_F16_F32::Inst_VOP3__V_CVT_F16_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f16_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_F16_F32 + + Inst_VOP3__V_CVT_F16_F32::~Inst_VOP3__V_CVT_F16_F32() + { + } // ~Inst_VOP3__V_CVT_F16_F32 + + // --- description from .arch file --- + // D.f16 = flt32_to_flt16(S0.f). + // Supports input modifiers and creates FP16 denormals when appropriate. + void + Inst_VOP3__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_F32_F16 class methods --- + + Inst_VOP3__V_CVT_F32_F16::Inst_VOP3__V_CVT_F32_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f32_f16", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_F32_F16 + + Inst_VOP3__V_CVT_F32_F16::~Inst_VOP3__V_CVT_F32_F16() + { + } // ~Inst_VOP3__V_CVT_F32_F16 + + // --- description from .arch file --- + // D.f = flt16_to_flt32(S0.f16). + // FP16 denormal inputs are always accepted. + void + Inst_VOP3__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_RPI_I32_F32 class methods --- + + Inst_VOP3__V_CVT_RPI_I32_F32::Inst_VOP3__V_CVT_RPI_I32_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_rpi_i32_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_RPI_I32_F32 + + Inst_VOP3__V_CVT_RPI_I32_F32::~Inst_VOP3__V_CVT_RPI_I32_F32() + { + } // ~Inst_VOP3__V_CVT_RPI_I32_F32 + + // --- description from .arch file --- + // D.i = (int)floor(S0.f + 0.5). 
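+ // Implementation note: computed directly as floor(S0.f + 0.5) per lane,
+ // i.e. round-half-up, with no additional range or NaN handling.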
+ void + Inst_VOP3__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_FLR_I32_F32 class methods --- + + Inst_VOP3__V_CVT_FLR_I32_F32::Inst_VOP3__V_CVT_FLR_I32_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_flr_i32_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_FLR_I32_F32 + + Inst_VOP3__V_CVT_FLR_I32_F32::~Inst_VOP3__V_CVT_FLR_I32_F32() + { + } // ~Inst_VOP3__V_CVT_FLR_I32_F32 + + // --- description from .arch file --- + // D.i = (int)floor(S0.f). + void + Inst_VOP3__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemI32)std::floor(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_OFF_F32_I4 class methods --- + + Inst_VOP3__V_CVT_OFF_F32_I4::Inst_VOP3__V_CVT_OFF_F32_I4(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_off_f32_i4", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_OFF_F32_I4 + + Inst_VOP3__V_CVT_OFF_F32_I4::~Inst_VOP3__V_CVT_OFF_F32_I4() + { + } // ~Inst_VOP3__V_CVT_OFF_F32_I4 + + // --- description from .arch file --- + // 4-bit signed int to 32-bit float. Used for interpolation in shader. + void + Inst_VOP3__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst) + { + // Could not parse sq_uc.arch desc field + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_F32_F64 class methods --- + + Inst_VOP3__V_CVT_F32_F64::Inst_VOP3__V_CVT_F32_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f32_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CVT_F32_F64 + + Inst_VOP3__V_CVT_F32_F64::~Inst_VOP3__V_CVT_F32_F64() + { + } // ~Inst_VOP3__V_CVT_F32_F64 + + // --- description from .arch file --- + // D.f = (float)S0.d. 
+ void + Inst_VOP3__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_F64_F32 class methods --- + + Inst_VOP3__V_CVT_F64_F32::Inst_VOP3__V_CVT_F64_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f64_f32", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CVT_F64_F32 + + Inst_VOP3__V_CVT_F64_F32::~Inst_VOP3__V_CVT_F64_F32() + { + } // ~Inst_VOP3__V_CVT_F64_F32 + + // --- description from .arch file --- + // D.d = (double)S0.f. + void + Inst_VOP3__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF64)src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_F32_UBYTE0 class methods --- + + Inst_VOP3__V_CVT_F32_UBYTE0::Inst_VOP3__V_CVT_F32_UBYTE0(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte0", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_F32_UBYTE0 + + Inst_VOP3__V_CVT_F32_UBYTE0::~Inst_VOP3__V_CVT_F32_UBYTE0() + { + } // ~Inst_VOP3__V_CVT_F32_UBYTE0 + + // --- description from .arch file --- + // D.f = (float)(S0.u[7:0]). + void + Inst_VOP3__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)bits(src[lane], 7, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_F32_UBYTE1 class methods --- + + Inst_VOP3__V_CVT_F32_UBYTE1::Inst_VOP3__V_CVT_F32_UBYTE1(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte1", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_F32_UBYTE1 + + Inst_VOP3__V_CVT_F32_UBYTE1::~Inst_VOP3__V_CVT_F32_UBYTE1() + { + } // ~Inst_VOP3__V_CVT_F32_UBYTE1 + + // --- description from .arch file --- + // D.f = (float)(S0.u[15:8]). 
+ void + Inst_VOP3__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)bits(src[lane], 15, 8); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_F32_UBYTE2 class methods --- + + Inst_VOP3__V_CVT_F32_UBYTE2::Inst_VOP3__V_CVT_F32_UBYTE2(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte2", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_F32_UBYTE2 + + Inst_VOP3__V_CVT_F32_UBYTE2::~Inst_VOP3__V_CVT_F32_UBYTE2() + { + } // ~Inst_VOP3__V_CVT_F32_UBYTE2 + + // --- description from .arch file --- + // D.f = (float)(S0.u[23:16]). + void + Inst_VOP3__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)bits(src[lane], 23, 16); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_F32_UBYTE3 class methods --- + + Inst_VOP3__V_CVT_F32_UBYTE3::Inst_VOP3__V_CVT_F32_UBYTE3(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte3", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_F32_UBYTE3 + + Inst_VOP3__V_CVT_F32_UBYTE3::~Inst_VOP3__V_CVT_F32_UBYTE3() + { + } // ~Inst_VOP3__V_CVT_F32_UBYTE3 + + // --- description from .arch file --- + // D.f = (float)(S0.u[31:24]). + void + Inst_VOP3__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)bits(src[lane], 31, 24); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_U32_F64 class methods --- + + Inst_VOP3__V_CVT_U32_F64::Inst_VOP3__V_CVT_U32_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_u32_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CVT_U32_F64 + + Inst_VOP3__V_CVT_U32_F64::~Inst_VOP3__V_CVT_U32_F64() + { + } // ~Inst_VOP3__V_CVT_U32_F64 + + // --- description from .arch file --- + // D.u = (unsigned)S0.d. + // Out-of-range floating point values (including infinity) saturate. NaN is + // --- converted to 0. 
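+ // Implementation note: NaN converts to 0, -infinity clamps to 0, and
+ // +infinity or any input whose frexp exponent exceeds 31 clamps to
+ // UINT_MAX; everything else is a plain per-lane cast.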
+ void + Inst_VOP3__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + int exp; + std::frexp(src[lane],&exp); + if (std::isnan(src[lane])) { + vdst[lane] = 0; + } else if (std::isinf(src[lane])) { + if (std::signbit(src[lane])) { + vdst[lane] = 0; + } else { + vdst[lane] = UINT_MAX; + } + } else if (exp > 31) { + vdst[lane] = UINT_MAX; + } else { + vdst[lane] = (VecElemU32)src[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_F64_U32 class methods --- + + Inst_VOP3__V_CVT_F64_U32::Inst_VOP3__V_CVT_F64_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f64_u32", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CVT_F64_U32 + + Inst_VOP3__V_CVT_F64_U32::~Inst_VOP3__V_CVT_F64_U32() + { + } // ~Inst_VOP3__V_CVT_F64_U32 + + // --- description from .arch file --- + // D.d = (double)S0.u. + void + Inst_VOP3__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF64)src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_TRUNC_F64 class methods --- + + Inst_VOP3__V_TRUNC_F64::Inst_VOP3__V_TRUNC_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_trunc_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_TRUNC_F64 + + Inst_VOP3__V_TRUNC_F64::~Inst_VOP3__V_TRUNC_F64() + { + } // ~Inst_VOP3__V_TRUNC_F64 + + // --- description from .arch file --- + // D.d = trunc(S0.d), return integer part of S0.d. + void + Inst_VOP3__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::trunc(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CEIL_F64 class methods --- + + Inst_VOP3__V_CEIL_F64::Inst_VOP3__V_CEIL_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ceil_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CEIL_F64 + + Inst_VOP3__V_CEIL_F64::~Inst_VOP3__V_CEIL_F64() + { + } // ~Inst_VOP3__V_CEIL_F64 + + // --- description from .arch file --- + // D.d = trunc(S0.d); + // if (S0.d > 0.0 && S0.d != D.d) then D.d += 1.0. 
+ void + Inst_VOP3__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::ceil(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_RNDNE_F64 class methods --- + + Inst_VOP3__V_RNDNE_F64::Inst_VOP3__V_RNDNE_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_rndne_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_RNDNE_F64 + + Inst_VOP3__V_RNDNE_F64::~Inst_VOP3__V_RNDNE_F64() + { + } // ~Inst_VOP3__V_RNDNE_F64 + + // --- description from .arch file --- + // D.d = round_nearest_even(S0.d). + void + Inst_VOP3__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = roundNearestEven(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FLOOR_F64 class methods --- + + Inst_VOP3__V_FLOOR_F64::Inst_VOP3__V_FLOOR_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_floor_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_FLOOR_F64 + + Inst_VOP3__V_FLOOR_F64::~Inst_VOP3__V_FLOOR_F64() + { + } // ~Inst_VOP3__V_FLOOR_F64 + + // --- description from .arch file --- + // D.d = trunc(S0.d); + // if (S0.d < 0.0 && S0.d != D.d) then D.d += -1.0. + void + Inst_VOP3__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::floor(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FRACT_F32 class methods --- + + Inst_VOP3__V_FRACT_F32::Inst_VOP3__V_FRACT_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_fract_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_FRACT_F32 + + Inst_VOP3__V_FRACT_F32::~Inst_VOP3__V_FRACT_F32() + { + } // ~Inst_VOP3__V_FRACT_F32 + + // --- description from .arch file --- + // D.f = S0.f - floor(S0.f). 
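+ // Implementation note: std::modf splits each lane into fractional and
+ // integral parts; only the fractional part is kept, the integral part
+ // is discarded.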
+ void + Inst_VOP3__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemF32 int_part(0.0); + vdst[lane] = std::modf(src[lane], &int_part); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_TRUNC_F32 class methods --- + + Inst_VOP3__V_TRUNC_F32::Inst_VOP3__V_TRUNC_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_trunc_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_TRUNC_F32 + + Inst_VOP3__V_TRUNC_F32::~Inst_VOP3__V_TRUNC_F32() + { + } // ~Inst_VOP3__V_TRUNC_F32 + + // --- description from .arch file --- + // D.f = trunc(S0.f), return integer part of S0.f. + void + Inst_VOP3__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::trunc(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CEIL_F32 class methods --- + + Inst_VOP3__V_CEIL_F32::Inst_VOP3__V_CEIL_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ceil_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CEIL_F32 + + Inst_VOP3__V_CEIL_F32::~Inst_VOP3__V_CEIL_F32() + { + } // ~Inst_VOP3__V_CEIL_F32 + + // --- description from .arch file --- + // D.f = trunc(S0.f); + // if (S0.f > 0.0 && S0.f != D.f) then D.f += 1.0. + void + Inst_VOP3__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::ceil(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_RNDNE_F32 class methods --- + + Inst_VOP3__V_RNDNE_F32::Inst_VOP3__V_RNDNE_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_rndne_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_RNDNE_F32 + + Inst_VOP3__V_RNDNE_F32::~Inst_VOP3__V_RNDNE_F32() + { + } // ~Inst_VOP3__V_RNDNE_F32 + + // --- description from .arch file --- + // D.f = round_nearest_even(S0.f). 
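+ // Implementation note: delegates to the roundNearestEven() helper for
+ // round-to-nearest-even behavior.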
+ void + Inst_VOP3__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = roundNearestEven(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FLOOR_F32 class methods --- + + Inst_VOP3__V_FLOOR_F32::Inst_VOP3__V_FLOOR_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_floor_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_FLOOR_F32 + + Inst_VOP3__V_FLOOR_F32::~Inst_VOP3__V_FLOOR_F32() + { + } // ~Inst_VOP3__V_FLOOR_F32 + + // --- description from .arch file --- + // D.f = trunc(S0.f); + // if (S0.f < 0.0 && S0.f != D.f) then D.f += -1.0. + void + Inst_VOP3__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::floor(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_EXP_F32 class methods --- + + Inst_VOP3__V_EXP_F32::Inst_VOP3__V_EXP_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_exp_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_EXP_F32 + + Inst_VOP3__V_EXP_F32::~Inst_VOP3__V_EXP_F32() + { + } // ~Inst_VOP3__V_EXP_F32 + + // --- description from .arch file --- + // D.f = pow(2.0, S0.f). + void + Inst_VOP3__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::pow(2.0, src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LOG_F32 class methods --- + + Inst_VOP3__V_LOG_F32::Inst_VOP3__V_LOG_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_log_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_LOG_F32 + + Inst_VOP3__V_LOG_F32::~Inst_VOP3__V_LOG_F32() + { + } // ~Inst_VOP3__V_LOG_F32 + + // --- description from .arch file --- + // D.f = log2(S0.f). Base 2 logarithm. 
+ void + Inst_VOP3__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::log2(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_RCP_F32 class methods --- + + Inst_VOP3__V_RCP_F32::Inst_VOP3__V_RCP_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_rcp_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_RCP_F32 + + Inst_VOP3__V_RCP_F32::~Inst_VOP3__V_RCP_F32() + { + } // ~Inst_VOP3__V_RCP_F32 + + // --- description from .arch file --- + // D.f = 1.0 / S0.f. Reciprocal with IEEE rules and < 1ulp error. + void + Inst_VOP3__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = 1.0 / src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_RCP_IFLAG_F32 class methods --- + + Inst_VOP3__V_RCP_IFLAG_F32::Inst_VOP3__V_RCP_IFLAG_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_rcp_iflag_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_RCP_IFLAG_F32 + + Inst_VOP3__V_RCP_IFLAG_F32::~Inst_VOP3__V_RCP_IFLAG_F32() + { + } // ~Inst_VOP3__V_RCP_IFLAG_F32 + + // --- description from .arch file --- + // D.f = 1.0 / S0.f. Reciprocal intended for integer division, can raise + // --- integer DIV_BY_ZERO exception but cannot raise floating-point + // --- exceptions. + void + Inst_VOP3__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = 1.0 / src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_RSQ_F32 class methods --- + + Inst_VOP3__V_RSQ_F32::Inst_VOP3__V_RSQ_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_rsq_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_RSQ_F32 + + Inst_VOP3__V_RSQ_F32::~Inst_VOP3__V_RSQ_F32() + { + } // ~Inst_VOP3__V_RSQ_F32 + + // --- description from .arch file --- + // D.f = 1.0 / sqrt(S0.f). Reciprocal square root with IEEE rules. 
+ void + Inst_VOP3__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = 1.0 / std::sqrt(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_RCP_F64 class methods --- + + Inst_VOP3__V_RCP_F64::Inst_VOP3__V_RCP_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_rcp_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_RCP_F64 + + Inst_VOP3__V_RCP_F64::~Inst_VOP3__V_RCP_F64() + { + } // ~Inst_VOP3__V_RCP_F64 + + // --- description from .arch file --- + // D.d = 1.0 / S0.d. + void + Inst_VOP3__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::fpclassify(src[lane]) == FP_ZERO) { + vdst[lane] = +INFINITY; + } else if (std::isnan(src[lane])) { + vdst[lane] = NAN; + } else if (std::isinf(src[lane])) { + if (std::signbit(src[lane])) { + vdst[lane] = -0.0; + } else { + vdst[lane] = 0.0; + } + } else { + vdst[lane] = 1.0 / src[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_RSQ_F64 class methods --- + + Inst_VOP3__V_RSQ_F64::Inst_VOP3__V_RSQ_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_rsq_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_RSQ_F64 + + Inst_VOP3__V_RSQ_F64::~Inst_VOP3__V_RSQ_F64() + { + } // ~Inst_VOP3__V_RSQ_F64 + + // --- description from .arch file --- + // D.d = 1.0 / sqrt(S0.d). See V_RSQ_F32. + void + Inst_VOP3__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::fpclassify(src[lane]) == FP_ZERO) { + vdst[lane] = +INFINITY; + } else if (std::isnan(src[lane])) { + vdst[lane] = NAN; + } else if (std::isinf(src[lane]) && !std::signbit(src[lane])) { + vdst[lane] = 0.0; + } else if (std::signbit(src[lane])) { + vdst[lane] = NAN; + } else { + vdst[lane] = 1.0 / std::sqrt(src[lane]); + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SQRT_F32 class methods --- + + Inst_VOP3__V_SQRT_F32::Inst_VOP3__V_SQRT_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sqrt_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_SQRT_F32 + + Inst_VOP3__V_SQRT_F32::~Inst_VOP3__V_SQRT_F32() + { + } // ~Inst_VOP3__V_SQRT_F32 + + // --- description from .arch file --- + // D.f = sqrt(S0.f). 
+ void + Inst_VOP3__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::sqrt(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SQRT_F64 class methods --- + + Inst_VOP3__V_SQRT_F64::Inst_VOP3__V_SQRT_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sqrt_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_SQRT_F64 + + Inst_VOP3__V_SQRT_F64::~Inst_VOP3__V_SQRT_F64() + { + } // ~Inst_VOP3__V_SQRT_F64 + + // --- description from .arch file --- + // D.d = sqrt(S0.d). + void + Inst_VOP3__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::sqrt(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SIN_F32 class methods --- + + Inst_VOP3__V_SIN_F32::Inst_VOP3__V_SIN_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sin_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_SIN_F32 + + Inst_VOP3__V_SIN_F32::~Inst_VOP3__V_SIN_F32() + { + } // ~Inst_VOP3__V_SIN_F32 + + // --- description from .arch file --- + // D.f = sin(S0.f * 2 * PI). + // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in + // float 0.0. + void + Inst_VOP3__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + ConstScalarOperandF32 pi(gpuDynInst, REG_PI); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + pi.read(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::sin(src[lane] * 2 * pi.rawData()); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_COS_F32 class methods --- + + Inst_VOP3__V_COS_F32::Inst_VOP3__V_COS_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cos_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_COS_F32 + + Inst_VOP3__V_COS_F32::~Inst_VOP3__V_COS_F32() + { + } // ~Inst_VOP3__V_COS_F32 + + // --- description from .arch file --- + // D.f = cos(S0.f * 2 * PI). + // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in + // float 1.0. 
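+ // Implementation note: as with V_SIN_F32 above, PI is read from the
+ // REG_PI scalar source and the result is std::cos(S0 * 2 * PI); inputs
+ // outside [-256.0, +256.0] are not given any special handling here.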
+ void + Inst_VOP3__V_COS_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + ConstScalarOperandF32 pi(gpuDynInst, REG_PI); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + pi.read(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::cos(src[lane] * 2 * pi.rawData()); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_NOT_B32 class methods --- + + Inst_VOP3__V_NOT_B32::Inst_VOP3__V_NOT_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_not_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_NOT_B32 + + Inst_VOP3__V_NOT_B32::~Inst_VOP3__V_NOT_B32() + { + } // ~Inst_VOP3__V_NOT_B32 + + // --- description from .arch file --- + // D.u = ~S0.u. + // Input and output modifiers not supported. + void + Inst_VOP3__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = ~src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_BFREV_B32 class methods --- + + Inst_VOP3__V_BFREV_B32::Inst_VOP3__V_BFREV_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_bfrev_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_BFREV_B32 + + Inst_VOP3__V_BFREV_B32::~Inst_VOP3__V_BFREV_B32() + { + } // ~Inst_VOP3__V_BFREV_B32 + + // --- description from .arch file --- + // D.u[31:0] = S0.u[0:31], bitfield reverse. + // Input and output modifiers not supported. + void + Inst_VOP3__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = reverseBits(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FFBH_U32 class methods --- + + Inst_VOP3__V_FFBH_U32::Inst_VOP3__V_FFBH_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ffbh_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_FFBH_U32 + + Inst_VOP3__V_FFBH_U32::~Inst_VOP3__V_FFBH_U32() + { + } // ~Inst_VOP3__V_FFBH_U32 + + // --- description from .arch file --- + // D.u = position of first 1 in S0.u from MSB; + // D.u = 0xffffffff if S0.u == 0. 
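+ // Implementation note: the search is delegated to the findFirstOneMsb()
+ // helper, which scans from the most significant bit downward.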
+ void + Inst_VOP3__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = findFirstOneMsb(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FFBL_B32 class methods --- + + Inst_VOP3__V_FFBL_B32::Inst_VOP3__V_FFBL_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ffbl_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_FFBL_B32 + + Inst_VOP3__V_FFBL_B32::~Inst_VOP3__V_FFBL_B32() + { + } // ~Inst_VOP3__V_FFBL_B32 + + // --- description from .arch file --- + // D.u = position of first 1 in S0.u from LSB; + // D.u = 0xffffffff if S0.u == 0. + void + Inst_VOP3__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = findFirstOne(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FFBH_I32 class methods --- + + Inst_VOP3__V_FFBH_I32::Inst_VOP3__V_FFBH_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ffbh_i32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_FFBH_I32 + + Inst_VOP3__V_FFBH_I32::~Inst_VOP3__V_FFBH_I32() + { + } // ~Inst_VOP3__V_FFBH_I32 + + // --- description from .arch file --- + // D.u = position of first bit different from sign bit in S0.i from MSB; + // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff. + void + Inst_VOP3__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = firstOppositeSignBit(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FREXP_EXP_I32_F64 class methods --- + + Inst_VOP3__V_FREXP_EXP_I32_F64::Inst_VOP3__V_FREXP_EXP_I32_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_frexp_exp_i32_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_FREXP_EXP_I32_F64 + + Inst_VOP3__V_FREXP_EXP_I32_F64::~Inst_VOP3__V_FREXP_EXP_I32_F64() + { + } // ~Inst_VOP3__V_FREXP_EXP_I32_F64 + + // --- description from .arch file --- + // See V_FREXP_EXP_I32_F32. 
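+ // Implementation note: infinity and NaN inputs produce 0; otherwise
+ // std::frexp supplies an exponent such that S0.d = significand * 2**exp
+ // with |significand| in [0.5, 1.0).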
+ void + Inst_VOP3__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::isinf(src[lane]) || std::isnan(src[lane])) { + vdst[lane] = 0; + } else { + VecElemI32 exp(0); + std::frexp(src[lane], &exp); + vdst[lane] = exp; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FREXP_MANT_F64 class methods --- + + Inst_VOP3__V_FREXP_MANT_F64::Inst_VOP3__V_FREXP_MANT_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_frexp_mant_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_FREXP_MANT_F64 + + Inst_VOP3__V_FREXP_MANT_F64::~Inst_VOP3__V_FREXP_MANT_F64() + { + } // ~Inst_VOP3__V_FREXP_MANT_F64 + + // --- description from .arch file --- + // See V_FREXP_MANT_F32. + void + Inst_VOP3__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemI32 exp(0); + vdst[lane] = std::frexp(src[lane], &exp); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FRACT_F64 class methods --- + + Inst_VOP3__V_FRACT_F64::Inst_VOP3__V_FRACT_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_fract_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_FRACT_F64 + + Inst_VOP3__V_FRACT_F64::~Inst_VOP3__V_FRACT_F64() + { + } // ~Inst_VOP3__V_FRACT_F64 + + // --- description from .arch file --- + // See V_FRACT_F32. + void + Inst_VOP3__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemF32 int_part(0.0); + vdst[lane] = std::modf(src[lane], &int_part); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FREXP_EXP_I32_F32 class methods --- + + Inst_VOP3__V_FREXP_EXP_I32_F32::Inst_VOP3__V_FREXP_EXP_I32_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_frexp_exp_i32_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_FREXP_EXP_I32_F32 + + Inst_VOP3__V_FREXP_EXP_I32_F32::~Inst_VOP3__V_FREXP_EXP_I32_F32() + { + } // ~Inst_VOP3__V_FREXP_EXP_I32_F32 + + // --- description from .arch file --- + // if (S0.f == INF || S0.f == NAN) then D.i = 0; + // else D.i = TwosComplement(Exponent(S0.f) - 127 + 1). + // Returns exponent of single precision float input, such that S0.f = + // significand * (2 ** exponent). See also FREXP_MANT_F32, which returns + // the significand. 
+ void + Inst_VOP3__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::isinf(src[lane])|| std::isnan(src[lane])) { + vdst[lane] = 0; + } else { + VecElemI32 exp(0); + std::frexp(src[lane], &exp); + vdst[lane] = exp; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FREXP_MANT_F32 class methods --- + + Inst_VOP3__V_FREXP_MANT_F32::Inst_VOP3__V_FREXP_MANT_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_frexp_mant_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_FREXP_MANT_F32 + + Inst_VOP3__V_FREXP_MANT_F32::~Inst_VOP3__V_FREXP_MANT_F32() + { + } // ~Inst_VOP3__V_FREXP_MANT_F32 + + // --- description from .arch file --- + // if (S0.f == INF || S0.f == NAN) then D.f = S0.f; + // else D.f = Mantissa(S0.f). + // Result range is in (-1.0,-0.5][0.5,1.0) in normal cases. Returns binary + // --- significand of single precision float input, such that S0.f = + // --- significand * (2 ** exponent). See also FREXP_EXP_I32_F32, which + // --- returns integer exponent. + void + Inst_VOP3__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::isinf(src[lane]) || std::isnan(src[lane])) { + vdst[lane] = src[lane]; + } else { + VecElemI32 exp(0); + vdst[lane] = std::frexp(src[lane], &exp); + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CLREXCP class methods --- + + Inst_VOP3__V_CLREXCP::Inst_VOP3__V_CLREXCP(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_clrexcp", false) + { + } // Inst_VOP3__V_CLREXCP + + Inst_VOP3__V_CLREXCP::~Inst_VOP3__V_CLREXCP() + { + } // ~Inst_VOP3__V_CLREXCP + + // --- description from .arch file --- + // Clear wave's exception state in SIMD (SP). + void + Inst_VOP3__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_F16_U16 class methods --- + + Inst_VOP3__V_CVT_F16_U16::Inst_VOP3__V_CVT_F16_U16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f16_u16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CVT_F16_U16 + + Inst_VOP3__V_CVT_F16_U16::~Inst_VOP3__V_CVT_F16_U16() + { + } // ~Inst_VOP3__V_CVT_F16_U16 + + // --- description from .arch file --- + // D.f16 = uint16_to_flt16(S.u16). + // Supports denormals, rounding, exception flags and saturation. + void + Inst_VOP3__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_F16_I16 class methods --- + + Inst_VOP3__V_CVT_F16_I16::Inst_VOP3__V_CVT_F16_I16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f16_i16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CVT_F16_I16 + + Inst_VOP3__V_CVT_F16_I16::~Inst_VOP3__V_CVT_F16_I16() + { + } // ~Inst_VOP3__V_CVT_F16_I16 + + // --- description from .arch file --- + // D.f16 = int16_to_flt16(S.i16). 
+ // Supports denormals, rounding, exception flags and saturation. + void + Inst_VOP3__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_U16_F16 class methods --- + + Inst_VOP3__V_CVT_U16_F16::Inst_VOP3__V_CVT_U16_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_u16_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CVT_U16_F16 + + Inst_VOP3__V_CVT_U16_F16::~Inst_VOP3__V_CVT_U16_F16() + { + } // ~Inst_VOP3__V_CVT_U16_F16 + + // --- description from .arch file --- + // D.u16 = flt16_to_uint16(S.f16). + // Supports rounding, exception flags and saturation. + void + Inst_VOP3__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_I16_F16 class methods --- + + Inst_VOP3__V_CVT_I16_F16::Inst_VOP3__V_CVT_I16_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_i16_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CVT_I16_F16 + + Inst_VOP3__V_CVT_I16_F16::~Inst_VOP3__V_CVT_I16_F16() + { + } // ~Inst_VOP3__V_CVT_I16_F16 + + // --- description from .arch file --- + // D.i16 = flt16_to_int16(S.f16). + // Supports rounding, exception flags and saturation. + void + Inst_VOP3__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_RCP_F16 class methods --- + + Inst_VOP3__V_RCP_F16::Inst_VOP3__V_RCP_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_rcp_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_RCP_F16 + + Inst_VOP3__V_RCP_F16::~Inst_VOP3__V_RCP_F16() + { + } // ~Inst_VOP3__V_RCP_F16 + + // --- description from .arch file --- + // if (S0.f16 == 1.0f) + // D.f16 = 1.0f; + // else + // D.f16 = ApproximateRecip(S0.f16). + void + Inst_VOP3__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_SQRT_F16 class methods --- + + Inst_VOP3__V_SQRT_F16::Inst_VOP3__V_SQRT_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sqrt_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_SQRT_F16 + + Inst_VOP3__V_SQRT_F16::~Inst_VOP3__V_SQRT_F16() + { + } // ~Inst_VOP3__V_SQRT_F16 + + // --- description from .arch file --- + // if (S0.f16 == 1.0f) + // D.f16 = 1.0f; + // else + // D.f16 = ApproximateSqrt(S0.f16). + void + Inst_VOP3__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_RSQ_F16 class methods --- + + Inst_VOP3__V_RSQ_F16::Inst_VOP3__V_RSQ_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_rsq_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_RSQ_F16 + + Inst_VOP3__V_RSQ_F16::~Inst_VOP3__V_RSQ_F16() + { + } // ~Inst_VOP3__V_RSQ_F16 + + // --- description from .arch file --- + // if (S0.f16 == 1.0f) + // D.f16 = 1.0f; + // else + // D.f16 = ApproximateRecipSqrt(S0.f16). + void + Inst_VOP3__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_LOG_F16 class methods --- + + Inst_VOP3__V_LOG_F16::Inst_VOP3__V_LOG_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_log_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_LOG_F16 + + Inst_VOP3__V_LOG_F16::~Inst_VOP3__V_LOG_F16() + { + } // ~Inst_VOP3__V_LOG_F16 + + // --- description from .arch file --- + // if (S0.f16 == 1.0f) + // D.f16 = 0.0f; + // else + // D.f16 = ApproximateLog2(S0.f16). 
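+    // A minimal sketch of the rule above, assuming the F16 source were first
+    // widened to a host float (illustrative only; log2Sketch is a
+    // hypothetical helper, and F16 rounding/denormal behavior is not
+    // modeled):
+    //
+    //     float log2Sketch(float s0)
+    //     {
+    //         return (s0 == 1.0f) ? 0.0f : std::log2(s0);
+    //     }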
+ void + Inst_VOP3__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_EXP_F16 class methods --- + + Inst_VOP3__V_EXP_F16::Inst_VOP3__V_EXP_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_exp_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_EXP_F16 + + Inst_VOP3__V_EXP_F16::~Inst_VOP3__V_EXP_F16() + { + } // ~Inst_VOP3__V_EXP_F16 + + // --- description from .arch file --- + // if (S0.f16 == 0.0f) + // D.f16 = 1.0f; + // else + // D.f16 = Approximate2ToX(S0.f16). + void + Inst_VOP3__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_FREXP_MANT_F16 class methods --- + + Inst_VOP3__V_FREXP_MANT_F16::Inst_VOP3__V_FREXP_MANT_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_frexp_mant_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_FREXP_MANT_F16 + + Inst_VOP3__V_FREXP_MANT_F16::~Inst_VOP3__V_FREXP_MANT_F16() + { + } // ~Inst_VOP3__V_FREXP_MANT_F16 + + // --- description from .arch file --- + // if (S0.f16 == +-INF || S0.f16 == NAN) + // D.f16 = S0.f16; + // else + // D.f16 = mantissa(S0.f16). + // Result range is (-1.0,-0.5][0.5,1.0). + // C math library frexp function. + // Returns binary significand of half precision float input, such that the + // original single float = significand * (2 ** exponent). + void + Inst_VOP3__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_FREXP_EXP_I16_F16 class methods --- + + Inst_VOP3__V_FREXP_EXP_I16_F16::Inst_VOP3__V_FREXP_EXP_I16_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_frexp_exp_i16_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_FREXP_EXP_I16_F16 + + Inst_VOP3__V_FREXP_EXP_I16_F16::~Inst_VOP3__V_FREXP_EXP_I16_F16() + { + } // ~Inst_VOP3__V_FREXP_EXP_I16_F16 + + // --- description from .arch file --- + // if (S0.f16 == +-INF || S0.f16 == NAN) + // D.i16 = 0; + // else + // D.i16 = 2s_complement(exponent(S0.f16) - 15 + 1). + // C math library frexp function. + // Returns exponent of half precision float input, such that the + // original single float = significand * (2 ** exponent). + void + Inst_VOP3__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_FLOOR_F16 class methods --- + + Inst_VOP3__V_FLOOR_F16::Inst_VOP3__V_FLOOR_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_floor_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_FLOOR_F16 + + Inst_VOP3__V_FLOOR_F16::~Inst_VOP3__V_FLOOR_F16() + { + } // ~Inst_VOP3__V_FLOOR_F16 + + // --- description from .arch file --- + // D.f16 = trunc(S0.f16); + // if (S0.f16 < 0.0f && S0.f16 != D.f16) then D.f16 -= 1.0f. + void + Inst_VOP3__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CEIL_F16 class methods --- + + Inst_VOP3__V_CEIL_F16::Inst_VOP3__V_CEIL_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ceil_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CEIL_F16 + + Inst_VOP3__V_CEIL_F16::~Inst_VOP3__V_CEIL_F16() + { + } // ~Inst_VOP3__V_CEIL_F16 + + // --- description from .arch file --- + // D.f16 = trunc(S0.f16); + // if (S0.f16 > 0.0f && S0.f16 != D.f16) then D.f16 += 1.0f. 
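+    // A minimal sketch of the trunc-and-adjust rule above on a widened host
+    // float (illustrative only; ceilSketch is a hypothetical helper and, for
+    // finite inputs, matches std::ceil):
+    //
+    //     float ceilSketch(float s0)
+    //     {
+    //         float d = std::trunc(s0);
+    //         if (s0 > 0.0f && s0 != d) {
+    //             d += 1.0f;
+    //         }
+    //         return d;
+    //     }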
+ void + Inst_VOP3__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_TRUNC_F16 class methods --- + + Inst_VOP3__V_TRUNC_F16::Inst_VOP3__V_TRUNC_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_trunc_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_TRUNC_F16 + + Inst_VOP3__V_TRUNC_F16::~Inst_VOP3__V_TRUNC_F16() + { + } // ~Inst_VOP3__V_TRUNC_F16 + + // --- description from .arch file --- + // D.f16 = trunc(S0.f16). + // Round-to-zero semantics. + void + Inst_VOP3__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_RNDNE_F16 class methods --- + + Inst_VOP3__V_RNDNE_F16::Inst_VOP3__V_RNDNE_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_rndne_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_RNDNE_F16 + + Inst_VOP3__V_RNDNE_F16::~Inst_VOP3__V_RNDNE_F16() + { + } // ~Inst_VOP3__V_RNDNE_F16 + + // --- description from .arch file --- + // D.f16 = FLOOR(S0.f16 + 0.5f); + // if (floor(S0.f16) is even && fract(S0.f16) == 0.5f) then D.f16 -= 1.0f. + // Round-to-nearest-even semantics. + void + Inst_VOP3__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_FRACT_F16 class methods --- + + Inst_VOP3__V_FRACT_F16::Inst_VOP3__V_FRACT_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_fract_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_FRACT_F16 + + Inst_VOP3__V_FRACT_F16::~Inst_VOP3__V_FRACT_F16() + { + } // ~Inst_VOP3__V_FRACT_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 + -floor(S0.f16). + void + Inst_VOP3__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_SIN_F16 class methods --- + + Inst_VOP3__V_SIN_F16::Inst_VOP3__V_SIN_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sin_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_SIN_F16 + + Inst_VOP3__V_SIN_F16::~Inst_VOP3__V_SIN_F16() + { + } // ~Inst_VOP3__V_SIN_F16 + + // --- description from .arch file --- + // D.f16 = sin(S0.f16 * 2 * PI). + void + Inst_VOP3__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_COS_F16 class methods --- + + Inst_VOP3__V_COS_F16::Inst_VOP3__V_COS_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cos_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_COS_F16 + + Inst_VOP3__V_COS_F16::~Inst_VOP3__V_COS_F16() + { + } // ~Inst_VOP3__V_COS_F16 + + // --- description from .arch file --- + // D.f16 = cos(S0.f16 * 2 * PI). + void + Inst_VOP3__V_COS_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_EXP_LEGACY_F32 class methods --- + + Inst_VOP3__V_EXP_LEGACY_F32::Inst_VOP3__V_EXP_LEGACY_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_exp_legacy_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_EXP_LEGACY_F32 + + Inst_VOP3__V_EXP_LEGACY_F32::~Inst_VOP3__V_EXP_LEGACY_F32() + { + } // ~Inst_VOP3__V_EXP_LEGACY_F32 + + // --- description from .arch file --- + // D.f = pow(2.0, S0.f) with legacy semantics. 
+ void + Inst_VOP3__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::pow(2.0, src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LOG_LEGACY_F32 class methods --- + + Inst_VOP3__V_LOG_LEGACY_F32::Inst_VOP3__V_LOG_LEGACY_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_log_legacy_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_LOG_LEGACY_F32 + + Inst_VOP3__V_LOG_LEGACY_F32::~Inst_VOP3__V_LOG_LEGACY_F32() + { + } // ~Inst_VOP3__V_LOG_LEGACY_F32 + + // --- description from .arch file --- + // D.f = log2(S0.f). Base 2 logarithm with legacy semantics. + void + Inst_VOP3__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::log2(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAD_LEGACY_F32 class methods --- + + Inst_VOP3__V_MAD_LEGACY_F32::Inst_VOP3__V_MAD_LEGACY_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mad_legacy_f32", false) + { + setFlag(ALU); + setFlag(F32); + setFlag(MAD); + } // Inst_VOP3__V_MAD_LEGACY_F32 + + Inst_VOP3__V_MAD_LEGACY_F32::~Inst_VOP3__V_MAD_LEGACY_F32() + { + } // ~Inst_VOP3__V_MAD_LEGACY_F32 + + // --- description from .arch file --- + // D.f = S0.f * S1.f + S2.f (DX9 rules, 0.0 * x = 0.0). + void + Inst_VOP3__V_MAD_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (instData.ABS & 0x4) { + src2.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + if (extData.NEG & 0x4) { + src2.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAD_F32 class methods --- + + Inst_VOP3__V_MAD_F32::Inst_VOP3__V_MAD_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mad_f32", false) + { + setFlag(ALU); + setFlag(F32); + setFlag(MAD); + } // Inst_VOP3__V_MAD_F32 + + Inst_VOP3__V_MAD_F32::~Inst_VOP3__V_MAD_F32() + { + } // ~Inst_VOP3__V_MAD_F32 + + // --- description from .arch file --- + // D.f = S0.f * S1.f + S2.f. 
+ void + Inst_VOP3__V_MAD_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (instData.ABS & 0x4) { + src2.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + if (extData.NEG & 0x4) { + src2.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAD_I32_I24 class methods --- + + Inst_VOP3__V_MAD_I32_I24::Inst_VOP3__V_MAD_I32_I24(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mad_i32_i24", false) + { + setFlag(ALU); + setFlag(MAD); + } // Inst_VOP3__V_MAD_I32_I24 + + Inst_VOP3__V_MAD_I32_I24::~Inst_VOP3__V_MAD_I32_I24() + { + } // ~Inst_VOP3__V_MAD_I32_I24 + + // --- description from .arch file --- + // D.i = S0.i[23:0] * S1.i[23:0] + S2.i. + void + Inst_VOP3__V_MAD_I32_I24::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandI32 src2(gpuDynInst, extData.SRC2); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = sext<24>(bits(src0[lane], 23, 0)) + * sext<24>(bits(src1[lane], 23, 0)) + src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAD_U32_U24 class methods --- + + Inst_VOP3__V_MAD_U32_U24::Inst_VOP3__V_MAD_U32_U24(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mad_u32_u24", false) + { + setFlag(ALU); + setFlag(MAD); + } // Inst_VOP3__V_MAD_U32_U24 + + Inst_VOP3__V_MAD_U32_U24::~Inst_VOP3__V_MAD_U32_U24() + { + } // ~Inst_VOP3__V_MAD_U32_U24 + + // --- description from .arch file --- + // D.u = S0.u[23:0] * S1.u[23:0] + S2.u. 
+ void + Inst_VOP3__V_MAD_U32_U24::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0) + + src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CUBEID_F32 class methods --- + + Inst_VOP3__V_CUBEID_F32::Inst_VOP3__V_CUBEID_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cubeid_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CUBEID_F32 + + Inst_VOP3__V_CUBEID_F32::~Inst_VOP3__V_CUBEID_F32() + { + } // ~Inst_VOP3__V_CUBEID_F32 + + // --- description from .arch file --- + // D.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). XYZ coordinate is given in + // --- (S0.f, S1.f, S2.f). + void + Inst_VOP3__V_CUBEID_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CUBESC_F32 class methods --- + + Inst_VOP3__V_CUBESC_F32::Inst_VOP3__V_CUBESC_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cubesc_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CUBESC_F32 + + Inst_VOP3__V_CUBESC_F32::~Inst_VOP3__V_CUBESC_F32() + { + } // ~Inst_VOP3__V_CUBESC_F32 + + // --- description from .arch file --- + // D.f = cubemap S coordinate. XYZ coordinate is given in (S0.f, S1.f, + // S2.f). + void + Inst_VOP3__V_CUBESC_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CUBETC_F32 class methods --- + + Inst_VOP3__V_CUBETC_F32::Inst_VOP3__V_CUBETC_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cubetc_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CUBETC_F32 + + Inst_VOP3__V_CUBETC_F32::~Inst_VOP3__V_CUBETC_F32() + { + } // ~Inst_VOP3__V_CUBETC_F32 + + // --- description from .arch file --- + // D.f = cubemap T coordinate. XYZ coordinate is given in (S0.f, S1.f, + // S2.f). + void + Inst_VOP3__V_CUBETC_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CUBEMA_F32 class methods --- + + Inst_VOP3__V_CUBEMA_F32::Inst_VOP3__V_CUBEMA_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cubema_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CUBEMA_F32 + + Inst_VOP3__V_CUBEMA_F32::~Inst_VOP3__V_CUBEMA_F32() + { + } // ~Inst_VOP3__V_CUBEMA_F32 + + // --- description from .arch file --- + // D.f = 2.0 * cubemap major axis. XYZ coordinate is given in (S0.f, S1.f, + // --- S2.f). 
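+    // The cubemap major axis is the input coordinate with the largest
+    // magnitude, so one plausible sketch of the rule above is (illustrative
+    // only; cubemaSketch is a hypothetical helper, and hardware face and
+    // tie-breaking selection details are not modeled):
+    //
+    //     float cubemaSketch(float x, float y, float z)
+    //     {
+    //         return 2.0f * std::fmax(std::fabs(x),
+    //                                 std::fmax(std::fabs(y), std::fabs(z)));
+    //     }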
+    void
+    Inst_VOP3__V_CUBEMA_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+    // --- Inst_VOP3__V_BFE_U32 class methods ---
+
+    Inst_VOP3__V_BFE_U32::Inst_VOP3__V_BFE_U32(InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_bfe_u32", false)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_BFE_U32
+
+    Inst_VOP3__V_BFE_U32::~Inst_VOP3__V_BFE_U32()
+    {
+    } // ~Inst_VOP3__V_BFE_U32
+
+    // --- description from .arch file ---
+    // D.u = (S0.u>>S1.u[4:0]) & ((1<<S2.u[4:0])-1); bitfield extract.
+    void
+    Inst_VOP3__V_BFE_U32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
+        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
+        VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+        src2.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
+                    & ((1 << bits(src2[lane], 4, 0)) - 1);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP3__V_BFE_I32 class methods ---
+
+    Inst_VOP3__V_BFE_I32::Inst_VOP3__V_BFE_I32(InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_bfe_i32", false)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_BFE_I32
+
+    Inst_VOP3__V_BFE_I32::~Inst_VOP3__V_BFE_I32()
+    {
+    } // ~Inst_VOP3__V_BFE_I32
+
+    // --- description from .arch file ---
+    // D.i = (S0.i>>S1.u[4:0]) & ((1<<S2.u[4:0])-1); bitfield extract, then
+    // sign-extend the result.
+    void
+    Inst_VOP3__V_BFE_I32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
+        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
+        VecOperandI32 vdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+        src2.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
+                    & ((1 << bits(src2[lane], 4, 0)) - 1);
+
+                // Above extracted a signed int of size src2 bits which needs
+                // to be sign-extended. Check if the MSB of our src2-bit
+                // integer is 1, and sign extend if it is.
+                if (vdst[lane] >> (bits(src2[lane], 4, 0) - 1)) {
+                    vdst[lane] |= 0xffffffff << bits(src2[lane], 4, 0);
+                }
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP3__V_BFI_B32 class methods ---
+
+    Inst_VOP3__V_BFI_B32::Inst_VOP3__V_BFI_B32(InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_bfi_b32", false)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_BFI_B32
+
+    Inst_VOP3__V_BFI_B32::~Inst_VOP3__V_BFI_B32()
+    {
+    } // ~Inst_VOP3__V_BFI_B32
+
+    // --- description from .arch file ---
+    // D.u = (S0.u & S1.u) | (~S0.u & S2.u); bitfield insert.
+ void + Inst_VOP3__V_BFI_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (src0[lane] & src1[lane]) | (~src0[lane] + & src2[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FMA_F32 class methods --- + + Inst_VOP3__V_FMA_F32::Inst_VOP3__V_FMA_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_fma_f32", false) + { + setFlag(ALU); + setFlag(F32); + setFlag(FMA); + } // Inst_VOP3__V_FMA_F32 + + Inst_VOP3__V_FMA_F32::~Inst_VOP3__V_FMA_F32() + { + } // ~Inst_VOP3__V_FMA_F32 + + // --- description from .arch file --- + // D.f = S0.f * S1.f + S2.f. + void + Inst_VOP3__V_FMA_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (instData.ABS & 0x4) { + src2.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + if (extData.NEG & 0x4) { + src2.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FMA_F64 class methods --- + + Inst_VOP3__V_FMA_F64::Inst_VOP3__V_FMA_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_fma_f64", false) + { + setFlag(ALU); + setFlag(F64); + setFlag(FMA); + } // Inst_VOP3__V_FMA_F64 + + Inst_VOP3__V_FMA_F64::~Inst_VOP3__V_FMA_F64() + { + } // ~Inst_VOP3__V_FMA_F64 + + // --- description from .arch file --- + // D.d = S0.d * S1.d + S2.d. 
+ void + Inst_VOP3__V_FMA_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ConstVecOperandF64 src2(gpuDynInst, extData.SRC2); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (instData.ABS & 0x4) { + src2.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + if (extData.NEG & 0x4) { + src2.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LERP_U8 class methods --- + + Inst_VOP3__V_LERP_U8::Inst_VOP3__V_LERP_U8(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_lerp_u8", false) + { + setFlag(ALU); + } // Inst_VOP3__V_LERP_U8 + + Inst_VOP3__V_LERP_U8::~Inst_VOP3__V_LERP_U8() + { + } // ~Inst_VOP3__V_LERP_U8 + + // --- description from .arch file --- + // D.u = ((S0.u[31:24] + S1.u[31:24] + S2.u[24]) >> 1) << 24 + // D.u += ((S0.u[23:16] + S1.u[23:16] + S2.u[16]) >> 1) << 16; + // D.u += ((S0.u[15:8] + S1.u[15:8] + S2.u[8]) >> 1) << 8; + // D.u += ((S0.u[7:0] + S1.u[7:0] + S2.u[0]) >> 1). + // Unsigned 8-bit pixel average on packed unsigned bytes (linear + // --- interpolation). S2 acts as a round mode; if set, 0.5 rounds up, + // --- otherwise 0.5 truncates. + void + Inst_VOP3__V_LERP_U8::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = ((bits(src0[lane], 31, 24) + + bits(src1[lane], 31, 24) + bits(src2[lane], 24)) >> 1) + << 24; + vdst[lane] += ((bits(src0[lane], 23, 16) + + bits(src1[lane], 23, 16) + bits(src2[lane], 16)) >> 1) + << 16; + vdst[lane] += ((bits(src0[lane], 15, 8) + + bits(src1[lane], 15, 8) + bits(src2[lane], 8)) >> 1) + << 8; + vdst[lane] += ((bits(src0[lane], 7, 0) + bits(src1[lane], 7, 0) + + bits(src2[lane], 0)) >> 1); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_ALIGNBIT_B32 class methods --- + + Inst_VOP3__V_ALIGNBIT_B32::Inst_VOP3__V_ALIGNBIT_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_alignbit_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_ALIGNBIT_B32 + + Inst_VOP3__V_ALIGNBIT_B32::~Inst_VOP3__V_ALIGNBIT_B32() + { + } // ~Inst_VOP3__V_ALIGNBIT_B32 + + // --- description from .arch file --- + // D.u = ({S0,S1} >> S2.u[4:0]) & 0xffffffff. 
+ void + Inst_VOP3__V_ALIGNBIT_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32) + | (VecElemU64)src1[lane]); + vdst[lane] = (VecElemU32)((src_0_1 + >> (VecElemU64)bits(src2[lane], 4, 0)) & 0xffffffff); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_ALIGNBYTE_B32 class methods --- + + Inst_VOP3__V_ALIGNBYTE_B32::Inst_VOP3__V_ALIGNBYTE_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_alignbyte_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_ALIGNBYTE_B32 + + Inst_VOP3__V_ALIGNBYTE_B32::~Inst_VOP3__V_ALIGNBYTE_B32() + { + } // ~Inst_VOP3__V_ALIGNBYTE_B32 + + // --- description from .arch file --- + // D.u = ({S0,S1} >> (8*S2.u[4:0])) & 0xffffffff. + void + Inst_VOP3__V_ALIGNBYTE_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32) + | (VecElemU64)src1[lane]); + vdst[lane] = (VecElemU32)((src_0_1 + >> (8ULL * (VecElemU64)bits(src2[lane], 4, 0))) + & 0xffffffff); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MIN3_F32 class methods --- + + Inst_VOP3__V_MIN3_F32::Inst_VOP3__V_MIN3_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_min3_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_MIN3_F32 + + Inst_VOP3__V_MIN3_F32::~Inst_VOP3__V_MIN3_F32() + { + } // ~Inst_VOP3__V_MIN3_F32 + + // --- description from .arch file --- + // D.f = min(S0.f, S1.f, S2.f). 
+ void + Inst_VOP3__V_MIN3_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (instData.ABS & 0x4) { + src2.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + if (extData.NEG & 0x4) { + src2.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemF32 min_0_1 = std::fmin(src0[lane], src1[lane]); + vdst[lane] = std::fmin(min_0_1, src2[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MIN3_I32 class methods --- + + Inst_VOP3__V_MIN3_I32::Inst_VOP3__V_MIN3_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_min3_i32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MIN3_I32 + + Inst_VOP3__V_MIN3_I32::~Inst_VOP3__V_MIN3_I32() + { + } // ~Inst_VOP3__V_MIN3_I32 + + // --- description from .arch file --- + // D.i = min(S0.i, S1.i, S2.i). + void + Inst_VOP3__V_MIN3_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandI32 src2(gpuDynInst, extData.SRC2); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemI32 min_0_1 = std::min(src0[lane], src1[lane]); + vdst[lane] = std::min(min_0_1, src2[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MIN3_U32 class methods --- + + Inst_VOP3__V_MIN3_U32::Inst_VOP3__V_MIN3_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_min3_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MIN3_U32 + + Inst_VOP3__V_MIN3_U32::~Inst_VOP3__V_MIN3_U32() + { + } // ~Inst_VOP3__V_MIN3_U32 + + // --- description from .arch file --- + // D.u = min(S0.u, S1.u, S2.u). 
+ void + Inst_VOP3__V_MIN3_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemU32 min_0_1 = std::min(src0[lane], src1[lane]); + vdst[lane] = std::min(min_0_1, src2[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAX3_F32 class methods --- + + Inst_VOP3__V_MAX3_F32::Inst_VOP3__V_MAX3_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_max3_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_MAX3_F32 + + Inst_VOP3__V_MAX3_F32::~Inst_VOP3__V_MAX3_F32() + { + } // ~Inst_VOP3__V_MAX3_F32 + + // --- description from .arch file --- + // D.f = max(S0.f, S1.f, S2.f). + void + Inst_VOP3__V_MAX3_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (instData.ABS & 0x4) { + src2.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + if (extData.NEG & 0x4) { + src2.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemF32 max_0_1 = std::fmax(src0[lane], src1[lane]); + vdst[lane] = std::fmax(max_0_1, src2[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAX3_I32 class methods --- + + Inst_VOP3__V_MAX3_I32::Inst_VOP3__V_MAX3_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_max3_i32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MAX3_I32 + + Inst_VOP3__V_MAX3_I32::~Inst_VOP3__V_MAX3_I32() + { + } // ~Inst_VOP3__V_MAX3_I32 + + // --- description from .arch file --- + // D.i = max(S0.i, S1.i, S2.i). 
+ void + Inst_VOP3__V_MAX3_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandI32 src2(gpuDynInst, extData.SRC2); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemI32 max_0_1 = std::max(src0[lane], src1[lane]); + vdst[lane] = std::max(max_0_1, src2[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAX3_U32 class methods --- + + Inst_VOP3__V_MAX3_U32::Inst_VOP3__V_MAX3_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_max3_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MAX3_U32 + + Inst_VOP3__V_MAX3_U32::~Inst_VOP3__V_MAX3_U32() + { + } // ~Inst_VOP3__V_MAX3_U32 + + // --- description from .arch file --- + // D.u = max(S0.u, S1.u, S2.u). + void + Inst_VOP3__V_MAX3_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemU32 max_0_1 = std::max(src0[lane], src1[lane]); + vdst[lane] = std::max(max_0_1, src2[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MED3_F32 class methods --- + + Inst_VOP3__V_MED3_F32::Inst_VOP3__V_MED3_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_med3_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_MED3_F32 + + Inst_VOP3__V_MED3_F32::~Inst_VOP3__V_MED3_F32() + { + } // ~Inst_VOP3__V_MED3_F32 + + // --- description from .arch file --- + // D.f = median(S0.f, S1.f, S2.f). 
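+    // A three-input median can be built from min/max operations; one
+    // standard formulation (illustrative only, not necessarily how the
+    // median() helper used below is implemented) is:
+    //
+    //     float med3Sketch(float a, float b, float c)
+    //     {
+    //         return std::fmax(std::fmin(a, b),
+    //                          std::fmin(std::fmax(a, b), c));
+    //     }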
+ void + Inst_VOP3__V_MED3_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (instData.ABS & 0x4) { + src2.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + if (extData.NEG & 0x4) { + src2.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = median(src0[lane], src1[lane], src2[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MED3_I32 class methods --- + + Inst_VOP3__V_MED3_I32::Inst_VOP3__V_MED3_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_med3_i32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MED3_I32 + + Inst_VOP3__V_MED3_I32::~Inst_VOP3__V_MED3_I32() + { + } // ~Inst_VOP3__V_MED3_I32 + + // --- description from .arch file --- + // D.i = median(S0.i, S1.i, S2.i). + void + Inst_VOP3__V_MED3_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandI32 src2(gpuDynInst, extData.SRC2); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = median(src0[lane], src1[lane], src2[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MED3_U32 class methods --- + + Inst_VOP3__V_MED3_U32::Inst_VOP3__V_MED3_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_med3_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MED3_U32 + + Inst_VOP3__V_MED3_U32::~Inst_VOP3__V_MED3_U32() + { + } // ~Inst_VOP3__V_MED3_U32 + + // --- description from .arch file --- + // D.u = median(S0.u, S1.u, S2.u). 
+ void + Inst_VOP3__V_MED3_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = median(src0[lane], src1[lane], src2[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SAD_U8 class methods --- + + Inst_VOP3__V_SAD_U8::Inst_VOP3__V_SAD_U8(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sad_u8", false) + { + setFlag(ALU); + } // Inst_VOP3__V_SAD_U8 + + Inst_VOP3__V_SAD_U8::~Inst_VOP3__V_SAD_U8() + { + } // ~Inst_VOP3__V_SAD_U8 + + // --- description from .arch file --- + // D.u = abs(S0.i[31:24] - S1.i[31:24]) + abs(S0.i[23:16] - S1.i[23:16]) + + // abs(S0.i[15:8] - S1.i[15:8]) + abs(S0.i[7:0] - S1.i[7:0]) + S2.u. + // Sum of absolute differences with accumulation, overflow into upper bits + // is allowed. + void + Inst_VOP3__V_SAD_U8::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::abs(bits(src0[lane], 31, 24) + - bits(src1[lane], 31, 24)) + + std::abs(bits(src0[lane], 23, 16) + - bits(src1[lane], 23, 16)) + + std::abs(bits(src0[lane], 15, 8) + - bits(src1[lane], 15, 8)) + + std::abs(bits(src0[lane], 7, 0) + - bits(src1[lane], 7, 0)) + src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SAD_HI_U8 class methods --- + + Inst_VOP3__V_SAD_HI_U8::Inst_VOP3__V_SAD_HI_U8(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sad_hi_u8", false) + { + setFlag(ALU); + } // Inst_VOP3__V_SAD_HI_U8 + + Inst_VOP3__V_SAD_HI_U8::~Inst_VOP3__V_SAD_HI_U8() + { + } // ~Inst_VOP3__V_SAD_HI_U8 + + // --- description from .arch file --- + // D.u = (SAD_U8(S0, S1, 0) << 16) + S2.u. + // Sum of absolute differences with accumulation, overflow is lost. 
+ void + Inst_VOP3__V_SAD_HI_U8::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (((bits(src0[lane], 31, 24) + - bits(src1[lane], 31, 24)) + (bits(src0[lane], 23, 16) + - bits(src1[lane], 23, 16)) + (bits(src0[lane], 15, 8) + - bits(src1[lane], 15, 8)) + (bits(src0[lane], 7, 0) + - bits(src1[lane], 7, 0))) << 16) + src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SAD_U16 class methods --- + + Inst_VOP3__V_SAD_U16::Inst_VOP3__V_SAD_U16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sad_u16", false) + { + setFlag(ALU); + } // Inst_VOP3__V_SAD_U16 + + Inst_VOP3__V_SAD_U16::~Inst_VOP3__V_SAD_U16() + { + } // ~Inst_VOP3__V_SAD_U16 + + // --- description from .arch file --- + // D.u = abs(S0.i[31:16] - S1.i[31:16]) + abs(S0.i[15:0] - S1.i[15:0]) + // + S2.u. + // Word SAD with accumulation. + void + Inst_VOP3__V_SAD_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::abs(bits(src0[lane], 31, 16) + - bits(src1[lane], 31, 16)) + + std::abs(bits(src0[lane], 15, 0) + - bits(src1[lane], 15, 0)) + src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SAD_U32 class methods --- + + Inst_VOP3__V_SAD_U32::Inst_VOP3__V_SAD_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sad_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_SAD_U32 + + Inst_VOP3__V_SAD_U32::~Inst_VOP3__V_SAD_U32() + { + } // ~Inst_VOP3__V_SAD_U32 + + // --- description from .arch file --- + // D.u = abs(S0.i - S1.i) + S2.u. + // Dword SAD with accumulation. 
+ void + Inst_VOP3__V_SAD_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::abs(src0[lane] - src1[lane]) + src2[lane]; + } // if + } // for + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_PK_U8_F32 class methods --- + + Inst_VOP3__V_CVT_PK_U8_F32::Inst_VOP3__V_CVT_PK_U8_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_pk_u8_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_PK_U8_F32 + + Inst_VOP3__V_CVT_PK_U8_F32::~Inst_VOP3__V_CVT_PK_U8_F32() + { + } // ~Inst_VOP3__V_CVT_PK_U8_F32 + + // --- description from .arch file --- + // D.u = ((flt32_to_uint8(S0.f) & 0xff) << (8 * S1.u[1:0])) + // | (S2.u & ~(0xff << (8 * S1.u[1:0]))). + // Convert floating point value S0 to 8-bit unsigned integer and pack the + // result into byte S1 of dword S2. + void + Inst_VOP3__V_CVT_PK_U8_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (((VecElemU8)src0[lane] & 0xff) + << (8 * bits(src1[lane], 1, 0))) + | (src2[lane] & ~(0xff << (8 * bits(src1[lane], 1, 0)))); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_DIV_FIXUP_F32 class methods --- + + Inst_VOP3__V_DIV_FIXUP_F32::Inst_VOP3__V_DIV_FIXUP_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_div_fixup_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_DIV_FIXUP_F32 + + Inst_VOP3__V_DIV_FIXUP_F32::~Inst_VOP3__V_DIV_FIXUP_F32() + { + } // ~Inst_VOP3__V_DIV_FIXUP_F32 + + // --- description from .arch file --- + // D.f = Divide fixup and flags -- s0.f = Quotient, s1.f = Denominator, + // s2.f = Numerator. This opcode generates exceptions resulting from the + // division operation. 
+    void
+    Inst_VOP3__V_DIV_FIXUP_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
+        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+        src2.readSrc();
+
+        if (instData.ABS & 0x1) {
+            src0.absModifier();
+        }
+
+        if (instData.ABS & 0x2) {
+            src1.absModifier();
+        }
+
+        if (instData.ABS & 0x4) {
+            src2.absModifier();
+        }
+
+        if (extData.NEG & 0x1) {
+            src0.negModifier();
+        }
+
+        if (extData.NEG & 0x2) {
+            src1.negModifier();
+        }
+
+        if (extData.NEG & 0x4) {
+            src2.negModifier();
+        }
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                if (std::fpclassify(src1[lane]) == FP_ZERO) {
+                    if (std::signbit(src1[lane])) {
+                        vdst[lane] = -INFINITY;
+                    } else {
+                        vdst[lane] = +INFINITY;
+                    }
+                } else if (std::isnan(src2[lane]) || std::isnan(src1[lane])) {
+                    vdst[lane] = NAN;
+                } else if (std::isinf(src1[lane])) {
+                    if (std::signbit(src1[lane])) {
+                        vdst[lane] = -INFINITY;
+                    } else {
+                        vdst[lane] = +INFINITY;
+                    }
+                } else {
+                    vdst[lane] = src2[lane] / src1[lane];
+                }
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP3__V_DIV_FIXUP_F64 class methods ---
+
+    Inst_VOP3__V_DIV_FIXUP_F64::Inst_VOP3__V_DIV_FIXUP_F64(InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_div_fixup_f64", false)
+    {
+        setFlag(ALU);
+        setFlag(F64);
+    } // Inst_VOP3__V_DIV_FIXUP_F64
+
+    Inst_VOP3__V_DIV_FIXUP_F64::~Inst_VOP3__V_DIV_FIXUP_F64()
+    {
+    } // ~Inst_VOP3__V_DIV_FIXUP_F64
+
+    // --- description from .arch file ---
+    // D.d = Divide fixup and flags -- s0.d = Quotient, s1.d = Denominator,
+    // s2.d = Numerator. This opcode generates exceptions resulting from the
+    // division operation.
+    void
+    Inst_VOP3__V_DIV_FIXUP_F64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
+        ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
+        VecOperandF64 vdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+        src2.readSrc();
+
+        if (instData.ABS & 0x1) {
+            src0.absModifier();
+        }
+
+        if (instData.ABS & 0x2) {
+            src1.absModifier();
+        }
+
+        if (instData.ABS & 0x4) {
+            src2.absModifier();
+        }
+
+        if (extData.NEG & 0x1) {
+            src0.negModifier();
+        }
+
+        if (extData.NEG & 0x2) {
+            src1.negModifier();
+        }
+
+        if (extData.NEG & 0x4) {
+            src2.negModifier();
+        }
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                int sign_out = std::signbit(src1[lane])
+                    ^ std::signbit(src2[lane]);
+                int exp1(0);
+                int exp2(0);
+                std::frexp(src1[lane], &exp1);
+                std::frexp(src2[lane], &exp2);
+
+                if (std::isnan(src1[lane]) || std::isnan(src2[lane])) {
+                    vdst[lane] = std::numeric_limits<VecElemF64>::quiet_NaN();
+                } else if (std::fpclassify(src1[lane]) == FP_ZERO
+                    && std::fpclassify(src2[lane]) == FP_ZERO) {
+                    vdst[lane]
+                        = std::numeric_limits<VecElemF64>::signaling_NaN();
+                } else if (std::isinf(src1[lane]) && std::isinf(src2[lane])) {
+                    vdst[lane]
+                        = std::numeric_limits<VecElemF64>::signaling_NaN();
+                } else if (std::fpclassify(src1[lane]) == FP_ZERO
+                    || std::isinf(src2[lane])) {
+                    vdst[lane] = sign_out ? -INFINITY : +INFINITY;
+                } else if (std::isinf(src1[lane])
+                    || std::fpclassify(src2[lane]) == FP_ZERO) {
+                    vdst[lane] = sign_out ? -0.0 : +0.0;
+                } else if (exp2 - exp1 < -1075) {
+                    vdst[lane] = src0[lane];
+                } else if (exp1 == 2047) {
+                    vdst[lane] = src0[lane];
+                } else {
+                    vdst[lane] = sign_out ? -std::fabs(src0[lane])
+                        : std::fabs(src0[lane]);
+                }
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP3__V_DIV_SCALE_F32 class methods ---
+
+    Inst_VOP3__V_DIV_SCALE_F32::Inst_VOP3__V_DIV_SCALE_F32(
+        InFmt_VOP3B *iFmt)
+        : Inst_VOP3B(iFmt, "v_div_scale_f32")
+    {
+        setFlag(ALU);
+        setFlag(WritesVCC);
+        setFlag(F32);
+    } // Inst_VOP3__V_DIV_SCALE_F32
+
+    Inst_VOP3__V_DIV_SCALE_F32::~Inst_VOP3__V_DIV_SCALE_F32()
+    {
+    } // ~Inst_VOP3__V_DIV_SCALE_F32
+
+    // --- description from .arch file ---
+    // {vcc,D.f} = Divide preop and flags -- s0.f = Quotient, s1.f =
+    // Denominator, s2.f = Numerator -- s0 must equal s1 or s2. Given a
+    // numerator and denominator, this opcode will appropriately scale inputs
+    // for division to avoid subnormal terms during Newton-Raphson correction
+    // algorithm. This opcode produces a VCC flag for post-scale of quotient.
+    void
+    Inst_VOP3__V_DIV_SCALE_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
+        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
+        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+        src2.readSrc();
+
+        if (extData.NEG & 0x1) {
+            src0.negModifier();
+        }
+
+        if (extData.NEG & 0x2) {
+            src1.negModifier();
+        }
+
+        if (extData.NEG & 0x4) {
+            src2.negModifier();
+        }
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = src0[lane];
+                vcc.setBit(lane, 0);
+            }
+        }
+
+        vcc.write();
+        vdst.write();
+    } // execute
+    // --- Inst_VOP3__V_DIV_SCALE_F64 class methods ---
+
+    Inst_VOP3__V_DIV_SCALE_F64::Inst_VOP3__V_DIV_SCALE_F64(
+        InFmt_VOP3B *iFmt)
+        : Inst_VOP3B(iFmt, "v_div_scale_f64")
+    {
+        setFlag(ALU);
+        setFlag(WritesVCC);
+        setFlag(F64);
+    } // Inst_VOP3__V_DIV_SCALE_F64
+
+    Inst_VOP3__V_DIV_SCALE_F64::~Inst_VOP3__V_DIV_SCALE_F64()
+    {
+    } // ~Inst_VOP3__V_DIV_SCALE_F64
+
+    // --- description from .arch file ---
+    // {vcc,D.d} = Divide preop and flags -- s0.d = Quotient, s1.d =
+    // Denominator, s2.d = Numerator -- s0 must equal s1 or s2. Given a
+    // numerator and denominator, this opcode will appropriately scale inputs
+    // for division to avoid subnormal terms during Newton-Raphson correction
+    // algorithm. This opcode produces a VCC flag for post-scale of quotient.
+ void + Inst_VOP3__V_DIV_SCALE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ConstVecOperandF64 src2(gpuDynInst, extData.SRC2); + ScalarOperandU64 vcc(gpuDynInst, instData.SDST); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + if (extData.NEG & 0x4) { + src2.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + int exp1(0); + int exp2(0); + std::frexp(src1[lane], &exp1); + std::frexp(src2[lane], &exp2); + vcc.setBit(lane, 0); + + if (std::fpclassify(src1[lane]) == FP_ZERO + || std::fpclassify(src2[lane]) == FP_ZERO) { + vdst[lane] = NAN; + } else if (exp2 - exp1 >= 768) { + vcc.setBit(lane, 1); + if (src0[lane] == src1[lane]) { + vdst[lane] = std::ldexp(src0[lane], 128); + } + } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL) { + vdst[lane] = std::ldexp(src0[lane], 128); + } else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL + && std::fpclassify(src2[lane] / src1[lane]) + == FP_SUBNORMAL) { + vcc.setBit(lane, 1); + if (src0[lane] == src1[lane]) { + vdst[lane] = std::ldexp(src0[lane], 128); + } + } else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL) { + vdst[lane] = std::ldexp(src0[lane], -128); + } else if (std::fpclassify(src2[lane] / src1[lane]) + == FP_SUBNORMAL) { + vcc.setBit(lane, 1); + if (src0[lane] == src2[lane]) { + vdst[lane] = std::ldexp(src0[lane], 128); + } + } else if (exp2 <= 53) { + vdst[lane] = std::ldexp(src0[lane], 128); + } + } + } + + vcc.write(); + vdst.write(); + } // execute + // --- Inst_VOP3__V_DIV_FMAS_F32 class methods --- + + Inst_VOP3__V_DIV_FMAS_F32::Inst_VOP3__V_DIV_FMAS_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_div_fmas_f32", false) + { + setFlag(ALU); + setFlag(ReadsVCC); + setFlag(F32); + setFlag(FMA); + } // Inst_VOP3__V_DIV_FMAS_F32 + + Inst_VOP3__V_DIV_FMAS_F32::~Inst_VOP3__V_DIV_FMAS_F32() + { + } // ~Inst_VOP3__V_DIV_FMAS_F32 + + // --- description from .arch file --- + // D.f = Special case divide FMA with scale and flags(s0.f = Quotient, + // s1.f = Denominator, s2.f = Numerator) + void + Inst_VOP3__V_DIV_FMAS_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (instData.ABS & 0x4) { + src2.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + if (extData.NEG & 0x4) { + src2.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); + } + } + + //vdst.write(); + } // execute + // --- Inst_VOP3__V_DIV_FMAS_F64 class methods --- + + Inst_VOP3__V_DIV_FMAS_F64::Inst_VOP3__V_DIV_FMAS_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_div_fmas_f64", false) + { + setFlag(ALU); + setFlag(ReadsVCC); + setFlag(F64); + setFlag(FMA); + } // Inst_VOP3__V_DIV_FMAS_F64 + + 
Inst_VOP3__V_DIV_FMAS_F64::~Inst_VOP3__V_DIV_FMAS_F64() + { + } // ~Inst_VOP3__V_DIV_FMAS_F64 + + // --- description from .arch file --- + // D.d = Special case divide FMA with scale and flags(s0.d = Quotient, + // s1.d = Denominator, s2.d = Numerator) + void + Inst_VOP3__V_DIV_FMAS_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ConstVecOperandF64 src2(gpuDynInst, extData.SRC2); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + vcc.read(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (instData.ABS & 0x4) { + src2.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + if (extData.NEG & 0x4) { + src2.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (bits(vcc.rawData(), lane)) { + vdst[lane] = std::pow(2, 64) + * std::fma(src0[lane], src1[lane], src2[lane]); + } else { + vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MSAD_U8 class methods --- + + Inst_VOP3__V_MSAD_U8::Inst_VOP3__V_MSAD_U8(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_msad_u8", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MSAD_U8 + + Inst_VOP3__V_MSAD_U8::~Inst_VOP3__V_MSAD_U8() + { + } // ~Inst_VOP3__V_MSAD_U8 + + // --- description from .arch file --- + // D.u = Masked Byte SAD with accum_lo(S0.u, S1.u, S2.u). + void + Inst_VOP3__V_MSAD_U8::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_QSAD_PK_U16_U8 class methods --- + + Inst_VOP3__V_QSAD_PK_U16_U8::Inst_VOP3__V_QSAD_PK_U16_U8(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_qsad_pk_u16_u8", false) + { + setFlag(ALU); + } // Inst_VOP3__V_QSAD_PK_U16_U8 + + Inst_VOP3__V_QSAD_PK_U16_U8::~Inst_VOP3__V_QSAD_PK_U16_U8() + { + } // ~Inst_VOP3__V_QSAD_PK_U16_U8 + + // --- description from .arch file --- + // D.u = Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0], + // S1.u[31:0], S2.u[63:0]) + void + Inst_VOP3__V_QSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_MQSAD_PK_U16_U8 class methods --- + + Inst_VOP3__V_MQSAD_PK_U16_U8::Inst_VOP3__V_MQSAD_PK_U16_U8( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mqsad_pk_u16_u8", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MQSAD_PK_U16_U8 + + Inst_VOP3__V_MQSAD_PK_U16_U8::~Inst_VOP3__V_MQSAD_PK_U16_U8() + { + } // ~Inst_VOP3__V_MQSAD_PK_U16_U8 + + // --- description from .arch file --- + // D.u = Masked Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0], + // --- S1.u[31:0], S2.u[63:0]) + void + Inst_VOP3__V_MQSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_MQSAD_U32_U8 class methods --- + + Inst_VOP3__V_MQSAD_U32_U8::Inst_VOP3__V_MQSAD_U32_U8(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mqsad_u32_u8", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MQSAD_U32_U8 + + Inst_VOP3__V_MQSAD_U32_U8::~Inst_VOP3__V_MQSAD_U32_U8() + { + } // ~Inst_VOP3__V_MQSAD_U32_U8 + + // --- description from .arch file --- + // D.u128 = Masked Quad-Byte SAD with 32-bit accum_lo/hi(S0.u[63:0], + // --- S1.u[31:0], S2.u[127:0]) + void + 
Inst_VOP3__V_MQSAD_U32_U8::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_MAD_U64_U32 class methods --- + + Inst_VOP3__V_MAD_U64_U32::Inst_VOP3__V_MAD_U64_U32( + InFmt_VOP3B *iFmt) + : Inst_VOP3B(iFmt, "v_mad_u64_u32") + { + setFlag(ALU); + setFlag(WritesVCC); + setFlag(MAD); + } // Inst_VOP3__V_MAD_U64_U32 + + Inst_VOP3__V_MAD_U64_U32::~Inst_VOP3__V_MAD_U64_U32() + { + } // ~Inst_VOP3__V_MAD_U64_U32 + + // --- description from .arch file --- + // {vcc_out,D.u64} = S0.u32 * S1.u32 + S2.u64. + void + Inst_VOP3__V_MAD_U64_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU64 src2(gpuDynInst, extData.SRC2); + ScalarOperandU64 vcc(gpuDynInst, instData.SDST); + VecOperandU64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + vdst.read(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane], + src2[lane])); + } + } + + vcc.write(); + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAD_I64_I32 class methods --- + + Inst_VOP3__V_MAD_I64_I32::Inst_VOP3__V_MAD_I64_I32( + InFmt_VOP3B *iFmt) + : Inst_VOP3B(iFmt, "v_mad_i64_i32") + { + setFlag(ALU); + setFlag(WritesVCC); + setFlag(MAD); + } // Inst_VOP3__V_MAD_I64_I32 + + Inst_VOP3__V_MAD_I64_I32::~Inst_VOP3__V_MAD_I64_I32() + { + } // ~Inst_VOP3__V_MAD_I64_I32 + + // --- description from .arch file --- + // {vcc_out,D.i64} = S0.i32 * S1.i32 + S2.i64. + void + Inst_VOP3__V_MAD_I64_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandI64 src2(gpuDynInst, extData.SRC2); + ScalarOperandU64 vcc(gpuDynInst, instData.SDST); + VecOperandI64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane], + src2[lane])); + } + } + + vcc.write(); + vdst.write(); + } // execute + // --- Inst_VOP3__V_XAD_U32 class methods --- + + Inst_VOP3__V_XAD_U32::Inst_VOP3__V_XAD_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_xad_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_XAD_U32 + + Inst_VOP3__V_XAD_U32::~Inst_VOP3__V_XAD_U32() + { + } // ~Inst_VOP3__V_XAD_U32 + + // --- description from .arch file --- + // D.u32 = (S0.u32 ^ S1.u32) + S2.u32. 
+ void + Inst_VOP3__V_XAD_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (src0[lane] ^ src1[lane]) + src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LSHL_ADD_U32 class methods --- + + Inst_VOP3__V_LSHL_ADD_U32::Inst_VOP3__V_LSHL_ADD_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_lshl_add_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_LSHL_ADD_U32 + + Inst_VOP3__V_LSHL_ADD_U32::~Inst_VOP3__V_LSHL_ADD_U32() + { + } // ~Inst_VOP3__V_LSHL_ADD_U32 + + // --- description from .arch file --- + // D.u = (S0.u << S1.u[4:0]) + S2.u. + void + Inst_VOP3__V_LSHL_ADD_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (src0[lane] << bits(src1[lane], 4, 0)) + + src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_ADD_LSHL_U32 class methods --- + + Inst_VOP3__V_ADD_LSHL_U32::Inst_VOP3__V_ADD_LSHL_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_add_lshl_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_ADD_LSHL_U32 + + Inst_VOP3__V_ADD_LSHL_U32::~Inst_VOP3__V_ADD_LSHL_U32() + { + } // ~Inst_VOP3__V_ADD_LSHL_U32 + + // --- description from .arch file --- + // D.u = (S0.u + S1.u) << S2.u[4:0]. 
+ void + Inst_VOP3__V_ADD_LSHL_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = + (src0[lane] + src1[lane]) << bits(src2[lane], 4, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_ADD3_U32 class methods --- + + Inst_VOP3__V_ADD3_U32::Inst_VOP3__V_ADD3_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_add3_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_ADD3_U32 + + Inst_VOP3__V_ADD3_U32::~Inst_VOP3__V_ADD3_U32() + { + } // ~Inst_VOP3__V_ADD3_U32 + + // --- description from .arch file --- + // D.u = S0.u + S1.u + S2.u. + void + Inst_VOP3__V_ADD3_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] + src1[lane] + src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LSHL_OR_B32 class methods --- + + Inst_VOP3__V_LSHL_OR_B32::Inst_VOP3__V_LSHL_OR_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_lshl_or_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_LSHL_OR_B32 + + Inst_VOP3__V_LSHL_OR_B32::~Inst_VOP3__V_LSHL_OR_B32() + { + } // ~Inst_VOP3__V_LSHL_OR_B32 + + // --- description from .arch file --- + // D.u = (S0.u << S1.u[4:0]) | S2.u. 
+ void + Inst_VOP3__V_LSHL_OR_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (src0[lane] << bits(src1[lane], 4, 0)) + | src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_AND_OR_B32 class methods --- + + Inst_VOP3__V_AND_OR_B32::Inst_VOP3__V_AND_OR_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_and_or_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_AND_OR_B32 + + Inst_VOP3__V_AND_OR_B32::~Inst_VOP3__V_AND_OR_B32() + { + } // ~Inst_VOP3__V_AND_OR_B32 + + // --- description from .arch file --- + // D.u = (S0.u & S1.u) | S2.u. + // Input and output modifiers not supported. + void + Inst_VOP3__V_AND_OR_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (src0[lane] & src1[lane]) | src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAD_F16 class methods --- + + Inst_VOP3__V_MAD_F16::Inst_VOP3__V_MAD_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mad_f16", false) + { + setFlag(ALU); + setFlag(F16); + setFlag(MAD); + } // Inst_VOP3__V_MAD_F16 + + Inst_VOP3__V_MAD_F16::~Inst_VOP3__V_MAD_F16() + { + } // ~Inst_VOP3__V_MAD_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 * S1.f16 + S2.f16. + // Supports round mode, exception flags, saturation. + void + Inst_VOP3__V_MAD_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_MAD_U16 class methods --- + + Inst_VOP3__V_MAD_U16::Inst_VOP3__V_MAD_U16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mad_u16", false) + { + setFlag(ALU); + setFlag(MAD); + } // Inst_VOP3__V_MAD_U16 + + Inst_VOP3__V_MAD_U16::~Inst_VOP3__V_MAD_U16() + { + } // ~Inst_VOP3__V_MAD_U16 + + // --- description from .arch file --- + // D.u16 = S0.u16 * S1.u16 + S2.u16. + // Supports saturation (unsigned 16-bit integer domain). 
+ void + Inst_VOP3__V_MAD_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU16 src2(gpuDynInst, extData.SRC2); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] * src1[lane] + src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAD_I16 class methods --- + + Inst_VOP3__V_MAD_I16::Inst_VOP3__V_MAD_I16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mad_i16", false) + { + setFlag(ALU); + setFlag(MAD); + } // Inst_VOP3__V_MAD_I16 + + Inst_VOP3__V_MAD_I16::~Inst_VOP3__V_MAD_I16() + { + } // ~Inst_VOP3__V_MAD_I16 + + // --- description from .arch file --- + // D.i16 = S0.i16 * S1.i16 + S2.i16. + // Supports saturation (signed 16-bit integer domain). + void + Inst_VOP3__V_MAD_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + ConstVecOperandI16 src2(gpuDynInst, extData.SRC2); + VecOperandI16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] * src1[lane] + src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_PERM_B32 class methods --- + + Inst_VOP3__V_PERM_B32::Inst_VOP3__V_PERM_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_perm_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_PERM_B32 + + Inst_VOP3__V_PERM_B32::~Inst_VOP3__V_PERM_B32() + { + } // ~Inst_VOP3__V_PERM_B32 + + // --- description from .arch file --- + // D.u[31:24] = permute({S0.u, S1.u}, S2.u[31:24]); + // D.u[23:16] = permute({S0.u, S1.u}, S2.u[23:16]); + // D.u[15:8] = permute({S0.u, S1.u}, S2.u[15:8]); + // D.u[7:0] = permute({S0.u, S1.u}, S2.u[7:0]); + // byte permute(byte in[8], byte sel) { + // if (sel>=13) then return 0xff; + // elsif(sel==12) then return 0x00; + // elsif(sel==11) then return in[7][7] * 0xff; + // elsif(sel==10) then return in[5][7] * 0xff; + // elsif(sel==9) then return in[3][7] * 0xff; + // elsif(sel==8) then return in[1][7] * 0xff; + // else return in[sel]; + // } + // Byte permute. 
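+ // For example, with the byte numbering above (S1 in bytes 0-3 of the + // selector and S0 in bytes 4-7, as built by the code below), S2 = + // 0x07060504 copies S0 into D, S2 = 0x03020100 copies S1 into D, and + // S2 = 0x0c0c0c0c produces 0.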
+ void + Inst_VOP3__V_PERM_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemU64 selector = (VecElemU64)src0[lane]; + selector = (selector << 32) | (VecElemU64)src1[lane]; + vdst[lane] = 0; + + DPRINTF(VEGA, "Executing v_perm_b32 src_0 0x%08x, src_1 " + "0x%08x, src_2 0x%08x, vdst 0x%08x\n", src0[lane], + src1[lane], src2[lane], vdst[lane]); + DPRINTF(VEGA, "Selector: 0x%08x \n", selector); + + for (int i = 0; i < 4 ; ++i) { + VecElemU32 permuted_val = permute(selector, 0xFF + & ((VecElemU32)src2[lane] >> (8 * i))); + vdst[lane] |= (permuted_val << (8 * i)); + } + + DPRINTF(VEGA, "v_perm result: 0x%08x\n", vdst[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FMA_F16 class methods --- + + Inst_VOP3__V_FMA_F16::Inst_VOP3__V_FMA_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_fma_f16", false) + { + setFlag(ALU); + setFlag(F16); + setFlag(FMA); + } // Inst_VOP3__V_FMA_F16 + + Inst_VOP3__V_FMA_F16::~Inst_VOP3__V_FMA_F16() + { + } // ~Inst_VOP3__V_FMA_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 * S1.f16 + S2.f16. + // Fused half precision multiply add. + void + Inst_VOP3__V_FMA_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_DIV_FIXUP_F16 class methods --- + + Inst_VOP3__V_DIV_FIXUP_F16::Inst_VOP3__V_DIV_FIXUP_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_div_fixup_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_DIV_FIXUP_F16 + + Inst_VOP3__V_DIV_FIXUP_F16::~Inst_VOP3__V_DIV_FIXUP_F16() + { + } // ~Inst_VOP3__V_DIV_FIXUP_F16 + + // --- description from .arch file --- + // sign_out = sign(S1.f16)^sign(S2.f16); + // if (S2.f16 == NAN) + // D.f16 = Quiet(S2.f16); + // else if (S1.f16 == NAN) + // D.f16 = Quiet(S1.f16); + // else if (S1.f16 == S2.f16 == 0) + // # 0/0 + // D.f16 = pele_nan(0xfe00); + // else if (abs(S1.f16) == abs(S2.f16) == +-INF) + // # inf/inf + // D.f16 = pele_nan(0xfe00); + // else if (S1.f16 ==0 || abs(S2.f16) == +-INF) + // # x/0, or inf/y + // D.f16 = sign_out ? -INF : INF; + // else if (abs(S1.f16) == +-INF || S2.f16 == 0) + // # x/inf, 0/y + // D.f16 = sign_out ? -0 : 0; + // else if ((exp(S2.f16) - exp(S1.f16)) < -150) + // D.f16 = sign_out ? -underflow : underflow; + // else if (exp(S1.f16) == 255) + // D.f16 = sign_out ? -overflow : overflow; + // else + // D.f16 = sign_out ? -abs(S0.f16) : abs(S0.f16). + // Half precision division fixup. + // S0 = Quotient, S1 = Denominator, S3 = Numerator. + // Given a numerator, denominator, and quotient from a divide, this opcode + // will detect and apply special case numerics, touching up the quotient if + // necessary. This opcode also generates invalid, denorm and divide by + // zero exceptions caused by the division. 
+ void + Inst_VOP3__V_DIV_FIXUP_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_PKACCUM_U8_F32 class methods --- + + Inst_VOP3__V_CVT_PKACCUM_U8_F32::Inst_VOP3__V_CVT_PKACCUM_U8_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_pkaccum_u8_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_PKACCUM_U8_F32 + + Inst_VOP3__V_CVT_PKACCUM_U8_F32::~Inst_VOP3__V_CVT_PKACCUM_U8_F32() + { + } // ~Inst_VOP3__V_CVT_PKACCUM_U8_F32 + + // --- description from .arch file --- + // byte = S1.u[1:0]; bit = byte * 8; + // D.u[bit+7:bit] = flt32_to_uint8(S0.f); + // Pack converted value of S0.f into byte S1 of the destination. + // SQ translates to V_CVT_PK_U8_F32. + // Note: this opcode uses src_c to pass destination in as a source. + void + Inst_VOP3__V_CVT_PKACCUM_U8_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_INTERP_P1_F32 class methods --- + + Inst_VOP3__V_INTERP_P1_F32::Inst_VOP3__V_INTERP_P1_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_interp_p1_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_INTERP_P1_F32 + + Inst_VOP3__V_INTERP_P1_F32::~Inst_VOP3__V_INTERP_P1_F32() + { + } // ~Inst_VOP3__V_INTERP_P1_F32 + + // --- description from .arch file --- + // D.f = P10 * S.f + P0; parameter interpolation (SQ translates to + // V_MAD_F32 for SP). + // CAUTION: when in HALF_LDS mode, D must not be the same GPR as S; if + // D == S then data corruption will occur. + // NOTE: In textual representations the I/J VGPR is the first source and + // the attribute is the second source; however in the VOP3 encoding the + // attribute is stored in the src0 field and the VGPR is stored in the + // src1 field. + void + Inst_VOP3__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_INTERP_P2_F32 class methods --- + + Inst_VOP3__V_INTERP_P2_F32::Inst_VOP3__V_INTERP_P2_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_interp_p2_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_INTERP_P2_F32 + + Inst_VOP3__V_INTERP_P2_F32::~Inst_VOP3__V_INTERP_P2_F32() + { + } // ~Inst_VOP3__V_INTERP_P2_F32 + + // --- description from .arch file --- + // D.f = P20 * S.f + D.f; parameter interpolation (SQ translates to + // V_MAD_F32 for SP). + // NOTE: In textual representations the I/J VGPR is the first source and + // the attribute is the second source; however in the VOP3 encoding the + // attribute is stored in the src0 field and the VGPR is stored in the + // src1 field. + void + Inst_VOP3__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_INTERP_MOV_F32 class methods --- + + Inst_VOP3__V_INTERP_MOV_F32::Inst_VOP3__V_INTERP_MOV_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_interp_mov_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_INTERP_MOV_F32 + + Inst_VOP3__V_INTERP_MOV_F32::~Inst_VOP3__V_INTERP_MOV_F32() + { + } // ~Inst_VOP3__V_INTERP_MOV_F32 + + // --- description from .arch file --- + // D.f = {P10,P20,P0}[S.u]; parameter load. 
+ void + Inst_VOP3__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_INTERP_P1LL_F16 class methods --- + + Inst_VOP3__V_INTERP_P1LL_F16::Inst_VOP3__V_INTERP_P1LL_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_interp_p1ll_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_INTERP_P1LL_F16 + + Inst_VOP3__V_INTERP_P1LL_F16::~Inst_VOP3__V_INTERP_P1LL_F16() + { + } // ~Inst_VOP3__V_INTERP_P1LL_F16 + + // --- description from .arch file --- + // D.f32 = P10.f16 * S0.f32 + P0.f16. + // 'LL' stands for 'two LDS arguments'. + // attr_word selects the high or low half 16 bits of each LDS dword + // accessed. + // This opcode is available for 32-bank LDS only. + // NOTE: In textual representations the I/J VGPR is the first source and + // the attribute is the second source; however in the VOP3 encoding the + // attribute is stored in the src0 field and the VGPR is stored in the + // src1 field. + void + Inst_VOP3__V_INTERP_P1LL_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_INTERP_P1LV_F16 class methods --- + + Inst_VOP3__V_INTERP_P1LV_F16::Inst_VOP3__V_INTERP_P1LV_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_interp_p1lv_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_INTERP_P1LV_F16 + + Inst_VOP3__V_INTERP_P1LV_F16::~Inst_VOP3__V_INTERP_P1LV_F16() + { + } // ~Inst_VOP3__V_INTERP_P1LV_F16 + + // --- description from .arch file --- + // D.f32 = P10.f16 * S0.f32 + (S2.u32 >> (attr_word * 16)).f16. + // 'LV' stands for 'One LDS and one VGPR argument'. + // S2 holds two parameters, attr_word selects the high or low word of the + // VGPR for this calculation, as well as the high or low half of the LDS + // data. + // Meant for use with 16-bank LDS. + // NOTE: In textual representations the I/J VGPR is the first source and + // the attribute is the second source; however in the VOP3 encoding the + // attribute is stored in the src0 field and the VGPR is stored in the + // src1 field. + void + Inst_VOP3__V_INTERP_P1LV_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_INTERP_P2_F16 class methods --- + + Inst_VOP3__V_INTERP_P2_F16::Inst_VOP3__V_INTERP_P2_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_interp_p2_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_INTERP_P2_F16 + + Inst_VOP3__V_INTERP_P2_F16::~Inst_VOP3__V_INTERP_P2_F16() + { + } // ~Inst_VOP3__V_INTERP_P2_F16 + + // --- description from .arch file --- + // D.f16 = P20.f16 * S0.f32 + S2.f32. + // Final computation. attr_word selects LDS high or low 16bits. Used for + // both 16- and 32-bank LDS. + // Result is always written to the 16 LSBs of the destination VGPR. + // NOTE: In textual representations the I/J VGPR is the first source and + // the attribute is the second source; however in the VOP3 encoding the + // attribute is stored in the src0 field and the VGPR is stored in the + // src1 field. + void + Inst_VOP3__V_INTERP_P2_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_ADD_F64 class methods --- + + Inst_VOP3__V_ADD_F64::Inst_VOP3__V_ADD_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_add_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_ADD_F64 + + Inst_VOP3__V_ADD_F64::~Inst_VOP3__V_ADD_F64() + { + } // ~Inst_VOP3__V_ADD_F64 + + // --- description from .arch file --- + // D.d = S0.d + S1.d. 
+ void + Inst_VOP3__V_ADD_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::isnan(src0[lane]) || + std::isnan(src1[lane]) ) { + vdst[lane] = NAN; + } else if (std::isinf(src0[lane]) && + std::isinf(src1[lane])) { + if (std::signbit(src0[lane]) != + std::signbit(src1[lane])) { + vdst[lane] = NAN; + } else { + vdst[lane] = src0[lane]; + } + } else if (std::isinf(src0[lane])) { + vdst[lane] = src0[lane]; + } else if (std::isinf(src1[lane])) { + vdst[lane] = src1[lane]; + } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL || + std::fpclassify(src0[lane]) == FP_ZERO) { + if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || + std::fpclassify(src1[lane]) == FP_ZERO) { + if (std::signbit(src0[lane]) && + std::signbit(src1[lane])) { + vdst[lane] = -0.0; + } else { + vdst[lane] = 0.0; + } + } else { + vdst[lane] = src1[lane]; + } + } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || + std::fpclassify(src1[lane]) == FP_ZERO) { + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL || + std::fpclassify(src0[lane]) == FP_ZERO) { + if (std::signbit(src0[lane]) && + std::signbit(src1[lane])) { + vdst[lane] = -0.0; + } else { + vdst[lane] = 0.0; + } + } else { + vdst[lane] = src0[lane]; + } + } else { + vdst[lane] = src0[lane] + src1[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MUL_F64 class methods --- + + Inst_VOP3__V_MUL_F64::Inst_VOP3__V_MUL_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mul_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_MUL_F64 + + Inst_VOP3__V_MUL_F64::~Inst_VOP3__V_MUL_F64() + { + } // ~Inst_VOP3__V_MUL_F64 + + // --- description from .arch file --- + // D.d = S0.d * S1.d. 
+ void + Inst_VOP3__V_MUL_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::isnan(src0[lane]) || + std::isnan(src1[lane])) { + vdst[lane] = NAN; + } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || + std::fpclassify(src0[lane]) == FP_ZERO) && + !std::signbit(src0[lane])) { + if (std::isinf(src1[lane])) { + vdst[lane] = NAN; + } else if (!std::signbit(src1[lane])) { + vdst[lane] = +0.0; + } else { + vdst[lane] = -0.0; + } + } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || + std::fpclassify(src0[lane]) == FP_ZERO) && + std::signbit(src0[lane])) { + if (std::isinf(src1[lane])) { + vdst[lane] = NAN; + } else if (std::signbit(src1[lane])) { + vdst[lane] = +0.0; + } else { + vdst[lane] = -0.0; + } + } else if (std::isinf(src0[lane]) && + !std::signbit(src0[lane])) { + if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || + std::fpclassify(src1[lane]) == FP_ZERO) { + vdst[lane] = NAN; + } else if (!std::signbit(src1[lane])) { + vdst[lane] = +INFINITY; + } else { + vdst[lane] = -INFINITY; + } + } else if (std::isinf(src0[lane]) && + std::signbit(src0[lane])) { + if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || + std::fpclassify(src1[lane]) == FP_ZERO) { + vdst[lane] = NAN; + } else if (std::signbit(src1[lane])) { + vdst[lane] = +INFINITY; + } else { + vdst[lane] = -INFINITY; + } + } else { + vdst[lane] = src0[lane] * src1[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MIN_F64 class methods --- + + Inst_VOP3__V_MIN_F64::Inst_VOP3__V_MIN_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_min_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_MIN_F64 + + Inst_VOP3__V_MIN_F64::~Inst_VOP3__V_MIN_F64() + { + } // ~Inst_VOP3__V_MIN_F64 + + // --- description from .arch file --- + // D.d = min(S0.d, S1.d). 
+ void + Inst_VOP3__V_MIN_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fmin(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAX_F64 class methods --- + + Inst_VOP3__V_MAX_F64::Inst_VOP3__V_MAX_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_max_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_MAX_F64 + + Inst_VOP3__V_MAX_F64::~Inst_VOP3__V_MAX_F64() + { + } // ~Inst_VOP3__V_MAX_F64 + + // --- description from .arch file --- + // D.d = max(S0.d, S1.d). + void + Inst_VOP3__V_MAX_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fmax(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LDEXP_F64 class methods --- + + Inst_VOP3__V_LDEXP_F64::Inst_VOP3__V_LDEXP_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ldexp_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_LDEXP_F64 + + Inst_VOP3__V_LDEXP_F64::~Inst_VOP3__V_LDEXP_F64() + { + } // ~Inst_VOP3__V_LDEXP_F64 + + // --- description from .arch file --- + // D.d = pow(S0.d, S1.i[31:0]). 
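+ // Despite the pow() shorthand above, the operation implemented below is + // S0.d * 2^S1.i, i.e. std::ldexp.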
+ void + Inst_VOP3__V_LDEXP_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::isnan(src0[lane]) || std::isinf(src0[lane])) { + vdst[lane] = src0[lane]; + } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + || std::fpclassify(src0[lane]) == FP_ZERO) { + if (std::signbit(src0[lane])) { + vdst[lane] = -0.0; + } else { + vdst[lane] = +0.0; + } + } else { + vdst[lane] = std::ldexp(src0[lane], src1[lane]); + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MUL_LO_U32 class methods --- + + Inst_VOP3__V_MUL_LO_U32::Inst_VOP3__V_MUL_LO_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mul_lo_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MUL_LO_U32 + + Inst_VOP3__V_MUL_LO_U32::~Inst_VOP3__V_MUL_LO_U32() + { + } // ~Inst_VOP3__V_MUL_LO_U32 + + // --- description from .arch file --- + // D.u = S0.u * S1.u. + void + Inst_VOP3__V_MUL_LO_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemI64 s0 = (VecElemI64)src0[lane]; + VecElemI64 s1 = (VecElemI64)src1[lane]; + vdst[lane] = (VecElemU32)((s0 * s1) & 0xffffffffLL); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MUL_HI_U32 class methods --- + + Inst_VOP3__V_MUL_HI_U32::Inst_VOP3__V_MUL_HI_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mul_hi_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MUL_HI_U32 + + Inst_VOP3__V_MUL_HI_U32::~Inst_VOP3__V_MUL_HI_U32() + { + } // ~Inst_VOP3__V_MUL_HI_U32 + + // --- description from .arch file --- + // D.u = (S0.u * S1.u) >> 32. 
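+ // Paired with v_mul_lo_u32 on the same operands this yields the full + // 64-bit product of two 32-bit values.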
+ void + Inst_VOP3__V_MUL_HI_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemI64 s0 = (VecElemI64)src0[lane]; + VecElemI64 s1 = (VecElemI64)src1[lane]; + vdst[lane] + = (VecElemU32)(((s0 * s1) >> 32) & 0xffffffffLL); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MUL_HI_I32 class methods --- + + Inst_VOP3__V_MUL_HI_I32::Inst_VOP3__V_MUL_HI_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mul_hi_i32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MUL_HI_I32 + + Inst_VOP3__V_MUL_HI_I32::~Inst_VOP3__V_MUL_HI_I32() + { + } // ~Inst_VOP3__V_MUL_HI_I32 + + // --- description from .arch file --- + // D.i = (S0.i * S1.i) >> 32. + void + Inst_VOP3__V_MUL_HI_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemI64 s0 = (VecElemI64)src0[lane]; + VecElemI64 s1 = (VecElemI64)src1[lane]; + vdst[lane] + = (VecElemI32)(((s0 * s1) >> 32LL) & 0xffffffffLL); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LDEXP_F32 class methods --- + + Inst_VOP3__V_LDEXP_F32::Inst_VOP3__V_LDEXP_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ldexp_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_LDEXP_F32 + + Inst_VOP3__V_LDEXP_F32::~Inst_VOP3__V_LDEXP_F32() + { + } // ~Inst_VOP3__V_LDEXP_F32 + + // --- description from .arch file --- + // D.f = pow(S0.f, S1.i) + void + Inst_VOP3__V_LDEXP_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::ldexp(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_READLANE_B32 class methods --- + + Inst_VOP3__V_READLANE_B32::Inst_VOP3__V_READLANE_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_readlane_b32", true) + { + setFlag(ALU); + setFlag(IgnoreExec); + } // Inst_VOP3__V_READLANE_B32 + + Inst_VOP3__V_READLANE_B32::~Inst_VOP3__V_READLANE_B32() + { + } // ~Inst_VOP3__V_READLANE_B32 + + // --- description from .arch file --- 
+ // Copy one VGPR value to one SGPR. D = SGPR-dest, S0 = Source Data (VGPR# + // or M0(lds-direct)), S1 = Lane Select (SGPR or M0). Ignores exec mask. + // Input and output modifiers not supported; this is an untyped operation. + void + Inst_VOP3__V_READLANE_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + sdst = src0[src1.rawData() & 0x3f]; + + sdst.write(); + } // execute + // --- Inst_VOP3__V_WRITELANE_B32 class methods --- + + Inst_VOP3__V_WRITELANE_B32::Inst_VOP3__V_WRITELANE_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_writelane_b32", false) + { + setFlag(ALU); + setFlag(IgnoreExec); + } // Inst_VOP3__V_WRITELANE_B32 + + Inst_VOP3__V_WRITELANE_B32::~Inst_VOP3__V_WRITELANE_B32() + { + } // ~Inst_VOP3__V_WRITELANE_B32 + + // --- description from .arch file --- + // Write value into one VGPR in one lane. D = VGPR-dest, S0 = Source Data + // (sgpr, m0, exec or constants), S1 = Lane Select (SGPR or M0). Ignores + // exec mask. + // Input and output modifiers not supported; this is an untyped operation. + // SQ translates to V_MOV_B32. + void + Inst_VOP3__V_WRITELANE_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, extData.SRC0); + ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.read(); + src1.read(); + vdst.read(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + vdst[src1.rawData() & 0x3f] = src0.rawData(); + + vdst.write(); + } // execute + // --- Inst_VOP3__V_BCNT_U32_B32 class methods --- + + Inst_VOP3__V_BCNT_U32_B32::Inst_VOP3__V_BCNT_U32_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_bcnt_u32_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_BCNT_U32_B32 + + Inst_VOP3__V_BCNT_U32_B32::~Inst_VOP3__V_BCNT_U32_B32() + { + } // ~Inst_VOP3__V_BCNT_U32_B32 + + // --- description from .arch file --- + // D.u = CountOneBits(S0.u) + S1.u. Bit count. 
+ void + Inst_VOP3__V_BCNT_U32_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = popCount(src0[lane]) + src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MBCNT_LO_U32_B32 class methods --- + + Inst_VOP3__V_MBCNT_LO_U32_B32::Inst_VOP3__V_MBCNT_LO_U32_B32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mbcnt_lo_u32_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MBCNT_LO_U32_B32 + + Inst_VOP3__V_MBCNT_LO_U32_B32::~Inst_VOP3__V_MBCNT_LO_U32_B32() + { + } // ~Inst_VOP3__V_MBCNT_LO_U32_B32 + + // --- description from .arch file --- + // ThreadMask = (1 << ThreadPosition) - 1; + // D.u = CountOneBits(S0.u & ThreadMask[31:0]) + S1.u. + // Masked bit count, ThreadPosition is the position of this thread in the + // --- wavefront (in 0..63). + void + Inst_VOP3__V_MBCNT_LO_U32_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + uint64_t threadMask = 0; + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + threadMask = ((1LL << lane) - 1LL); + vdst[lane] = popCount(src0[lane] & bits(threadMask, 31, 0)) + + src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MBCNT_HI_U32_B32 class methods --- + + Inst_VOP3__V_MBCNT_HI_U32_B32::Inst_VOP3__V_MBCNT_HI_U32_B32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mbcnt_hi_u32_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MBCNT_HI_U32_B32 + + Inst_VOP3__V_MBCNT_HI_U32_B32::~Inst_VOP3__V_MBCNT_HI_U32_B32() + { + } // ~Inst_VOP3__V_MBCNT_HI_U32_B32 + + // --- description from .arch file --- + // ThreadMask = (1 << ThreadPosition) - 1; + // D.u = CountOneBits(S0.u & ThreadMask[63:32]) + S1.u. + // Masked bit count, ThreadPosition is the position of this thread in the + // --- wavefront (in 0..63). 
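+ // Note: mbcnt_lo and mbcnt_hi are typically issued back to back, e.g. + //   v_mbcnt_lo_u32_b32 v0, -1, 0 + //   v_mbcnt_hi_u32_b32 v0, -1, v0 + // which leaves each lane's position within the 64-wide wavefront in v0.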
+ void + Inst_VOP3__V_MBCNT_HI_U32_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + uint64_t threadMask = 0; + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + threadMask = ((1LL << lane) - 1LL); + vdst[lane] = popCount(src0[lane] & bits(threadMask, 63, 32)) + + src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LSHLREV_B64 class methods --- + + Inst_VOP3__V_LSHLREV_B64::Inst_VOP3__V_LSHLREV_B64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_lshlrev_b64", false) + { + setFlag(ALU); + } // Inst_VOP3__V_LSHLREV_B64 + + Inst_VOP3__V_LSHLREV_B64::~Inst_VOP3__V_LSHLREV_B64() + { + } // ~Inst_VOP3__V_LSHLREV_B64 + + // --- description from .arch file --- + // D.u64 = S1.u64 << S0.u[5:0]. + // SQ translates this to an internal SP opcode. + void + Inst_VOP3__V_LSHLREV_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + VecOperandU64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] << bits(src0[lane], 5, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LSHRREV_B64 class methods --- + + Inst_VOP3__V_LSHRREV_B64::Inst_VOP3__V_LSHRREV_B64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_lshrrev_b64", false) + { + setFlag(ALU); + } // Inst_VOP3__V_LSHRREV_B64 + + Inst_VOP3__V_LSHRREV_B64::~Inst_VOP3__V_LSHRREV_B64() + { + } // ~Inst_VOP3__V_LSHRREV_B64 + + // --- description from .arch file --- + // D.u64 = S1.u64 >> S0.u[5:0]. + // The vacated bits are set to zero. + // SQ translates this to an internal SP opcode. 
+ void + Inst_VOP3__V_LSHRREV_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + VecOperandU64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] >> bits(src0[lane], 5, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_ASHRREV_I64 class methods --- + + Inst_VOP3__V_ASHRREV_I64::Inst_VOP3__V_ASHRREV_I64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ashrrev_i64", false) + { + setFlag(ALU); + } // Inst_VOP3__V_ASHRREV_I64 + + Inst_VOP3__V_ASHRREV_I64::~Inst_VOP3__V_ASHRREV_I64() + { + } // ~Inst_VOP3__V_ASHRREV_I64 + + // --- description from .arch file --- + // D.u64 = signext(S1.u64) >> S0.u[5:0]. + // The vacated bits are set to the sign bit of the input value. + // SQ translates this to an internal SP opcode. + void + Inst_VOP3__V_ASHRREV_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + VecOperandU64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] + = src1[lane] >> bits(src0[lane], 5, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_TRIG_PREOP_F64 class methods --- + + Inst_VOP3__V_TRIG_PREOP_F64::Inst_VOP3__V_TRIG_PREOP_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_trig_preop_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_TRIG_PREOP_F64 + + Inst_VOP3__V_TRIG_PREOP_F64::~Inst_VOP3__V_TRIG_PREOP_F64() + { + } // ~Inst_VOP3__V_TRIG_PREOP_F64 + + // --- description from .arch file --- + // D.d = Look Up 2/PI (S0.d) with segment select S1.u[4:0]. This operation + // returns an aligned, double precision segment of 2/PI needed to do range + // reduction on S0.d (double-precision value). Multiple segments can be + // specified through S1.u[4:0]. Rounding is always round-to-zero. Large + // inputs (exp > 1968) are scaled to avoid loss of precision through + // denormalization. 
+ void + Inst_VOP3__V_TRIG_PREOP_F64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_BFM_B32 class methods --- + + Inst_VOP3__V_BFM_B32::Inst_VOP3__V_BFM_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_bfm_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_BFM_B32 + + Inst_VOP3__V_BFM_B32::~Inst_VOP3__V_BFM_B32() + { + } // ~Inst_VOP3__V_BFM_B32 + + // --- description from .arch file --- + // D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0]; bitfield mask. + void + Inst_VOP3__V_BFM_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = ((1 << bits(src0[lane], 4, 0)) - 1) + << bits(src1[lane], 4, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_PKNORM_I16_F32 class methods --- + + Inst_VOP3__V_CVT_PKNORM_I16_F32::Inst_VOP3__V_CVT_PKNORM_I16_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_pknorm_i16_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_PKNORM_I16_F32 + + Inst_VOP3__V_CVT_PKNORM_I16_F32::~Inst_VOP3__V_CVT_PKNORM_I16_F32() + { + } // ~Inst_VOP3__V_CVT_PKNORM_I16_F32 + + // --- description from .arch file --- + // D = {(snorm)S1.f, (snorm)S0.f}. + void + Inst_VOP3__V_CVT_PKNORM_I16_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_PKNORM_U16_F32 class methods --- + + Inst_VOP3__V_CVT_PKNORM_U16_F32::Inst_VOP3__V_CVT_PKNORM_U16_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_pknorm_u16_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_PKNORM_U16_F32 + + Inst_VOP3__V_CVT_PKNORM_U16_F32::~Inst_VOP3__V_CVT_PKNORM_U16_F32() + { + } // ~Inst_VOP3__V_CVT_PKNORM_U16_F32 + + // --- description from .arch file --- + // D = {(unorm)S1.f, (unorm)S0.f}. + void + Inst_VOP3__V_CVT_PKNORM_U16_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_PKRTZ_F16_F32 class methods --- + + Inst_VOP3__V_CVT_PKRTZ_F16_F32::Inst_VOP3__V_CVT_PKRTZ_F16_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_pkrtz_f16_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_PKRTZ_F16_F32 + + Inst_VOP3__V_CVT_PKRTZ_F16_F32::~Inst_VOP3__V_CVT_PKRTZ_F16_F32() + { + } // ~Inst_VOP3__V_CVT_PKRTZ_F16_F32 + + // --- description from .arch file --- + // D = {flt32_to_flt16(S1.f),flt32_to_flt16(S0.f)}, with round-toward-zero + // --- regardless of current round mode setting in hardware. + // This opcode is intended for use with 16-bit compressed exports. + // See V_CVT_F16_F32 for a version that respects the current rounding mode.
+ void + Inst_VOP3__V_CVT_PKRTZ_F16_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_PK_U16_U32 class methods --- + + Inst_VOP3__V_CVT_PK_U16_U32::Inst_VOP3__V_CVT_PK_U16_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_pk_u16_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_CVT_PK_U16_U32 + + Inst_VOP3__V_CVT_PK_U16_U32::~Inst_VOP3__V_CVT_PK_U16_U32() + { + } // ~Inst_VOP3__V_CVT_PK_U16_U32 + + // --- description from .arch file --- + // D = {uint32_to_uint16(S1.u), uint32_to_uint16(S0.u)}. + void + Inst_VOP3__V_CVT_PK_U16_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_PK_I16_I32 class methods --- + + Inst_VOP3__V_CVT_PK_I16_I32::Inst_VOP3__V_CVT_PK_I16_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_pk_i16_i32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_CVT_PK_I16_I32 + + Inst_VOP3__V_CVT_PK_I16_I32::~Inst_VOP3__V_CVT_PK_I16_I32() + { + } // ~Inst_VOP3__V_CVT_PK_I16_I32 + + // --- description from .arch file --- + // D = {int32_to_int16(S1.i), int32_to_int16(S0.i)}. + void + Inst_VOP3__V_CVT_PK_I16_I32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/vop3_cmp.cc b/src/arch/amdgpu/vega/insts/vop3_cmp.cc new file mode 100644 index 0000000000..4bbec930e6 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/vop3_cmp.cc @@ -0,0 +1,8145 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "arch/amdgpu/vega/insts/inst_util.hh" +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_VOP3__V_CMP_CLASS_F32 class methods --- + + Inst_VOP3__V_CMP_CLASS_F32::Inst_VOP3__V_CMP_CLASS_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_class_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_CLASS_F32 + + Inst_VOP3__V_CMP_CLASS_F32::~Inst_VOP3__V_CMP_CLASS_F32() + { + } // ~Inst_VOP3__V_CMP_CLASS_F32 + + // --- description from .arch file --- + // VCC = IEEE numeric class function specified in S1.u, performed on S0.f + // The function reports true if the floating point value is *any* of the + // --- numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. + // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. + void + Inst_VOP3__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (bits(src1[lane], 0) || bits(src1[lane], 1)) { + // is NaN + if (std::isnan(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 2)) { + // is -infinity + if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 3)) { + // is -normal + if (std::isnormal(src0[lane]) + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 4)) { + // is -denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 5)) { + // is -zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 6)) { + // is +zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 7)) { + // is +denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 8)) { + // is +normal + if (std::isnormal(src0[lane]) + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 9)) { + // is +infinity + if (std::isinf(src0[lane]) + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_CLASS_F32 class methods --- + + Inst_VOP3__V_CMPX_CLASS_F32::Inst_VOP3__V_CMPX_CLASS_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_class_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_CLASS_F32 + + Inst_VOP3__V_CMPX_CLASS_F32::~Inst_VOP3__V_CMPX_CLASS_F32() + { + } // ~Inst_VOP3__V_CMPX_CLASS_F32 + + // --- description from .arch file --- + // EXEC, VCC = IEEE 
numeric class function specified in S1.u, performed on + // S0.f + // The function reports true if the floating point value is *any* of the + // numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. + // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. + void + Inst_VOP3__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (bits(src1[lane], 0) || bits(src1[lane], 1)) { + // is NaN + if (std::isnan(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 2)) { + // is -infinity + if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 3)) { + // is -normal + if (std::isnormal(src0[lane]) + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 4)) { + // is -denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 5)) { + // is -zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 6)) { + // is +zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 7)) { + // is +denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 8)) { + // is +normal + if (std::isnormal(src0[lane]) + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 9)) { + // is +infinity + if (std::isinf(src0[lane]) + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_CLASS_F64 class methods --- + + Inst_VOP3__V_CMP_CLASS_F64::Inst_VOP3__V_CMP_CLASS_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_class_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_CLASS_F64 + + Inst_VOP3__V_CMP_CLASS_F64::~Inst_VOP3__V_CMP_CLASS_F64() + { + } // ~Inst_VOP3__V_CMP_CLASS_F64 + + // --- description from .arch file --- + // VCC = IEEE numeric class function specified in S1.u, performed on S0.d + // The function reports true if the floating point value is *any* of the + // --- numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. 
+ // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. + void + Inst_VOP3__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (bits(src1[lane], 0) || bits(src1[lane], 1)) { + // is NaN + if (std::isnan(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 2)) { + // is -infinity + if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 3)) { + // is -normal + if (std::isnormal(src0[lane]) + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 4)) { + // is -denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 5)) { + // is -zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 6)) { + // is +zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 7)) { + // is +denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 8)) { + // is +normal + if (std::isnormal(src0[lane]) + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 9)) { + // is +infinity + if (std::isinf(src0[lane]) + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_CLASS_F64 class methods --- + + Inst_VOP3__V_CMPX_CLASS_F64::Inst_VOP3__V_CMPX_CLASS_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_class_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_CLASS_F64 + + Inst_VOP3__V_CMPX_CLASS_F64::~Inst_VOP3__V_CMPX_CLASS_F64() + { + } // ~Inst_VOP3__V_CMPX_CLASS_F64 + + // --- description from .arch file --- + // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on + // S0.d + // The function reports true if the floating point value is *any* of the + // numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. + // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. 
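+    // Note (added commentary, not from the .arch file): the ten class-mask
+    // tests used by the V_CMP*_CLASS_* implementations in this file could
+    // equivalently be folded into a single classification helper. A minimal
+    // sketch, assuming a hypothetical classMatches() helper:
+    //
+    //   template<typename T>
+    //   bool classMatches(T v, uint32_t mask)
+    //   {
+    //       bool neg = std::signbit(v);
+    //       switch (std::fpclassify(v)) {
+    //         case FP_NAN:       return mask & 0x003;             // bits 0-1
+    //         case FP_INFINITE:  return mask & (neg ? 0x004 : 0x200);
+    //         case FP_NORMAL:    return mask & (neg ? 0x008 : 0x100);
+    //         case FP_SUBNORMAL: return mask & (neg ? 0x010 : 0x080);
+    //         default:           return mask & (neg ? 0x020 : 0x040); // zero
+    //       }
+    //   }
+    //
+    // The code below keeps the explicit per-bit checks so that each test
+    // mirrors the .arch description above line for line.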
+ void + Inst_VOP3__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (bits(src1[lane], 0) || bits(src1[lane], 1)) { + // is NaN + if (std::isnan(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 2)) { + // is -infinity + if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 3)) { + // is -normal + if (std::isnormal(src0[lane]) + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 4)) { + // is -denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 5)) { + // is -zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 6)) { + // is +zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 7)) { + // is +denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 8)) { + // is +normal + if (std::isnormal(src0[lane]) + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 9)) { + // is +infinity + if (std::isinf(src0[lane]) + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_CLASS_F16 class methods --- + + Inst_VOP3__V_CMP_CLASS_F16::Inst_VOP3__V_CMP_CLASS_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_class_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_CLASS_F16 + + Inst_VOP3__V_CMP_CLASS_F16::~Inst_VOP3__V_CMP_CLASS_F16() + { + } // ~Inst_VOP3__V_CMP_CLASS_F16 + + // --- description from .arch file --- + // VCC = IEEE numeric class function specified in S1.u, performed on S0.f16 + // The function reports true if the floating point value is *any* of the + // --- numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. + // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. 
+ void + Inst_VOP3__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_CLASS_F16 class methods --- + + Inst_VOP3__V_CMPX_CLASS_F16::Inst_VOP3__V_CMPX_CLASS_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_class_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_CLASS_F16 + + Inst_VOP3__V_CMPX_CLASS_F16::~Inst_VOP3__V_CMPX_CLASS_F16() + { + } // ~Inst_VOP3__V_CMPX_CLASS_F16 + + // --- description from .arch file --- + // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on + // --- S0.f16 + // The function reports true if the floating point value is *any* of the + // --- numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. + // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. + void + Inst_VOP3__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_F_F16 class methods --- + + Inst_VOP3__V_CMP_F_F16::Inst_VOP3__V_CMP_F_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_f_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_F_F16 + + Inst_VOP3__V_CMP_F_F16::~Inst_VOP3__V_CMP_F_F16() + { + } // ~Inst_VOP3__V_CMP_F_F16 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_LT_F16 class methods --- + + Inst_VOP3__V_CMP_LT_F16::Inst_VOP3__V_CMP_LT_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_lt_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_LT_F16 + + Inst_VOP3__V_CMP_LT_F16::~Inst_VOP3__V_CMP_LT_F16() + { + } // ~Inst_VOP3__V_CMP_LT_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_EQ_F16 class methods --- + + Inst_VOP3__V_CMP_EQ_F16::Inst_VOP3__V_CMP_EQ_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_eq_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_EQ_F16 + + Inst_VOP3__V_CMP_EQ_F16::~Inst_VOP3__V_CMP_EQ_F16() + { + } // ~Inst_VOP3__V_CMP_EQ_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_LE_F16 class methods --- + + Inst_VOP3__V_CMP_LE_F16::Inst_VOP3__V_CMP_LE_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_le_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_LE_F16 + + Inst_VOP3__V_CMP_LE_F16::~Inst_VOP3__V_CMP_LE_F16() + { + } // ~Inst_VOP3__V_CMP_LE_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_GT_F16 class methods --- + + Inst_VOP3__V_CMP_GT_F16::Inst_VOP3__V_CMP_GT_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_gt_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_GT_F16 + + Inst_VOP3__V_CMP_GT_F16::~Inst_VOP3__V_CMP_GT_F16() + { + } // ~Inst_VOP3__V_CMP_GT_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_LG_F16 class methods --- + + Inst_VOP3__V_CMP_LG_F16::Inst_VOP3__V_CMP_LG_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_lg_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_LG_F16 + + Inst_VOP3__V_CMP_LG_F16::~Inst_VOP3__V_CMP_LG_F16() + { + } // ~Inst_VOP3__V_CMP_LG_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_GE_F16 class methods --- + + Inst_VOP3__V_CMP_GE_F16::Inst_VOP3__V_CMP_GE_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ge_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_GE_F16 + + Inst_VOP3__V_CMP_GE_F16::~Inst_VOP3__V_CMP_GE_F16() + { + } // ~Inst_VOP3__V_CMP_GE_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_O_F16 class methods --- + + Inst_VOP3__V_CMP_O_F16::Inst_VOP3__V_CMP_O_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_o_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_O_F16 + + Inst_VOP3__V_CMP_O_F16::~Inst_VOP3__V_CMP_O_F16() + { + } // ~Inst_VOP3__V_CMP_O_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_U_F16 class methods --- + + Inst_VOP3__V_CMP_U_F16::Inst_VOP3__V_CMP_U_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_u_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_U_F16 + + Inst_VOP3__V_CMP_U_F16::~Inst_VOP3__V_CMP_U_F16() + { + } // ~Inst_VOP3__V_CMP_U_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_NGE_F16 class methods --- + + Inst_VOP3__V_CMP_NGE_F16::Inst_VOP3__V_CMP_NGE_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nge_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_NGE_F16 + + Inst_VOP3__V_CMP_NGE_F16::~Inst_VOP3__V_CMP_NGE_F16() + { + } // ~Inst_VOP3__V_CMP_NGE_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_NLG_F16 class methods --- + + Inst_VOP3__V_CMP_NLG_F16::Inst_VOP3__V_CMP_NLG_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nlg_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_NLG_F16 + + Inst_VOP3__V_CMP_NLG_F16::~Inst_VOP3__V_CMP_NLG_F16() + { + } // ~Inst_VOP3__V_CMP_NLG_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_NGT_F16 class methods --- + + Inst_VOP3__V_CMP_NGT_F16::Inst_VOP3__V_CMP_NGT_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ngt_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_NGT_F16 + + Inst_VOP3__V_CMP_NGT_F16::~Inst_VOP3__V_CMP_NGT_F16() + { + } // ~Inst_VOP3__V_CMP_NGT_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_NLE_F16 class methods --- + + Inst_VOP3__V_CMP_NLE_F16::Inst_VOP3__V_CMP_NLE_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nle_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_NLE_F16 + + Inst_VOP3__V_CMP_NLE_F16::~Inst_VOP3__V_CMP_NLE_F16() + { + } // ~Inst_VOP3__V_CMP_NLE_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_NEQ_F16 class methods --- + + Inst_VOP3__V_CMP_NEQ_F16::Inst_VOP3__V_CMP_NEQ_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_neq_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_NEQ_F16 + + Inst_VOP3__V_CMP_NEQ_F16::~Inst_VOP3__V_CMP_NEQ_F16() + { + } // ~Inst_VOP3__V_CMP_NEQ_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_NLT_F16 class methods --- + + Inst_VOP3__V_CMP_NLT_F16::Inst_VOP3__V_CMP_NLT_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nlt_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_NLT_F16 + + Inst_VOP3__V_CMP_NLT_F16::~Inst_VOP3__V_CMP_NLT_F16() + { + } // ~Inst_VOP3__V_CMP_NLT_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_TRU_F16 class methods --- + + Inst_VOP3__V_CMP_TRU_F16::Inst_VOP3__V_CMP_TRU_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_tru_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_TRU_F16 + + Inst_VOP3__V_CMP_TRU_F16::~Inst_VOP3__V_CMP_TRU_F16() + { + } // ~Inst_VOP3__V_CMP_TRU_F16 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_F_F16 class methods --- + + Inst_VOP3__V_CMPX_F_F16::Inst_VOP3__V_CMPX_F_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_f_f16", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_F_F16 + + Inst_VOP3__V_CMPX_F_F16::~Inst_VOP3__V_CMPX_F_F16() + { + } // ~Inst_VOP3__V_CMPX_F_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LT_F16 class methods --- + + Inst_VOP3__V_CMPX_LT_F16::Inst_VOP3__V_CMPX_LT_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lt_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LT_F16 + + Inst_VOP3__V_CMPX_LT_F16::~Inst_VOP3__V_CMPX_LT_F16() + { + } // ~Inst_VOP3__V_CMPX_LT_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_EQ_F16 class methods --- + + Inst_VOP3__V_CMPX_EQ_F16::Inst_VOP3__V_CMPX_EQ_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_eq_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_EQ_F16 + + Inst_VOP3__V_CMPX_EQ_F16::~Inst_VOP3__V_CMPX_EQ_F16() + { + } // ~Inst_VOP3__V_CMPX_EQ_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_LE_F16 class methods --- + + Inst_VOP3__V_CMPX_LE_F16::Inst_VOP3__V_CMPX_LE_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_le_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LE_F16 + + Inst_VOP3__V_CMPX_LE_F16::~Inst_VOP3__V_CMPX_LE_F16() + { + } // ~Inst_VOP3__V_CMPX_LE_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_GT_F16 class methods --- + + Inst_VOP3__V_CMPX_GT_F16::Inst_VOP3__V_CMPX_GT_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_gt_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GT_F16 + + Inst_VOP3__V_CMPX_GT_F16::~Inst_VOP3__V_CMPX_GT_F16() + { + } // ~Inst_VOP3__V_CMPX_GT_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_LG_F16 class methods --- + + Inst_VOP3__V_CMPX_LG_F16::Inst_VOP3__V_CMPX_LG_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lg_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LG_F16 + + Inst_VOP3__V_CMPX_LG_F16::~Inst_VOP3__V_CMPX_LG_F16() + { + } // ~Inst_VOP3__V_CMPX_LG_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_GE_F16 class methods --- + + Inst_VOP3__V_CMPX_GE_F16::Inst_VOP3__V_CMPX_GE_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ge_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GE_F16 + + Inst_VOP3__V_CMPX_GE_F16::~Inst_VOP3__V_CMPX_GE_F16() + { + } // ~Inst_VOP3__V_CMPX_GE_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_O_F16 class methods --- + + Inst_VOP3__V_CMPX_O_F16::Inst_VOP3__V_CMPX_O_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_o_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_O_F16 + + Inst_VOP3__V_CMPX_O_F16::~Inst_VOP3__V_CMPX_O_F16() + { + } // ~Inst_VOP3__V_CMPX_O_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC + // encoding. + void + Inst_VOP3__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_U_F16 class methods --- + + Inst_VOP3__V_CMPX_U_F16::Inst_VOP3__V_CMPX_U_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_u_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_U_F16 + + Inst_VOP3__V_CMPX_U_F16::~Inst_VOP3__V_CMPX_U_F16() + { + } // ~Inst_VOP3__V_CMPX_U_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC + // encoding. + void + Inst_VOP3__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_NGE_F16 class methods --- + + Inst_VOP3__V_CMPX_NGE_F16::Inst_VOP3__V_CMPX_NGE_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nge_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NGE_F16 + + Inst_VOP3__V_CMPX_NGE_F16::~Inst_VOP3__V_CMPX_NGE_F16() + { + } // ~Inst_VOP3__V_CMPX_NGE_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_NLG_F16 class methods --- + + Inst_VOP3__V_CMPX_NLG_F16::Inst_VOP3__V_CMPX_NLG_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nlg_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NLG_F16 + + Inst_VOP3__V_CMPX_NLG_F16::~Inst_VOP3__V_CMPX_NLG_F16() + { + } // ~Inst_VOP3__V_CMPX_NLG_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_NGT_F16 class methods --- + + Inst_VOP3__V_CMPX_NGT_F16::Inst_VOP3__V_CMPX_NGT_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ngt_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NGT_F16 + + Inst_VOP3__V_CMPX_NGT_F16::~Inst_VOP3__V_CMPX_NGT_F16() + { + } // ~Inst_VOP3__V_CMPX_NGT_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_NLE_F16 class methods --- + + Inst_VOP3__V_CMPX_NLE_F16::Inst_VOP3__V_CMPX_NLE_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nle_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NLE_F16 + + Inst_VOP3__V_CMPX_NLE_F16::~Inst_VOP3__V_CMPX_NLE_F16() + { + } // ~Inst_VOP3__V_CMPX_NLE_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_NEQ_F16 class methods --- + + Inst_VOP3__V_CMPX_NEQ_F16::Inst_VOP3__V_CMPX_NEQ_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_neq_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NEQ_F16 + + Inst_VOP3__V_CMPX_NEQ_F16::~Inst_VOP3__V_CMPX_NEQ_F16() + { + } // ~Inst_VOP3__V_CMPX_NEQ_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_NLT_F16 class methods --- + + Inst_VOP3__V_CMPX_NLT_F16::Inst_VOP3__V_CMPX_NLT_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nlt_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NLT_F16 + + Inst_VOP3__V_CMPX_NLT_F16::~Inst_VOP3__V_CMPX_NLT_F16() + { + } // ~Inst_VOP3__V_CMPX_NLT_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_TRU_F16 class methods --- + + Inst_VOP3__V_CMPX_TRU_F16::Inst_VOP3__V_CMPX_TRU_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_tru_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_TRU_F16 + + Inst_VOP3__V_CMPX_TRU_F16::~Inst_VOP3__V_CMPX_TRU_F16() + { + } // ~Inst_VOP3__V_CMPX_TRU_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
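+    // Note (added commentary, not from the .arch file): every implemented
+    // V_CMPX_* op in this file ends with the same sequence -- the per-lane
+    // result mask accumulated in sdst is copied into the wavefront's EXEC
+    // mask before the destination is written back:
+    //
+    //   wf->execMask() = sdst.rawData();   // EXEC <- comparison result
+    //   sdst.write();                      // D (VCC/SGPR pair) <- result
+    //
+    // Apart from the WritesEXEC flag set in the constructors, this is the
+    // only difference from the corresponding V_CMP_* ops.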
+ void + Inst_VOP3__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_F_F32 class methods --- + + Inst_VOP3__V_CMP_F_F32::Inst_VOP3__V_CMP_F_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_f_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_F_F32 + + Inst_VOP3__V_CMP_F_F32::~Inst_VOP3__V_CMP_F_F32() + { + } // ~Inst_VOP3__V_CMP_F_F32 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LT_F32 class methods --- + + Inst_VOP3__V_CMP_LT_F32::Inst_VOP3__V_CMP_LT_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_lt_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_LT_F32 + + Inst_VOP3__V_CMP_LT_F32::~Inst_VOP3__V_CMP_LT_F32() + { + } // ~Inst_VOP3__V_CMP_LT_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_EQ_F32 class methods --- + + Inst_VOP3__V_CMP_EQ_F32::Inst_VOP3__V_CMP_EQ_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_eq_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_EQ_F32 + + Inst_VOP3__V_CMP_EQ_F32::~Inst_VOP3__V_CMP_EQ_F32() + { + } // ~Inst_VOP3__V_CMP_EQ_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LE_F32 class methods --- + + Inst_VOP3__V_CMP_LE_F32::Inst_VOP3__V_CMP_LE_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_le_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_LE_F32 + + Inst_VOP3__V_CMP_LE_F32::~Inst_VOP3__V_CMP_LE_F32() + { + } // ~Inst_VOP3__V_CMP_LE_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GT_F32 class methods --- + + Inst_VOP3__V_CMP_GT_F32::Inst_VOP3__V_CMP_GT_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_gt_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_GT_F32 + + Inst_VOP3__V_CMP_GT_F32::~Inst_VOP3__V_CMP_GT_F32() + { + } // ~Inst_VOP3__V_CMP_GT_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LG_F32 class methods --- + + Inst_VOP3__V_CMP_LG_F32::Inst_VOP3__V_CMP_LG_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_lg_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_LG_F32 + + Inst_VOP3__V_CMP_LG_F32::~Inst_VOP3__V_CMP_LG_F32() + { + } // ~Inst_VOP3__V_CMP_LG_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GE_F32 class methods --- + + Inst_VOP3__V_CMP_GE_F32::Inst_VOP3__V_CMP_GE_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ge_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_GE_F32 + + Inst_VOP3__V_CMP_GE_F32::~Inst_VOP3__V_CMP_GE_F32() + { + } // ~Inst_VOP3__V_CMP_GE_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_O_F32 class methods --- + + Inst_VOP3__V_CMP_O_F32::Inst_VOP3__V_CMP_O_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_o_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_O_F32 + + Inst_VOP3__V_CMP_O_F32::~Inst_VOP3__V_CMP_O_F32() + { + } // ~Inst_VOP3__V_CMP_O_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (!std::isnan(src0[lane]) + && !std::isnan(src1[lane])) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_U_F32 class methods --- + + Inst_VOP3__V_CMP_U_F32::Inst_VOP3__V_CMP_U_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_u_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_U_F32 + + Inst_VOP3__V_CMP_U_F32::~Inst_VOP3__V_CMP_U_F32() + { + } // ~Inst_VOP3__V_CMP_U_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (std::isnan(src0[lane]) + || std::isnan(src1[lane])) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NGE_F32 class methods --- + + Inst_VOP3__V_CMP_NGE_F32::Inst_VOP3__V_CMP_NGE_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nge_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_NGE_F32 + + Inst_VOP3__V_CMP_NGE_F32::~Inst_VOP3__V_CMP_NGE_F32() + { + } // ~Inst_VOP3__V_CMP_NGE_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NLG_F32 class methods --- + + Inst_VOP3__V_CMP_NLG_F32::Inst_VOP3__V_CMP_NLG_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nlg_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_NLG_F32 + + Inst_VOP3__V_CMP_NLG_F32::~Inst_VOP3__V_CMP_NLG_F32() + { + } // ~Inst_VOP3__V_CMP_NLG_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. 
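+    // Note (added commentary, not from the .arch file): NaN compares
+    // unordered, so the negated form !(S0 <> S1) implemented below is true
+    // both when the operands are equal and when either operand is NaN.
+    // A per-lane example:
+    //
+    //   float a = std::nanf(""), b = 1.0f;
+    //   bool nlg = !(a < b || a > b);   // true: NaN makes both tests false
+    //
+    // The same reasoning applies to the other negated F32 compares in this
+    // file (V_CMP_NGE/NGT/NLE/NLT and their CMPX variants).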
+ void + Inst_VOP3__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NGT_F32 class methods --- + + Inst_VOP3__V_CMP_NGT_F32::Inst_VOP3__V_CMP_NGT_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ngt_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_NGT_F32 + + Inst_VOP3__V_CMP_NGT_F32::~Inst_VOP3__V_CMP_NGT_F32() + { + } // ~Inst_VOP3__V_CMP_NGT_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NLE_F32 class methods --- + + Inst_VOP3__V_CMP_NLE_F32::Inst_VOP3__V_CMP_NLE_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nle_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_NLE_F32 + + Inst_VOP3__V_CMP_NLE_F32::~Inst_VOP3__V_CMP_NLE_F32() + { + } // ~Inst_VOP3__V_CMP_NLE_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NEQ_F32 class methods --- + + Inst_VOP3__V_CMP_NEQ_F32::Inst_VOP3__V_CMP_NEQ_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_neq_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_NEQ_F32 + + Inst_VOP3__V_CMP_NEQ_F32::~Inst_VOP3__V_CMP_NEQ_F32() + { + } // ~Inst_VOP3__V_CMP_NEQ_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NLT_F32 class methods --- + + Inst_VOP3__V_CMP_NLT_F32::Inst_VOP3__V_CMP_NLT_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nlt_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_NLT_F32 + + Inst_VOP3__V_CMP_NLT_F32::~Inst_VOP3__V_CMP_NLT_F32() + { + } // ~Inst_VOP3__V_CMP_NLT_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_TRU_F32 class methods --- + + Inst_VOP3__V_CMP_TRU_F32::Inst_VOP3__V_CMP_TRU_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_tru_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_TRU_F32 + + Inst_VOP3__V_CMP_TRU_F32::~Inst_VOP3__V_CMP_TRU_F32() + { + } // ~Inst_VOP3__V_CMP_TRU_F32 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_F_F32 class methods --- + + Inst_VOP3__V_CMPX_F_F32::Inst_VOP3__V_CMPX_F_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_f_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_F_F32 + + Inst_VOP3__V_CMPX_F_F32::~Inst_VOP3__V_CMPX_F_F32() + { + } // ~Inst_VOP3__V_CMPX_F_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LT_F32 class methods --- + + Inst_VOP3__V_CMPX_LT_F32::Inst_VOP3__V_CMPX_LT_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lt_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LT_F32 + + Inst_VOP3__V_CMPX_LT_F32::~Inst_VOP3__V_CMPX_LT_F32() + { + } // ~Inst_VOP3__V_CMPX_LT_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_EQ_F32 class methods --- + + Inst_VOP3__V_CMPX_EQ_F32::Inst_VOP3__V_CMPX_EQ_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_eq_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_EQ_F32 + + Inst_VOP3__V_CMPX_EQ_F32::~Inst_VOP3__V_CMPX_EQ_F32() + { + } // ~Inst_VOP3__V_CMPX_EQ_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LE_F32 class methods --- + + Inst_VOP3__V_CMPX_LE_F32::Inst_VOP3__V_CMPX_LE_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_le_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LE_F32 + + Inst_VOP3__V_CMPX_LE_F32::~Inst_VOP3__V_CMPX_LE_F32() + { + } // ~Inst_VOP3__V_CMPX_LE_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GT_F32 class methods --- + + Inst_VOP3__V_CMPX_GT_F32::Inst_VOP3__V_CMPX_GT_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_gt_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GT_F32 + + Inst_VOP3__V_CMPX_GT_F32::~Inst_VOP3__V_CMPX_GT_F32() + { + } // ~Inst_VOP3__V_CMPX_GT_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LG_F32 class methods --- + + Inst_VOP3__V_CMPX_LG_F32::Inst_VOP3__V_CMPX_LG_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lg_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LG_F32 + + Inst_VOP3__V_CMPX_LG_F32::~Inst_VOP3__V_CMPX_LG_F32() + { + } // ~Inst_VOP3__V_CMPX_LG_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GE_F32 class methods --- + + Inst_VOP3__V_CMPX_GE_F32::Inst_VOP3__V_CMPX_GE_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ge_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GE_F32 + + Inst_VOP3__V_CMPX_GE_F32::~Inst_VOP3__V_CMPX_GE_F32() + { + } // ~Inst_VOP3__V_CMPX_GE_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_O_F32 class methods --- + + Inst_VOP3__V_CMPX_O_F32::Inst_VOP3__V_CMPX_O_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_o_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_O_F32 + + Inst_VOP3__V_CMPX_O_F32::~Inst_VOP3__V_CMPX_O_F32() + { + } // ~Inst_VOP3__V_CMPX_O_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC + // encoding. + void + Inst_VOP3__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (!std::isnan(src0[lane]) + && !std::isnan(src1[lane])) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_U_F32 class methods --- + + Inst_VOP3__V_CMPX_U_F32::Inst_VOP3__V_CMPX_U_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_u_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_U_F32 + + Inst_VOP3__V_CMPX_U_F32::~Inst_VOP3__V_CMPX_U_F32() + { + } // ~Inst_VOP3__V_CMPX_U_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC + // encoding. 
+ void + Inst_VOP3__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (std::isnan(src0[lane]) + || std::isnan(src1[lane])) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NGE_F32 class methods --- + + Inst_VOP3__V_CMPX_NGE_F32::Inst_VOP3__V_CMPX_NGE_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nge_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NGE_F32 + + Inst_VOP3__V_CMPX_NGE_F32::~Inst_VOP3__V_CMPX_NGE_F32() + { + } // ~Inst_VOP3__V_CMPX_NGE_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NLG_F32 class methods --- + + Inst_VOP3__V_CMPX_NLG_F32::Inst_VOP3__V_CMPX_NLG_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nlg_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NLG_F32 + + Inst_VOP3__V_CMPX_NLG_F32::~Inst_VOP3__V_CMPX_NLG_F32() + { + } // ~Inst_VOP3__V_CMPX_NLG_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NGT_F32 class methods --- + + Inst_VOP3__V_CMPX_NGT_F32::Inst_VOP3__V_CMPX_NGT_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ngt_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NGT_F32 + + Inst_VOP3__V_CMPX_NGT_F32::~Inst_VOP3__V_CMPX_NGT_F32() + { + } // ~Inst_VOP3__V_CMPX_NGT_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NLE_F32 class methods --- + + Inst_VOP3__V_CMPX_NLE_F32::Inst_VOP3__V_CMPX_NLE_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nle_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NLE_F32 + + Inst_VOP3__V_CMPX_NLE_F32::~Inst_VOP3__V_CMPX_NLE_F32() + { + } // ~Inst_VOP3__V_CMPX_NLE_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NEQ_F32 class methods --- + + Inst_VOP3__V_CMPX_NEQ_F32::Inst_VOP3__V_CMPX_NEQ_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_neq_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NEQ_F32 + + Inst_VOP3__V_CMPX_NEQ_F32::~Inst_VOP3__V_CMPX_NEQ_F32() + { + } // ~Inst_VOP3__V_CMPX_NEQ_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NLT_F32 class methods --- + + Inst_VOP3__V_CMPX_NLT_F32::Inst_VOP3__V_CMPX_NLT_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nlt_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NLT_F32 + + Inst_VOP3__V_CMPX_NLT_F32::~Inst_VOP3__V_CMPX_NLT_F32() + { + } // ~Inst_VOP3__V_CMPX_NLT_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_TRU_F32 class methods --- + + Inst_VOP3__V_CMPX_TRU_F32::Inst_VOP3__V_CMPX_TRU_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_tru_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_TRU_F32 + + Inst_VOP3__V_CMPX_TRU_F32::~Inst_VOP3__V_CMPX_TRU_F32() + { + } // ~Inst_VOP3__V_CMPX_TRU_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_F_F64 class methods --- + + Inst_VOP3__V_CMP_F_F64::Inst_VOP3__V_CMP_F_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_f_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_F_F64 + + Inst_VOP3__V_CMP_F_F64::~Inst_VOP3__V_CMP_F_F64() + { + } // ~Inst_VOP3__V_CMP_F_F64 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LT_F64 class methods --- + + Inst_VOP3__V_CMP_LT_F64::Inst_VOP3__V_CMP_LT_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_lt_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_LT_F64 + + Inst_VOP3__V_CMP_LT_F64::~Inst_VOP3__V_CMP_LT_F64() + { + } // ~Inst_VOP3__V_CMP_LT_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_EQ_F64 class methods --- + + Inst_VOP3__V_CMP_EQ_F64::Inst_VOP3__V_CMP_EQ_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_eq_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_EQ_F64 + + Inst_VOP3__V_CMP_EQ_F64::~Inst_VOP3__V_CMP_EQ_F64() + { + } // ~Inst_VOP3__V_CMP_EQ_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
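+ // The VOP3 F64 compares below apply the encoded input modifiers before
+ // comparing: ABS/NEG bit 0 affects SRC0 and bit 1 affects SRC1, while
+ // bit 2 is asserted clear, presumably because a compare has no third
+ // source operand.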
+ void + Inst_VOP3__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LE_F64 class methods --- + + Inst_VOP3__V_CMP_LE_F64::Inst_VOP3__V_CMP_LE_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_le_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_LE_F64 + + Inst_VOP3__V_CMP_LE_F64::~Inst_VOP3__V_CMP_LE_F64() + { + } // ~Inst_VOP3__V_CMP_LE_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GT_F64 class methods --- + + Inst_VOP3__V_CMP_GT_F64::Inst_VOP3__V_CMP_GT_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_gt_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_GT_F64 + + Inst_VOP3__V_CMP_GT_F64::~Inst_VOP3__V_CMP_GT_F64() + { + } // ~Inst_VOP3__V_CMP_GT_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LG_F64 class methods --- + + Inst_VOP3__V_CMP_LG_F64::Inst_VOP3__V_CMP_LG_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_lg_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_LG_F64 + + Inst_VOP3__V_CMP_LG_F64::~Inst_VOP3__V_CMP_LG_F64() + { + } // ~Inst_VOP3__V_CMP_LG_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GE_F64 class methods --- + + Inst_VOP3__V_CMP_GE_F64::Inst_VOP3__V_CMP_GE_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ge_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_GE_F64 + + Inst_VOP3__V_CMP_GE_F64::~Inst_VOP3__V_CMP_GE_F64() + { + } // ~Inst_VOP3__V_CMP_GE_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_O_F64 class methods --- + + Inst_VOP3__V_CMP_O_F64::Inst_VOP3__V_CMP_O_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_o_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_O_F64 + + Inst_VOP3__V_CMP_O_F64::~Inst_VOP3__V_CMP_O_F64() + { + } // ~Inst_VOP3__V_CMP_O_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. 
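+ // Ordered compare: the lane bit is set only when neither source is NaN,
+ // i.e. when the two values are actually comparable under IEEE 754.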
+ void + Inst_VOP3__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (!std::isnan(src0[lane]) + && !std::isnan(src1[lane])) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_U_F64 class methods --- + + Inst_VOP3__V_CMP_U_F64::Inst_VOP3__V_CMP_U_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_u_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_U_F64 + + Inst_VOP3__V_CMP_U_F64::~Inst_VOP3__V_CMP_U_F64() + { + } // ~Inst_VOP3__V_CMP_U_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (std::isnan(src0[lane]) + || std::isnan(src1[lane])) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NGE_F64 class methods --- + + Inst_VOP3__V_CMP_NGE_F64::Inst_VOP3__V_CMP_NGE_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nge_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_NGE_F64 + + Inst_VOP3__V_CMP_NGE_F64::~Inst_VOP3__V_CMP_NGE_F64() + { + } // ~Inst_VOP3__V_CMP_NGE_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 
1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NLG_F64 class methods --- + + Inst_VOP3__V_CMP_NLG_F64::Inst_VOP3__V_CMP_NLG_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nlg_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_NLG_F64 + + Inst_VOP3__V_CMP_NLG_F64::~Inst_VOP3__V_CMP_NLG_F64() + { + } // ~Inst_VOP3__V_CMP_NLG_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NGT_F64 class methods --- + + Inst_VOP3__V_CMP_NGT_F64::Inst_VOP3__V_CMP_NGT_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ngt_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_NGT_F64 + + Inst_VOP3__V_CMP_NGT_F64::~Inst_VOP3__V_CMP_NGT_F64() + { + } // ~Inst_VOP3__V_CMP_NGT_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NLE_F64 class methods --- + + Inst_VOP3__V_CMP_NLE_F64::Inst_VOP3__V_CMP_NLE_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nle_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_NLE_F64 + + Inst_VOP3__V_CMP_NLE_F64::~Inst_VOP3__V_CMP_NLE_F64() + { + } // ~Inst_VOP3__V_CMP_NLE_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. 
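+ // The negated ("n*") compares are written as !(S0 op S1) rather than the
+ // complementary operator, so they differ from the plain compares only
+ // when an operand is NaN: for example !(S0 <= S1) evaluates true when S0
+ // is NaN, whereas S0 > S1 would be false under IEEE 754 semantics.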
+ void + Inst_VOP3__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NEQ_F64 class methods --- + + Inst_VOP3__V_CMP_NEQ_F64::Inst_VOP3__V_CMP_NEQ_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_neq_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_NEQ_F64 + + Inst_VOP3__V_CMP_NEQ_F64::~Inst_VOP3__V_CMP_NEQ_F64() + { + } // ~Inst_VOP3__V_CMP_NEQ_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NLT_F64 class methods --- + + Inst_VOP3__V_CMP_NLT_F64::Inst_VOP3__V_CMP_NLT_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nlt_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_NLT_F64 + + Inst_VOP3__V_CMP_NLT_F64::~Inst_VOP3__V_CMP_NLT_F64() + { + } // ~Inst_VOP3__V_CMP_NLT_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 
1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_TRU_F64 class methods --- + + Inst_VOP3__V_CMP_TRU_F64::Inst_VOP3__V_CMP_TRU_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_tru_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_TRU_F64 + + Inst_VOP3__V_CMP_TRU_F64::~Inst_VOP3__V_CMP_TRU_F64() + { + } // ~Inst_VOP3__V_CMP_TRU_F64 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_F_F64 class methods --- + + Inst_VOP3__V_CMPX_F_F64::Inst_VOP3__V_CMPX_F_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_f_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_F_F64 + + Inst_VOP3__V_CMPX_F_F64::~Inst_VOP3__V_CMPX_F_F64() + { + } // ~Inst_VOP3__V_CMPX_F_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LT_F64 class methods --- + + Inst_VOP3__V_CMPX_LT_F64::Inst_VOP3__V_CMPX_LT_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lt_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LT_F64 + + Inst_VOP3__V_CMPX_LT_F64::~Inst_VOP3__V_CMPX_LT_F64() + { + } // ~Inst_VOP3__V_CMPX_LT_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_EQ_F64 class methods --- + + Inst_VOP3__V_CMPX_EQ_F64::Inst_VOP3__V_CMPX_EQ_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_eq_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_EQ_F64 + + Inst_VOP3__V_CMPX_EQ_F64::~Inst_VOP3__V_CMPX_EQ_F64() + { + } // ~Inst_VOP3__V_CMPX_EQ_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
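+ // The F64 CMPX variants follow the same pattern as the F32 ones above:
+ // after the per-lane compare, wf->execMask() is overwritten with the raw
+ // result mask in addition to the SDST write, so the comparison result
+ // becomes the execution mask seen by subsequent instructions.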
+ void + Inst_VOP3__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LE_F64 class methods --- + + Inst_VOP3__V_CMPX_LE_F64::Inst_VOP3__V_CMPX_LE_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_le_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LE_F64 + + Inst_VOP3__V_CMPX_LE_F64::~Inst_VOP3__V_CMPX_LE_F64() + { + } // ~Inst_VOP3__V_CMPX_LE_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GT_F64 class methods --- + + Inst_VOP3__V_CMPX_GT_F64::Inst_VOP3__V_CMPX_GT_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_gt_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GT_F64 + + Inst_VOP3__V_CMPX_GT_F64::~Inst_VOP3__V_CMPX_GT_F64() + { + } // ~Inst_VOP3__V_CMPX_GT_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LG_F64 class methods --- + + Inst_VOP3__V_CMPX_LG_F64::Inst_VOP3__V_CMPX_LG_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lg_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LG_F64 + + Inst_VOP3__V_CMPX_LG_F64::~Inst_VOP3__V_CMPX_LG_F64() + { + } // ~Inst_VOP3__V_CMPX_LG_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GE_F64 class methods --- + + Inst_VOP3__V_CMPX_GE_F64::Inst_VOP3__V_CMPX_GE_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ge_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GE_F64 + + Inst_VOP3__V_CMPX_GE_F64::~Inst_VOP3__V_CMPX_GE_F64() + { + } // ~Inst_VOP3__V_CMPX_GE_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_O_F64 class methods --- + + Inst_VOP3__V_CMPX_O_F64::Inst_VOP3__V_CMPX_O_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_o_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_O_F64 + + Inst_VOP3__V_CMPX_O_F64::~Inst_VOP3__V_CMPX_O_F64() + { + } // ~Inst_VOP3__V_CMPX_O_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC + // encoding. 
+ void + Inst_VOP3__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (!std::isnan(src0[lane]) + && !std::isnan(src1[lane])) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_U_F64 class methods --- + + Inst_VOP3__V_CMPX_U_F64::Inst_VOP3__V_CMPX_U_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_u_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_U_F64 + + Inst_VOP3__V_CMPX_U_F64::~Inst_VOP3__V_CMPX_U_F64() + { + } // ~Inst_VOP3__V_CMPX_U_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC + // encoding. + void + Inst_VOP3__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (std::isnan(src0[lane]) + || std::isnan(src1[lane])) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NGE_F64 class methods --- + + Inst_VOP3__V_CMPX_NGE_F64::Inst_VOP3__V_CMPX_NGE_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nge_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NGE_F64 + + Inst_VOP3__V_CMPX_NGE_F64::~Inst_VOP3__V_CMPX_NGE_F64() + { + } // ~Inst_VOP3__V_CMPX_NGE_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NLG_F64 class methods --- + + Inst_VOP3__V_CMPX_NLG_F64::Inst_VOP3__V_CMPX_NLG_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nlg_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NLG_F64 + + Inst_VOP3__V_CMPX_NLG_F64::~Inst_VOP3__V_CMPX_NLG_F64() + { + } // ~Inst_VOP3__V_CMPX_NLG_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NGT_F64 class methods --- + + Inst_VOP3__V_CMPX_NGT_F64::Inst_VOP3__V_CMPX_NGT_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ngt_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NGT_F64 + + Inst_VOP3__V_CMPX_NGT_F64::~Inst_VOP3__V_CMPX_NGT_F64() + { + } // ~Inst_VOP3__V_CMPX_NGT_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NLE_F64 class methods --- + + Inst_VOP3__V_CMPX_NLE_F64::Inst_VOP3__V_CMPX_NLE_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nle_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NLE_F64 + + Inst_VOP3__V_CMPX_NLE_F64::~Inst_VOP3__V_CMPX_NLE_F64() + { + } // ~Inst_VOP3__V_CMPX_NLE_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NEQ_F64 class methods --- + + Inst_VOP3__V_CMPX_NEQ_F64::Inst_VOP3__V_CMPX_NEQ_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_neq_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NEQ_F64 + + Inst_VOP3__V_CMPX_NEQ_F64::~Inst_VOP3__V_CMPX_NEQ_F64() + { + } // ~Inst_VOP3__V_CMPX_NEQ_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NLT_F64 class methods --- + + Inst_VOP3__V_CMPX_NLT_F64::Inst_VOP3__V_CMPX_NLT_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nlt_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NLT_F64 + + Inst_VOP3__V_CMPX_NLT_F64::~Inst_VOP3__V_CMPX_NLT_F64() + { + } // ~Inst_VOP3__V_CMPX_NLT_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_TRU_F64 class methods --- + + Inst_VOP3__V_CMPX_TRU_F64::Inst_VOP3__V_CMPX_TRU_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_tru_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_TRU_F64 + + Inst_VOP3__V_CMPX_TRU_F64::~Inst_VOP3__V_CMPX_TRU_F64() + { + } // ~Inst_VOP3__V_CMPX_TRU_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_F_I16 class methods --- + + Inst_VOP3__V_CMP_F_I16::Inst_VOP3__V_CMP_F_I16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_f_i16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_F_I16 + + Inst_VOP3__V_CMP_F_I16::~Inst_VOP3__V_CMP_F_I16() + { + } // ~Inst_VOP3__V_CMP_F_I16 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LT_I16 class methods --- + + Inst_VOP3__V_CMP_LT_I16::Inst_VOP3__V_CMP_LT_I16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_lt_i16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_LT_I16 + + Inst_VOP3__V_CMP_LT_I16::~Inst_VOP3__V_CMP_LT_I16() + { + } // ~Inst_VOP3__V_CMP_LT_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
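+ // For the integer (i16/u16) compares below, input modifiers are defined
+ // only for floating-point sources, so instead of applying ABS/NEG the
+ // code asserts that every modifier bit is clear.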
+ void + Inst_VOP3__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_EQ_I16 class methods --- + + Inst_VOP3__V_CMP_EQ_I16::Inst_VOP3__V_CMP_EQ_I16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_eq_i16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_EQ_I16 + + Inst_VOP3__V_CMP_EQ_I16::~Inst_VOP3__V_CMP_EQ_I16() + { + } // ~Inst_VOP3__V_CMP_EQ_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LE_I16 class methods --- + + Inst_VOP3__V_CMP_LE_I16::Inst_VOP3__V_CMP_LE_I16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_le_i16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_LE_I16 + + Inst_VOP3__V_CMP_LE_I16::~Inst_VOP3__V_CMP_LE_I16() + { + } // ~Inst_VOP3__V_CMP_LE_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GT_I16 class methods --- + + Inst_VOP3__V_CMP_GT_I16::Inst_VOP3__V_CMP_GT_I16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_gt_i16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_GT_I16 + + Inst_VOP3__V_CMP_GT_I16::~Inst_VOP3__V_CMP_GT_I16() + { + } // ~Inst_VOP3__V_CMP_GT_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NE_I16 class methods --- + + Inst_VOP3__V_CMP_NE_I16::Inst_VOP3__V_CMP_NE_I16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ne_i16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_NE_I16 + + Inst_VOP3__V_CMP_NE_I16::~Inst_VOP3__V_CMP_NE_I16() + { + } // ~Inst_VOP3__V_CMP_NE_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GE_I16 class methods --- + + Inst_VOP3__V_CMP_GE_I16::Inst_VOP3__V_CMP_GE_I16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ge_i16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_GE_I16 + + Inst_VOP3__V_CMP_GE_I16::~Inst_VOP3__V_CMP_GE_I16() + { + } // ~Inst_VOP3__V_CMP_GE_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_T_I16 class methods --- + + Inst_VOP3__V_CMP_T_I16::Inst_VOP3__V_CMP_T_I16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_t_i16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_T_I16 + + Inst_VOP3__V_CMP_T_I16::~Inst_VOP3__V_CMP_T_I16() + { + } // ~Inst_VOP3__V_CMP_T_I16 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
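+ // The "t" (always true) and "f" (always false) compares ignore the
+ // sources entirely; each active lane's bit is simply written with the
+ // constant 1 or 0 before SDST is stored.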
+ void + Inst_VOP3__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_F_U16 class methods --- + + Inst_VOP3__V_CMP_F_U16::Inst_VOP3__V_CMP_F_U16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_f_u16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_F_U16 + + Inst_VOP3__V_CMP_F_U16::~Inst_VOP3__V_CMP_F_U16() + { + } // ~Inst_VOP3__V_CMP_F_U16 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LT_U16 class methods --- + + Inst_VOP3__V_CMP_LT_U16::Inst_VOP3__V_CMP_LT_U16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_lt_u16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_LT_U16 + + Inst_VOP3__V_CMP_LT_U16::~Inst_VOP3__V_CMP_LT_U16() + { + } // ~Inst_VOP3__V_CMP_LT_U16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_EQ_U16 class methods --- + + Inst_VOP3__V_CMP_EQ_U16::Inst_VOP3__V_CMP_EQ_U16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_eq_u16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_EQ_U16 + + Inst_VOP3__V_CMP_EQ_U16::~Inst_VOP3__V_CMP_EQ_U16() + { + } // ~Inst_VOP3__V_CMP_EQ_U16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LE_U16 class methods --- + + Inst_VOP3__V_CMP_LE_U16::Inst_VOP3__V_CMP_LE_U16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_le_u16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_LE_U16 + + Inst_VOP3__V_CMP_LE_U16::~Inst_VOP3__V_CMP_LE_U16() + { + } // ~Inst_VOP3__V_CMP_LE_U16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GT_U16 class methods --- + + Inst_VOP3__V_CMP_GT_U16::Inst_VOP3__V_CMP_GT_U16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_gt_u16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_GT_U16 + + Inst_VOP3__V_CMP_GT_U16::~Inst_VOP3__V_CMP_GT_U16() + { + } // ~Inst_VOP3__V_CMP_GT_U16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NE_U16 class methods --- + + Inst_VOP3__V_CMP_NE_U16::Inst_VOP3__V_CMP_NE_U16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ne_u16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_NE_U16 + + Inst_VOP3__V_CMP_NE_U16::~Inst_VOP3__V_CMP_NE_U16() + { + } // ~Inst_VOP3__V_CMP_NE_U16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GE_U16 class methods --- + + Inst_VOP3__V_CMP_GE_U16::Inst_VOP3__V_CMP_GE_U16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ge_u16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_GE_U16 + + Inst_VOP3__V_CMP_GE_U16::~Inst_VOP3__V_CMP_GE_U16() + { + } // ~Inst_VOP3__V_CMP_GE_U16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_T_U16 class methods --- + + Inst_VOP3__V_CMP_T_U16::Inst_VOP3__V_CMP_T_U16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_t_u16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_T_U16 + + Inst_VOP3__V_CMP_T_U16::~Inst_VOP3__V_CMP_T_U16() + { + } // ~Inst_VOP3__V_CMP_T_U16 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_F_I16 class methods --- + + Inst_VOP3__V_CMPX_F_I16::Inst_VOP3__V_CMPX_F_I16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_f_i16", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_F_I16 + + Inst_VOP3__V_CMPX_F_I16::~Inst_VOP3__V_CMPX_F_I16() + { + } // ~Inst_VOP3__V_CMPX_F_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LT_I16 class methods --- + + Inst_VOP3__V_CMPX_LT_I16::Inst_VOP3__V_CMPX_LT_I16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lt_i16", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LT_I16 + + Inst_VOP3__V_CMPX_LT_I16::~Inst_VOP3__V_CMPX_LT_I16() + { + } // ~Inst_VOP3__V_CMPX_LT_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
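+    // The v_cmpx_* compares (here and below) all follow the same pattern:
+    // the compare is evaluated only for lanes active under the current EXEC
+    // mask, the per-lane results are collected in the 64-bit scalar
+    // destination (an SGPR pair selected by VDST in this VOP3 encoding, VCC
+    // in the VOPC encoding), and EXEC is then overwritten with that result
+    // before the destination register is written back. Roughly:
+    //
+    //     for (lane = 0; lane < NumVecElemPerVecReg; ++lane)
+    //         if (EXEC[lane])
+    //             result[lane] = cmp(S0[lane], S1[lane]);
+    //     EXEC = result;
+    //     SDST = result;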
+ void + Inst_VOP3__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_EQ_I16 class methods --- + + Inst_VOP3__V_CMPX_EQ_I16::Inst_VOP3__V_CMPX_EQ_I16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_eq_i16", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_EQ_I16 + + Inst_VOP3__V_CMPX_EQ_I16::~Inst_VOP3__V_CMPX_EQ_I16() + { + } // ~Inst_VOP3__V_CMPX_EQ_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LE_I16 class methods --- + + Inst_VOP3__V_CMPX_LE_I16::Inst_VOP3__V_CMPX_LE_I16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_le_i16", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LE_I16 + + Inst_VOP3__V_CMPX_LE_I16::~Inst_VOP3__V_CMPX_LE_I16() + { + } // ~Inst_VOP3__V_CMPX_LE_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GT_I16 class methods --- + + Inst_VOP3__V_CMPX_GT_I16::Inst_VOP3__V_CMPX_GT_I16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_gt_i16", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GT_I16 + + Inst_VOP3__V_CMPX_GT_I16::~Inst_VOP3__V_CMPX_GT_I16() + { + } // ~Inst_VOP3__V_CMPX_GT_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NE_I16 class methods --- + + Inst_VOP3__V_CMPX_NE_I16::Inst_VOP3__V_CMPX_NE_I16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ne_i16", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NE_I16 + + Inst_VOP3__V_CMPX_NE_I16::~Inst_VOP3__V_CMPX_NE_I16() + { + } // ~Inst_VOP3__V_CMPX_NE_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GE_I16 class methods --- + + Inst_VOP3__V_CMPX_GE_I16::Inst_VOP3__V_CMPX_GE_I16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ge_i16", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GE_I16 + + Inst_VOP3__V_CMPX_GE_I16::~Inst_VOP3__V_CMPX_GE_I16() + { + } // ~Inst_VOP3__V_CMPX_GE_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_T_I16 class methods --- + + Inst_VOP3__V_CMPX_T_I16::Inst_VOP3__V_CMPX_T_I16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_t_i16", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_T_I16 + + Inst_VOP3__V_CMPX_T_I16::~Inst_VOP3__V_CMPX_T_I16() + { + } // ~Inst_VOP3__V_CMPX_T_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_F_U16 class methods --- + + Inst_VOP3__V_CMPX_F_U16::Inst_VOP3__V_CMPX_F_U16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_f_u16", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_F_U16 + + Inst_VOP3__V_CMPX_F_U16::~Inst_VOP3__V_CMPX_F_U16() + { + } // ~Inst_VOP3__V_CMPX_F_U16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LT_U16 class methods --- + + Inst_VOP3__V_CMPX_LT_U16::Inst_VOP3__V_CMPX_LT_U16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lt_u16", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LT_U16 + + Inst_VOP3__V_CMPX_LT_U16::~Inst_VOP3__V_CMPX_LT_U16() + { + } // ~Inst_VOP3__V_CMPX_LT_U16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0);
+            }
+        }
+
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_EQ_U16 class methods ---
+
+    Inst_VOP3__V_CMPX_EQ_U16::Inst_VOP3__V_CMPX_EQ_U16(
+        InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_eq_u16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_EQ_U16
+
+    Inst_VOP3__V_CMPX_EQ_U16::~Inst_VOP3__V_CMPX_EQ_U16()
+    {
+    } // ~Inst_VOP3__V_CMPX_EQ_U16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
+            }
+        }
+
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_LE_U16 class methods ---
+
+    Inst_VOP3__V_CMPX_LE_U16::Inst_VOP3__V_CMPX_LE_U16(
+        InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_le_u16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_LE_U16
+
+    Inst_VOP3__V_CMPX_LE_U16::~Inst_VOP3__V_CMPX_LE_U16()
+    {
+    } // ~Inst_VOP3__V_CMPX_LE_U16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
+            }
+        }
+
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_GT_U16 class methods ---
+
+    Inst_VOP3__V_CMPX_GT_U16::Inst_VOP3__V_CMPX_GT_U16(
+        InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_gt_u16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_GT_U16
+
+    Inst_VOP3__V_CMPX_GT_U16::~Inst_VOP3__V_CMPX_GT_U16()
+    {
+    } // ~Inst_VOP3__V_CMPX_GT_U16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
+            }
+        }
+
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_NE_U16 class methods ---
+
+    Inst_VOP3__V_CMPX_NE_U16::Inst_VOP3__V_CMPX_NE_U16(
+        InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_ne_u16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_NE_U16
+
+    Inst_VOP3__V_CMPX_NE_U16::~Inst_VOP3__V_CMPX_NE_U16()
+    {
+    } // ~Inst_VOP3__V_CMPX_NE_U16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
+            }
+        }
+
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_GE_U16 class methods ---
+
+    Inst_VOP3__V_CMPX_GE_U16::Inst_VOP3__V_CMPX_GE_U16(
+        InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_ge_u16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_GE_U16
+
+    Inst_VOP3__V_CMPX_GE_U16::~Inst_VOP3__V_CMPX_GE_U16()
+    {
+    } // ~Inst_VOP3__V_CMPX_GE_U16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] >= src1[lane] ?
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_T_U16 class methods --- + + Inst_VOP3__V_CMPX_T_U16::Inst_VOP3__V_CMPX_T_U16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_t_u16", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_T_U16 + + Inst_VOP3__V_CMPX_T_U16::~Inst_VOP3__V_CMPX_T_U16() + { + } // ~Inst_VOP3__V_CMPX_T_U16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_F_I32 class methods --- + + Inst_VOP3__V_CMP_F_I32::Inst_VOP3__V_CMP_F_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_f_i32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_F_I32 + + Inst_VOP3__V_CMP_F_I32::~Inst_VOP3__V_CMP_F_I32() + { + } // ~Inst_VOP3__V_CMP_F_I32 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LT_I32 class methods --- + + Inst_VOP3__V_CMP_LT_I32::Inst_VOP3__V_CMP_LT_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_lt_i32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_LT_I32 + + Inst_VOP3__V_CMP_LT_I32::~Inst_VOP3__V_CMP_LT_I32() + { + } // ~Inst_VOP3__V_CMP_LT_I32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_EQ_I32 class methods --- + + Inst_VOP3__V_CMP_EQ_I32::Inst_VOP3__V_CMP_EQ_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_eq_i32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_EQ_I32 + + Inst_VOP3__V_CMP_EQ_I32::~Inst_VOP3__V_CMP_EQ_I32() + { + } // ~Inst_VOP3__V_CMP_EQ_I32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
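+    // As in the other integer compares, only lanes active in EXEC have their
+    // result bit explicitly set or cleared by the loop below; bits for
+    // inactive lanes are not touched before the destination is written back.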
+ void + Inst_VOP3__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LE_I32 class methods --- + + Inst_VOP3__V_CMP_LE_I32::Inst_VOP3__V_CMP_LE_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_le_i32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_LE_I32 + + Inst_VOP3__V_CMP_LE_I32::~Inst_VOP3__V_CMP_LE_I32() + { + } // ~Inst_VOP3__V_CMP_LE_I32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GT_I32 class methods --- + + Inst_VOP3__V_CMP_GT_I32::Inst_VOP3__V_CMP_GT_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_gt_i32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_GT_I32 + + Inst_VOP3__V_CMP_GT_I32::~Inst_VOP3__V_CMP_GT_I32() + { + } // ~Inst_VOP3__V_CMP_GT_I32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NE_I32 class methods --- + + Inst_VOP3__V_CMP_NE_I32::Inst_VOP3__V_CMP_NE_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ne_i32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_NE_I32 + + Inst_VOP3__V_CMP_NE_I32::~Inst_VOP3__V_CMP_NE_I32() + { + } // ~Inst_VOP3__V_CMP_NE_I32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GE_I32 class methods --- + + Inst_VOP3__V_CMP_GE_I32::Inst_VOP3__V_CMP_GE_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ge_i32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_GE_I32 + + Inst_VOP3__V_CMP_GE_I32::~Inst_VOP3__V_CMP_GE_I32() + { + } // ~Inst_VOP3__V_CMP_GE_I32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_T_I32 class methods --- + + Inst_VOP3__V_CMP_T_I32::Inst_VOP3__V_CMP_T_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_t_i32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_T_I32 + + Inst_VOP3__V_CMP_T_I32::~Inst_VOP3__V_CMP_T_I32() + { + } // ~Inst_VOP3__V_CMP_T_I32 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_F_U32 class methods --- + + Inst_VOP3__V_CMP_F_U32::Inst_VOP3__V_CMP_F_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_f_u32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_F_U32 + + Inst_VOP3__V_CMP_F_U32::~Inst_VOP3__V_CMP_F_U32() + { + } // ~Inst_VOP3__V_CMP_F_U32 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
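+    // v_cmp_f_* ("false") and v_cmp_t_* ("true") ignore their sources: every
+    // active lane's result bit is simply cleared or set, so no source
+    // operands are read and no input-modifier checks are needed here.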
+ void + Inst_VOP3__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LT_U32 class methods --- + + Inst_VOP3__V_CMP_LT_U32::Inst_VOP3__V_CMP_LT_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_lt_u32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_LT_U32 + + Inst_VOP3__V_CMP_LT_U32::~Inst_VOP3__V_CMP_LT_U32() + { + } // ~Inst_VOP3__V_CMP_LT_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_EQ_U32 class methods --- + + Inst_VOP3__V_CMP_EQ_U32::Inst_VOP3__V_CMP_EQ_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_eq_u32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_EQ_U32 + + Inst_VOP3__V_CMP_EQ_U32::~Inst_VOP3__V_CMP_EQ_U32() + { + } // ~Inst_VOP3__V_CMP_EQ_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LE_U32 class methods --- + + Inst_VOP3__V_CMP_LE_U32::Inst_VOP3__V_CMP_LE_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_le_u32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_LE_U32 + + Inst_VOP3__V_CMP_LE_U32::~Inst_VOP3__V_CMP_LE_U32() + { + } // ~Inst_VOP3__V_CMP_LE_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GT_U32 class methods --- + + Inst_VOP3__V_CMP_GT_U32::Inst_VOP3__V_CMP_GT_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_gt_u32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_GT_U32 + + Inst_VOP3__V_CMP_GT_U32::~Inst_VOP3__V_CMP_GT_U32() + { + } // ~Inst_VOP3__V_CMP_GT_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NE_U32 class methods --- + + Inst_VOP3__V_CMP_NE_U32::Inst_VOP3__V_CMP_NE_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ne_u32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_NE_U32 + + Inst_VOP3__V_CMP_NE_U32::~Inst_VOP3__V_CMP_NE_U32() + { + } // ~Inst_VOP3__V_CMP_NE_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GE_U32 class methods --- + + Inst_VOP3__V_CMP_GE_U32::Inst_VOP3__V_CMP_GE_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ge_u32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_GE_U32 + + Inst_VOP3__V_CMP_GE_U32::~Inst_VOP3__V_CMP_GE_U32() + { + } // ~Inst_VOP3__V_CMP_GE_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_T_U32 class methods --- + + Inst_VOP3__V_CMP_T_U32::Inst_VOP3__V_CMP_T_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_t_u32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_T_U32 + + Inst_VOP3__V_CMP_T_U32::~Inst_VOP3__V_CMP_T_U32() + { + } // ~Inst_VOP3__V_CMP_T_U32 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_F_I32 class methods --- + + Inst_VOP3__V_CMPX_F_I32::Inst_VOP3__V_CMPX_F_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_f_i32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_F_I32 + + Inst_VOP3__V_CMPX_F_I32::~Inst_VOP3__V_CMPX_F_I32() + { + } // ~Inst_VOP3__V_CMPX_F_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LT_I32 class methods --- + + Inst_VOP3__V_CMPX_LT_I32::Inst_VOP3__V_CMPX_LT_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lt_i32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LT_I32 + + Inst_VOP3__V_CMPX_LT_I32::~Inst_VOP3__V_CMPX_LT_I32() + { + } // ~Inst_VOP3__V_CMPX_LT_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_EQ_I32 class methods --- + + Inst_VOP3__V_CMPX_EQ_I32::Inst_VOP3__V_CMPX_EQ_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_eq_i32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_EQ_I32 + + Inst_VOP3__V_CMPX_EQ_I32::~Inst_VOP3__V_CMPX_EQ_I32() + { + } // ~Inst_VOP3__V_CMPX_EQ_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LE_I32 class methods --- + + Inst_VOP3__V_CMPX_LE_I32::Inst_VOP3__V_CMPX_LE_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_le_i32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LE_I32 + + Inst_VOP3__V_CMPX_LE_I32::~Inst_VOP3__V_CMPX_LE_I32() + { + } // ~Inst_VOP3__V_CMPX_LE_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GT_I32 class methods --- + + Inst_VOP3__V_CMPX_GT_I32::Inst_VOP3__V_CMPX_GT_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_gt_i32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GT_I32 + + Inst_VOP3__V_CMPX_GT_I32::~Inst_VOP3__V_CMPX_GT_I32() + { + } // ~Inst_VOP3__V_CMPX_GT_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NE_I32 class methods --- + + Inst_VOP3__V_CMPX_NE_I32::Inst_VOP3__V_CMPX_NE_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ne_i32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NE_I32 + + Inst_VOP3__V_CMPX_NE_I32::~Inst_VOP3__V_CMPX_NE_I32() + { + } // ~Inst_VOP3__V_CMPX_NE_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GE_I32 class methods --- + + Inst_VOP3__V_CMPX_GE_I32::Inst_VOP3__V_CMPX_GE_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ge_i32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GE_I32 + + Inst_VOP3__V_CMPX_GE_I32::~Inst_VOP3__V_CMPX_GE_I32() + { + } // ~Inst_VOP3__V_CMPX_GE_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_T_I32 class methods --- + + Inst_VOP3__V_CMPX_T_I32::Inst_VOP3__V_CMPX_T_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_t_i32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_T_I32 + + Inst_VOP3__V_CMPX_T_I32::~Inst_VOP3__V_CMPX_T_I32() + { + } // ~Inst_VOP3__V_CMPX_T_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_F_U32 class methods --- + + Inst_VOP3__V_CMPX_F_U32::Inst_VOP3__V_CMPX_F_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_f_u32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_F_U32 + + Inst_VOP3__V_CMPX_F_U32::~Inst_VOP3__V_CMPX_F_U32() + { + } // ~Inst_VOP3__V_CMPX_F_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LT_U32 class methods --- + + Inst_VOP3__V_CMPX_LT_U32::Inst_VOP3__V_CMPX_LT_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lt_u32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LT_U32 + + Inst_VOP3__V_CMPX_LT_U32::~Inst_VOP3__V_CMPX_LT_U32() + { + } // ~Inst_VOP3__V_CMPX_LT_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_EQ_U32 class methods --- + + Inst_VOP3__V_CMPX_EQ_U32::Inst_VOP3__V_CMPX_EQ_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_eq_u32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_EQ_U32 + + Inst_VOP3__V_CMPX_EQ_U32::~Inst_VOP3__V_CMPX_EQ_U32() + { + } // ~Inst_VOP3__V_CMPX_EQ_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LE_U32 class methods --- + + Inst_VOP3__V_CMPX_LE_U32::Inst_VOP3__V_CMPX_LE_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_le_u32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LE_U32 + + Inst_VOP3__V_CMPX_LE_U32::~Inst_VOP3__V_CMPX_LE_U32() + { + } // ~Inst_VOP3__V_CMPX_LE_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GT_U32 class methods --- + + Inst_VOP3__V_CMPX_GT_U32::Inst_VOP3__V_CMPX_GT_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_gt_u32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GT_U32 + + Inst_VOP3__V_CMPX_GT_U32::~Inst_VOP3__V_CMPX_GT_U32() + { + } // ~Inst_VOP3__V_CMPX_GT_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NE_U32 class methods --- + + Inst_VOP3__V_CMPX_NE_U32::Inst_VOP3__V_CMPX_NE_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ne_u32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NE_U32 + + Inst_VOP3__V_CMPX_NE_U32::~Inst_VOP3__V_CMPX_NE_U32() + { + } // ~Inst_VOP3__V_CMPX_NE_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GE_U32 class methods --- + + Inst_VOP3__V_CMPX_GE_U32::Inst_VOP3__V_CMPX_GE_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ge_u32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GE_U32 + + Inst_VOP3__V_CMPX_GE_U32::~Inst_VOP3__V_CMPX_GE_U32() + { + } // ~Inst_VOP3__V_CMPX_GE_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_T_U32 class methods --- + + Inst_VOP3__V_CMPX_T_U32::Inst_VOP3__V_CMPX_T_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_t_u32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_T_U32 + + Inst_VOP3__V_CMPX_T_U32::~Inst_VOP3__V_CMPX_T_U32() + { + } // ~Inst_VOP3__V_CMPX_T_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_F_I64 class methods --- + + Inst_VOP3__V_CMP_F_I64::Inst_VOP3__V_CMP_F_I64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_f_i64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_F_I64 + + Inst_VOP3__V_CMP_F_I64::~Inst_VOP3__V_CMP_F_I64() + { + } // ~Inst_VOP3__V_CMP_F_I64 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LT_I64 class methods --- + + Inst_VOP3__V_CMP_LT_I64::Inst_VOP3__V_CMP_LT_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_lt_i64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_LT_I64 + + Inst_VOP3__V_CMP_LT_I64::~Inst_VOP3__V_CMP_LT_I64() + { + } // ~Inst_VOP3__V_CMP_LT_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_EQ_I64 class methods --- + + Inst_VOP3__V_CMP_EQ_I64::Inst_VOP3__V_CMP_EQ_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_eq_i64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_EQ_I64 + + Inst_VOP3__V_CMP_EQ_I64::~Inst_VOP3__V_CMP_EQ_I64() + { + } // ~Inst_VOP3__V_CMP_EQ_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LE_I64 class methods --- + + Inst_VOP3__V_CMP_LE_I64::Inst_VOP3__V_CMP_LE_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_le_i64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_LE_I64 + + Inst_VOP3__V_CMP_LE_I64::~Inst_VOP3__V_CMP_LE_I64() + { + } // ~Inst_VOP3__V_CMP_LE_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GT_I64 class methods --- + + Inst_VOP3__V_CMP_GT_I64::Inst_VOP3__V_CMP_GT_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_gt_i64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_GT_I64 + + Inst_VOP3__V_CMP_GT_I64::~Inst_VOP3__V_CMP_GT_I64() + { + } // ~Inst_VOP3__V_CMP_GT_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NE_I64 class methods --- + + Inst_VOP3__V_CMP_NE_I64::Inst_VOP3__V_CMP_NE_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ne_i64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_NE_I64 + + Inst_VOP3__V_CMP_NE_I64::~Inst_VOP3__V_CMP_NE_I64() + { + } // ~Inst_VOP3__V_CMP_NE_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GE_I64 class methods --- + + Inst_VOP3__V_CMP_GE_I64::Inst_VOP3__V_CMP_GE_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ge_i64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_GE_I64 + + Inst_VOP3__V_CMP_GE_I64::~Inst_VOP3__V_CMP_GE_I64() + { + } // ~Inst_VOP3__V_CMP_GE_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_T_I64 class methods --- + + Inst_VOP3__V_CMP_T_I64::Inst_VOP3__V_CMP_T_I64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_t_i64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_T_I64 + + Inst_VOP3__V_CMP_T_I64::~Inst_VOP3__V_CMP_T_I64() + { + } // ~Inst_VOP3__V_CMP_T_I64 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_F_U64 class methods --- + + Inst_VOP3__V_CMP_F_U64::Inst_VOP3__V_CMP_F_U64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_f_u64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_F_U64 + + Inst_VOP3__V_CMP_F_U64::~Inst_VOP3__V_CMP_F_U64() + { + } // ~Inst_VOP3__V_CMP_F_U64 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LT_U64 class methods --- + + Inst_VOP3__V_CMP_LT_U64::Inst_VOP3__V_CMP_LT_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_lt_u64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_LT_U64 + + Inst_VOP3__V_CMP_LT_U64::~Inst_VOP3__V_CMP_LT_U64() + { + } // ~Inst_VOP3__V_CMP_LT_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
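+ // Unlike the CMPX forms, the plain CMP forms below only build the per-lane + // result mask in the SDST scalar destination (VCC when the VOPC encoding is + // used) and leave EXEC untouched. For example, in this less-than compare a + // lane where S0 = 1 and S1 = 2 sets its bit to 1, while a lane where + // S0 = 2 and S1 = 2 sets its bit to 0.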
+ void + Inst_VOP3__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_EQ_U64 class methods --- + + Inst_VOP3__V_CMP_EQ_U64::Inst_VOP3__V_CMP_EQ_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_eq_u64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_EQ_U64 + + Inst_VOP3__V_CMP_EQ_U64::~Inst_VOP3__V_CMP_EQ_U64() + { + } // ~Inst_VOP3__V_CMP_EQ_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LE_U64 class methods --- + + Inst_VOP3__V_CMP_LE_U64::Inst_VOP3__V_CMP_LE_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_le_u64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_LE_U64 + + Inst_VOP3__V_CMP_LE_U64::~Inst_VOP3__V_CMP_LE_U64() + { + } // ~Inst_VOP3__V_CMP_LE_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GT_U64 class methods --- + + Inst_VOP3__V_CMP_GT_U64::Inst_VOP3__V_CMP_GT_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_gt_u64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_GT_U64 + + Inst_VOP3__V_CMP_GT_U64::~Inst_VOP3__V_CMP_GT_U64() + { + } // ~Inst_VOP3__V_CMP_GT_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NE_U64 class methods --- + + Inst_VOP3__V_CMP_NE_U64::Inst_VOP3__V_CMP_NE_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ne_u64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_NE_U64 + + Inst_VOP3__V_CMP_NE_U64::~Inst_VOP3__V_CMP_NE_U64() + { + } // ~Inst_VOP3__V_CMP_NE_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GE_U64 class methods --- + + Inst_VOP3__V_CMP_GE_U64::Inst_VOP3__V_CMP_GE_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ge_u64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_GE_U64 + + Inst_VOP3__V_CMP_GE_U64::~Inst_VOP3__V_CMP_GE_U64() + { + } // ~Inst_VOP3__V_CMP_GE_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_T_U64 class methods --- + + Inst_VOP3__V_CMP_T_U64::Inst_VOP3__V_CMP_T_U64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_t_u64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_T_U64 + + Inst_VOP3__V_CMP_T_U64::~Inst_VOP3__V_CMP_T_U64() + { + } // ~Inst_VOP3__V_CMP_T_U64 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
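+ // Note that even this always-true compare is exec-masked: the loop below + // only sets the result bit to 1 for lanes that are active in EXEC, and the + // bits belonging to inactive lanes are not written at all.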
+ void + Inst_VOP3__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_F_I64 class methods --- + + Inst_VOP3__V_CMPX_F_I64::Inst_VOP3__V_CMPX_F_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_f_i64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_F_I64 + + Inst_VOP3__V_CMPX_F_I64::~Inst_VOP3__V_CMPX_F_I64() + { + } // ~Inst_VOP3__V_CMPX_F_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LT_I64 class methods --- + + Inst_VOP3__V_CMPX_LT_I64::Inst_VOP3__V_CMPX_LT_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lt_i64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LT_I64 + + Inst_VOP3__V_CMPX_LT_I64::~Inst_VOP3__V_CMPX_LT_I64() + { + } // ~Inst_VOP3__V_CMPX_LT_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_EQ_I64 class methods --- + + Inst_VOP3__V_CMPX_EQ_I64::Inst_VOP3__V_CMPX_EQ_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_eq_i64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_EQ_I64 + + Inst_VOP3__V_CMPX_EQ_I64::~Inst_VOP3__V_CMPX_EQ_I64() + { + } // ~Inst_VOP3__V_CMPX_EQ_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LE_I64 class methods --- + + Inst_VOP3__V_CMPX_LE_I64::Inst_VOP3__V_CMPX_LE_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_le_i64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LE_I64 + + Inst_VOP3__V_CMPX_LE_I64::~Inst_VOP3__V_CMPX_LE_I64() + { + } // ~Inst_VOP3__V_CMPX_LE_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GT_I64 class methods --- + + Inst_VOP3__V_CMPX_GT_I64::Inst_VOP3__V_CMPX_GT_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_gt_i64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GT_I64 + + Inst_VOP3__V_CMPX_GT_I64::~Inst_VOP3__V_CMPX_GT_I64() + { + } // ~Inst_VOP3__V_CMPX_GT_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NE_I64 class methods --- + + Inst_VOP3__V_CMPX_NE_I64::Inst_VOP3__V_CMPX_NE_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ne_i64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NE_I64 + + Inst_VOP3__V_CMPX_NE_I64::~Inst_VOP3__V_CMPX_NE_I64() + { + } // ~Inst_VOP3__V_CMPX_NE_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GE_I64 class methods --- + + Inst_VOP3__V_CMPX_GE_I64::Inst_VOP3__V_CMPX_GE_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ge_i64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GE_I64 + + Inst_VOP3__V_CMPX_GE_I64::~Inst_VOP3__V_CMPX_GE_I64() + { + } // ~Inst_VOP3__V_CMPX_GE_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_T_I64 class methods --- + + Inst_VOP3__V_CMPX_T_I64::Inst_VOP3__V_CMPX_T_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_t_i64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_T_I64 + + Inst_VOP3__V_CMPX_T_I64::~Inst_VOP3__V_CMPX_T_I64() + { + } // ~Inst_VOP3__V_CMPX_T_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_F_U64 class methods --- + + Inst_VOP3__V_CMPX_F_U64::Inst_VOP3__V_CMPX_F_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_f_u64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_F_U64 + + Inst_VOP3__V_CMPX_F_U64::~Inst_VOP3__V_CMPX_F_U64() + { + } // ~Inst_VOP3__V_CMPX_F_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LT_U64 class methods --- + + Inst_VOP3__V_CMPX_LT_U64::Inst_VOP3__V_CMPX_LT_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lt_u64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LT_U64 + + Inst_VOP3__V_CMPX_LT_U64::~Inst_VOP3__V_CMPX_LT_U64() + { + } // ~Inst_VOP3__V_CMPX_LT_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_EQ_U64 class methods --- + + Inst_VOP3__V_CMPX_EQ_U64::Inst_VOP3__V_CMPX_EQ_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_eq_u64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_EQ_U64 + + Inst_VOP3__V_CMPX_EQ_U64::~Inst_VOP3__V_CMPX_EQ_U64() + { + } // ~Inst_VOP3__V_CMPX_EQ_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LE_U64 class methods --- + + Inst_VOP3__V_CMPX_LE_U64::Inst_VOP3__V_CMPX_LE_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_le_u64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LE_U64 + + Inst_VOP3__V_CMPX_LE_U64::~Inst_VOP3__V_CMPX_LE_U64() + { + } // ~Inst_VOP3__V_CMPX_LE_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GT_U64 class methods --- + + Inst_VOP3__V_CMPX_GT_U64::Inst_VOP3__V_CMPX_GT_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_gt_u64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GT_U64 + + Inst_VOP3__V_CMPX_GT_U64::~Inst_VOP3__V_CMPX_GT_U64() + { + } // ~Inst_VOP3__V_CMPX_GT_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NE_U64 class methods --- + + Inst_VOP3__V_CMPX_NE_U64::Inst_VOP3__V_CMPX_NE_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ne_u64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NE_U64 + + Inst_VOP3__V_CMPX_NE_U64::~Inst_VOP3__V_CMPX_NE_U64() + { + } // ~Inst_VOP3__V_CMPX_NE_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GE_U64 class methods --- + + Inst_VOP3__V_CMPX_GE_U64::Inst_VOP3__V_CMPX_GE_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ge_u64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GE_U64 + + Inst_VOP3__V_CMPX_GE_U64::~Inst_VOP3__V_CMPX_GE_U64() + { + } // ~Inst_VOP3__V_CMPX_GE_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_T_U64 class methods --- + + Inst_VOP3__V_CMPX_T_U64::Inst_VOP3__V_CMPX_T_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_t_u64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_T_U64 + + Inst_VOP3__V_CMPX_T_U64::~Inst_VOP3__V_CMPX_T_U64() + { + } // ~Inst_VOP3__V_CMPX_T_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/vop3p.cc b/src/arch/amdgpu/vega/insts/vop3p.cc index eddb1e7ad5..85f0af2a51 100644 --- a/src/arch/amdgpu/vega/insts/vop3p.cc +++ b/src/arch/amdgpu/vega/insts/vop3p.cc @@ -31,6 +31,7 @@ #include "arch/amdgpu/vega/insts/vop3p.hh" +#include "arch/amdgpu/vega/insts/instructions.hh" #include "arch/arm/insts/fplib.hh" namespace gem5 @@ -631,5 +632,236 @@ void Inst_VOP3P__V_ACCVGPR_WRITE::execute(GPUDynInstPtr gpuDynInst) vdst.write(); } +// --- Inst_VOP3P__V_PK_FMA_F32 class methods --- + +Inst_VOP3P__V_PK_FMA_F32::Inst_VOP3P__V_PK_FMA_F32(InFmt_VOP3P *iFmt) + : Inst_VOP3P(iFmt, "v_pk_fma_f32") +{ + setFlag(ALU); +} // Inst_VOP3P__V_PK_FMA_F32 + +Inst_VOP3P__V_PK_FMA_F32::~Inst_VOP3P__V_PK_FMA_F32() +{ +} // ~Inst_VOP3P__V_PK_FMA_F32 + +// D.f[63:32] = S0.f[63:32] * S1.f[63:32] + S2.f[63:32] . D.f[31:0] = +// S0.f[31:0] * S1.f[31:0] + S2.f[31:0] . +void +Inst_VOP3P__V_PK_FMA_F32::execute(GPUDynInstPtr gpuDynInst) +{ + // This is a special case of packed instructions which operates on + // 64-bit inputs/outputs and not 32-bit. 
U64 is used here as float + // values cannot use bitwise operations. Consider the U64 to imply + // untyped 64-bits of data. + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU64 src2(gpuDynInst, extData.SRC2); + VecOperandU64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + int opsel = instData.OPSEL; + int opsel_hi = extData.OPSEL_HI | (instData.OPSEL_HI2 << 2); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + uint32_t s0l = (opsel & 1) ? bits(src0[lane], 63, 32) + : bits(src0[lane], 31, 0); + uint32_t s1l = (opsel & 2) ? bits(src1[lane], 63, 32) + : bits(src1[lane], 31, 0); + uint32_t s2l = (opsel & 4) ? bits(src2[lane], 63, 32) + : bits(src2[lane], 31, 0); + + float dword1 = std::fma(*reinterpret_cast<float*>(&s0l), + *reinterpret_cast<float*>(&s1l), + *reinterpret_cast<float*>(&s2l)); + + uint32_t s0h = (opsel_hi & 1) ? bits(src0[lane], 63, 32) + : bits(src0[lane], 31, 0); + uint32_t s1h = (opsel_hi & 2) ? bits(src1[lane], 63, 32) + : bits(src1[lane], 31, 0); + uint32_t s2h = (opsel_hi & 4) ? bits(src2[lane], 63, 32) + : bits(src2[lane], 31, 0); + + float dword2 = std::fma(*reinterpret_cast<float*>(&s0h), + *reinterpret_cast<float*>(&s1h), + *reinterpret_cast<float*>(&s2h)); + + uint32_t result1 = *reinterpret_cast<uint32_t*>(&dword1); + uint32_t result2 = *reinterpret_cast<uint32_t*>(&dword2); + + vdst[lane] = (static_cast<uint64_t>(result2) << 32) | result1; + } + } + + vdst.write(); +} // execute +// --- Inst_VOP3P__V_PK_MUL_F32 class methods --- + +Inst_VOP3P__V_PK_MUL_F32::Inst_VOP3P__V_PK_MUL_F32(InFmt_VOP3P *iFmt) + : Inst_VOP3P(iFmt, "v_pk_mul_f32") +{ + setFlag(ALU); +} // Inst_VOP3P__V_PK_MUL_F32 + +Inst_VOP3P__V_PK_MUL_F32::~Inst_VOP3P__V_PK_MUL_F32() +{ +} // ~Inst_VOP3P__V_PK_MUL_F32 + +// D.f[63:32] = S0.f[63:32] * S1.f[63:32] . D.f[31:0] = S0.f[31:0] * +// S1.f[31:0] +void +Inst_VOP3P__V_PK_MUL_F32::execute(GPUDynInstPtr gpuDynInst) +{ + // This is a special case of packed instructions which operates on + // 64-bit inputs/outputs and not 32-bit. U64 is used here as float + // values cannot use bitwise operations. Consider the U64 to imply + // untyped 64-bits of data. + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + VecOperandU64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + int opsel = instData.OPSEL; + int opsel_hi = extData.OPSEL_HI; + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + uint32_t lower_dword = (opsel & 1) ? bits(src0[lane], 63, 32) + : bits(src0[lane], 31, 0); + uint32_t upper_dword = (opsel & 2) ? bits(src1[lane], 63, 32) + : bits(src1[lane], 31, 0); + + float dword1 = *reinterpret_cast<float*>(&lower_dword) + * *reinterpret_cast<float*>(&upper_dword); + + lower_dword = (opsel_hi & 1) ? bits(src0[lane], 63, 32) + : bits(src0[lane], 31, 0); + upper_dword = (opsel_hi & 2) ?
bits(src1[lane], 63, 32) + : bits(src1[lane], 31, 0); + + float dword2 = *reinterpret_cast<float*>(&lower_dword) + * *reinterpret_cast<float*>(&upper_dword); + + uint32_t result1 = *reinterpret_cast<uint32_t*>(&dword1); + uint32_t result2 = *reinterpret_cast<uint32_t*>(&dword2); + + vdst[lane] = (static_cast<uint64_t>(result2) << 32) | result1; + } + } + + vdst.write(); +} // execute +// --- Inst_VOP3P__V_PK_ADD_F32 class methods --- + +Inst_VOP3P__V_PK_ADD_F32::Inst_VOP3P__V_PK_ADD_F32(InFmt_VOP3P *iFmt) + : Inst_VOP3P(iFmt, "v_pk_add_f32") +{ + setFlag(ALU); +} // Inst_VOP3P__V_PK_ADD_F32 + +Inst_VOP3P__V_PK_ADD_F32::~Inst_VOP3P__V_PK_ADD_F32() +{ +} // ~Inst_VOP3P__V_PK_ADD_F32 + +// D.f[63:32] = S0.f[63:32] + S1.f[63:32] . D.f[31:0] = S0.f[31:0] + +// S1.f[31:0] +void +Inst_VOP3P__V_PK_ADD_F32::execute(GPUDynInstPtr gpuDynInst) +{ + // This is a special case of packed instructions which operates on + // 64-bit inputs/outputs and not 32-bit. U64 is used here as float + // values cannot use bitwise operations. Consider the U64 to imply + // untyped 64-bits of data. + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + VecOperandU64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + int opsel = instData.OPSEL; + int opsel_hi = extData.OPSEL_HI; + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + uint32_t lower_dword = (opsel & 1) ? bits(src0[lane], 63, 32) + : bits(src0[lane], 31, 0); + uint32_t upper_dword = (opsel & 2) ? bits(src1[lane], 63, 32) + : bits(src1[lane], 31, 0); + + float dword1 = *reinterpret_cast<float*>(&lower_dword) + + *reinterpret_cast<float*>(&upper_dword); + + lower_dword = (opsel_hi & 1) ? bits(src0[lane], 63, 32) + : bits(src0[lane], 31, 0); + upper_dword = (opsel_hi & 2) ? bits(src1[lane], 63, 32) + : bits(src1[lane], 31, 0); + + float dword2 = *reinterpret_cast<float*>(&lower_dword) + + *reinterpret_cast<float*>(&upper_dword); + + uint32_t result1 = *reinterpret_cast<uint32_t*>(&dword1); + uint32_t result2 = *reinterpret_cast<uint32_t*>(&dword2); + + vdst[lane] = (static_cast<uint64_t>(result2) << 32) | result1; + } + } + + vdst.write(); +} // execute +// --- Inst_VOP3P__V_PK_MOV_B32 class methods --- + +Inst_VOP3P__V_PK_MOV_B32::Inst_VOP3P__V_PK_MOV_B32(InFmt_VOP3P *iFmt) + : Inst_VOP3P(iFmt, "v_pk_mov_b32") +{ + setFlag(ALU); +} // Inst_VOP3P__V_PK_MOV_B32 + +Inst_VOP3P__V_PK_MOV_B32::~Inst_VOP3P__V_PK_MOV_B32() +{ +} // ~Inst_VOP3P__V_PK_MOV_B32 + +// D.u[63:32] = S1.u[31:0]; D.u[31:0] = S0.u[31:0]. +void +Inst_VOP3P__V_PK_MOV_B32::execute(GPUDynInstPtr gpuDynInst) +{ + // This is a special case of packed instructions which operates on + // 64-bit inputs/outputs and not 32-bit. + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + VecOperandU64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + // Only OPSEL[1:0] are used + // OPSEL[0] 0/1: Lower dest dword = lower/upper dword of src0 + + int opsel = instData.OPSEL; + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + // OPSEL[1] 0/1: Upper dest dword = lower/upper dword of src1 + uint64_t lower_dword = (opsel & 1) ? bits(src0[lane], 63, 32) + : bits(src0[lane], 31, 0); + uint64_t upper_dword = (opsel & 2) ?
bits(src1[lane], 63, 32) + : bits(src1[lane], 31, 0); + + vdst[lane] = upper_dword << 32 | lower_dword; + } + } + + vdst.write(); +} // execute + } // namespace VegaISA } // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/vop3p_mai.cc b/src/arch/amdgpu/vega/insts/vop3p_mai.cc new file mode 100644 index 0000000000..943aa72cfd --- /dev/null +++ b/src/arch/amdgpu/vega/insts/vop3p_mai.cc @@ -0,0 +1,257 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/instructions.hh" +#include "arch/amdgpu/vega/insts/vop3p.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_VOP3P_MAI__V_MFMA_I32_16X16X16I8 class methods --- + + Inst_VOP3P_MAI__V_MFMA_I32_16X16X16I8:: + Inst_VOP3P_MAI__V_MFMA_I32_16X16X16I8(InFmt_VOP3P_MAI *iFmt) + : Inst_VOP3P_MAI(iFmt, "v_mfma_i32_16x16x16i8") + { + setFlag(ALU); + } // Inst_VOP3P_MAI__V_MFMA_I32_16X16X16I8 + + Inst_VOP3P_MAI__V_MFMA_I32_16X16X16I8:: + ~Inst_VOP3P_MAI__V_MFMA_I32_16X16X16I8() + { + } // ~Inst_VOP3P_MAI__V_MFMA_I32_16X16X16I8 + + // D(16x16I32) = A(16x16I8) x B(16x16I8) + C(16x16I32), 1 Blocks, 8 + // pass, srcA/srcB 1 archVgpr, srcC/D 4 accVGPR + void + Inst_VOP3P_MAI__V_MFMA_I32_16X16X16I8::execute(GPUDynInstPtr gpuDynInst) + { + int acc_offset = 0; + if (instData.ACC_CD) { + warn("ACC_CD not yet implemented\n"); + } + + // int8 size allows for 4 elements per lane. At 16x16 this means 4 + // lanes per column (A matrix) / (B matrix). This whole matrix fits + // in one VGPR. The C matrix with size int32 requires 4 VGPRs. + // Handle the C matrix by using a delta. This is set to 1 normally to + // move to the next VGPR (1 dword away) and 0 if the input is a scalar + // reg (e.g., a constant). + int delta = isVectorReg(extData.SRC2) ? 1 : 0; + + // VecOperandI8 will read 8 bits and sign extend, so used U32 to read + // as "untyped" 32-bit values. 
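+ // As a concrete example of the packing used by the loops below: lane 0 of + // src0 carries A[0][0..3] in bytes 0..3 and lane 16 carries A[0][4..7], + // while lane 0 of src1 carries B[0][0], B[1][0], B[2][0] and B[3][0] in + // bytes 0..3. In the final loop, result row r is written to the + // (r mod 4)-th destination VGPR (vdsta through vdstd).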
+ ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandI32 src2a(gpuDynInst, extData.SRC2+acc_offset); + ConstVecOperandI32 src2b(gpuDynInst, extData.SRC2+acc_offset+1*delta); + ConstVecOperandI32 src2c(gpuDynInst, extData.SRC2+acc_offset+2*delta); + ConstVecOperandI32 src2d(gpuDynInst, extData.SRC2+acc_offset+3*delta); + + VecOperandI32 vdsta(gpuDynInst, instData.VDST+acc_offset); + VecOperandI32 vdstb(gpuDynInst, instData.VDST+acc_offset+1); + VecOperandI32 vdstc(gpuDynInst, instData.VDST+acc_offset+2); + VecOperandI32 vdstd(gpuDynInst, instData.VDST+acc_offset+3); + + src0.readSrc(); + src1.readSrc(); + src2a.readSrc(); + src2b.readSrc(); + src2c.readSrc(); + src2d.readSrc(); + + int32_t A[16][16]; + for (int i = 0; i < 64; ++i) { + // src0[0:15] contains columns 1 - 4 packed for rows 0 - 15, + // src0[16:31] contains columns 5 - 8 packed for rows 0 - 15, + // src0[32:47] contains columns 9 - 12 packed for rows 0 - 15, + // src0[48:63] contains columns 13 - 16 packed for rows 0 - 15, + int row = i % 16; + int start_col = (i / 16) * 4; + + A[row][start_col+0] = sext<8>(bits(src0[i], 7, 0)); + A[row][start_col+1] = sext<8>(bits(src0[i], 15, 8)); + A[row][start_col+2] = sext<8>(bits(src0[i], 23, 16)); + A[row][start_col+3] = sext<8>(bits(src0[i], 31, 24)); + } + + int32_t B[16][16]; + for (int i = 0; i < 64; ++i) { + // src1[0:15] contains rows 1 - 4 packed for columns 0 - 15 + // src1[16:31] contains rows 5 - 8 packed for columns 0 - 15 + // src1[32:47] contains rows 9 - 12 packed for columns 0 - 15 + // src1[48:63] contains rows 13 - 16 packed for columns 0 - 15 + int start_row = (i / 16) * 4; + int col = i % 16; + + B[start_row+0][col] = sext<8>(bits(src1[i], 7, 0)); + B[start_row+1][col] = sext<8>(bits(src1[i], 15, 8)); + B[start_row+2][col] = sext<8>(bits(src1[i], 23, 16)); + B[start_row+3][col] = sext<8>(bits(src1[i], 31, 24)); + } + + int32_t result[16][16]; + + // Load accumulation matrix C into result + for (int i = 0; i < 64; ++i) { + // src2a contains rows 0, 4, 8, 12 + result[(i/16)*4][(i%16)] = src2a[i]; + // src2b contains rows 1, 5, 9, 13 + result[(i/16)*4+1][(i%16)] = src2b[i]; + // src2c contains rows 2, 6, 10, 14 + result[(i/16)*4+2][(i%16)] = src2c[i]; + // src2d contains rows 3, 7, 11, 15 + result[(i/16)*4+3][(i%16)] = src2d[i]; + } + + // Compute new result - This is (obviously) not optimized + for (int i = 0; i < 16; ++i) { + for (int j = 0; j < 16; ++j) { + for (int k = 0; k < 16; ++k) { + result[i][j] += A[i][k] * B[k][j]; + } + } + } + + // Put result in dest VGPRs + for (int i = 0; i < 64; ++i) { + // vdsta contains rows 0, 4, 8, 12 + vdsta[i] = result[(i/16)*4][(i%16)]; + // vdstb contains rows 1, 5, 9, 13 + vdstb[i] = result[(i/16)*4+1][(i%16)]; + // vdstc contains rows 2, 6, 10, 14 + vdstc[i] = result[(i/16)*4+2][(i%16)]; + // vdstd contains rows 3, 7, 11, 15 + vdstd[i] = result[(i/16)*4+3][(i%16)]; + } + + vdsta.write(); + vdstb.write(); + vdstc.write(); + vdstd.write(); + } // execute + // --- Inst_VOP3P_MAI__V_MFMA_F64_16X16X4F64 class methods --- + + Inst_VOP3P_MAI__V_MFMA_F64_16X16X4F64:: + Inst_VOP3P_MAI__V_MFMA_F64_16X16X4F64(InFmt_VOP3P_MAI *iFmt) + : Inst_VOP3P_MAI(iFmt, "v_mfma_f64_16x16x4f64") + { + setFlag(ALU); + } // Inst_VOP3P_MAI__V_MFMA_F64_16X16X4F64 + + Inst_VOP3P_MAI__V_MFMA_F64_16X16X4F64:: + ~Inst_VOP3P_MAI__V_MFMA_F64_16X16X4F64() + { + } // ~Inst_VOP3P_MAI__V_MFMA_F64_16X16X4F64 + + // D(16x16F64) = A(16x4F64) x B(4x16F64) + C(16x16F64), 1 Blocks, 8 + // pass, srcA/srcB 
2 VGPR, srcC/D 8 VGPR + void + Inst_VOP3P_MAI__V_MFMA_F64_16X16X4F64::execute(GPUDynInstPtr gpuDynInst) + { + int acc_offset = 0; + if (instData.ACC_CD) { + warn("ACC_CD not yet implemented\n"); + } + + // Handling of src2 is a bit tricky. The operator[] overload cannot + // be used for dword count > 2, and the dword count here is 8. Usually + // src2 is a VGPR/AccGPR, but it might also be constant. In order to + // use operator[] and handle constants, check for VGPR here and set + // a delta for each of the pairs of src2 GPRs. + int delta = isVectorReg(extData.SRC2) ? 2 : 0; + + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ConstVecOperandF64 src2a(gpuDynInst, extData.SRC2+acc_offset); + ConstVecOperandF64 src2b(gpuDynInst, extData.SRC2+acc_offset+1*delta); + ConstVecOperandF64 src2c(gpuDynInst, extData.SRC2+acc_offset+2*delta); + ConstVecOperandF64 src2d(gpuDynInst, extData.SRC2+acc_offset+3*delta); + + VecOperandF64 vdsta(gpuDynInst, instData.VDST+acc_offset); + VecOperandF64 vdstb(gpuDynInst, instData.VDST+acc_offset+2); + VecOperandF64 vdstc(gpuDynInst, instData.VDST+acc_offset+4); + VecOperandF64 vdstd(gpuDynInst, instData.VDST+acc_offset+6); + + src0.readSrc(); + src1.readSrc(); + src2a.readSrc(); + src2b.readSrc(); + src2c.readSrc(); + src2d.readSrc(); + + double result[16][16]; + + // Load src2 into result. src2 is row major + for (int i = 0; i < 64; ++i) { + // src2a contains rows 0 - 3 + result[(i/16)][(i%16)] = src2a[i]; + // src2b contains rows 4 - 7 + result[(i/16)+4][(i%16)] = src2b[i]; + // src2c contains rows 8 - 11 + result[(i/16)+8][(i%16)] = src2c[i]; + // src2d contains rows 12 - 15 + result[(i/16)+12][(i%16)] = src2d[i]; + } + + // Compute new result + for (int i = 0; i < 16; ++i) { + for (int j = 0; j < 16; ++j) { + for (int k = 0; k < 4; ++k) { + // src0 is column major, src1 is row major + int lane_A = 16*k + i; + int lane_B = 16*k + j; + result[i][j] += src0[lane_A] * src1[lane_B]; + } + } + } + + // Put result in dest VGPRs + for (int i = 0; i < 64; ++i) { + // vdsta contains rows 0 - 3 + vdsta[i] = result[(i/16)][(i%16)]; + // src2b contains rows 4 - 7 + vdstb[i] = result[(i/16)+4][(i%16)]; + // src2c contains rows 8 - 11 + vdstc[i] = result[(i/16)+8][(i%16)]; + // src2d contains rows 12 - 15 + vdstd[i] = result[(i/16)+12][(i%16)]; + } + + vdsta.write(); + vdstb.write(); + vdstc.write(); + vdstd.write(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/vopc.cc b/src/arch/amdgpu/vega/insts/vopc.cc new file mode 100644 index 0000000000..2c386fec74 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/vopc.cc @@ -0,0 +1,6590 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_VOPC__V_CMP_CLASS_F32 class methods --- + + Inst_VOPC__V_CMP_CLASS_F32::Inst_VOPC__V_CMP_CLASS_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_class_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_CLASS_F32 + + Inst_VOPC__V_CMP_CLASS_F32::~Inst_VOPC__V_CMP_CLASS_F32() + { + } // ~Inst_VOPC__V_CMP_CLASS_F32 + + // --- description from .arch file --- + // VCC = IEEE numeric class function specified in S1.u, performed on S0.f + // The function reports true if the floating point value is *any* of the + // --- numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. + // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. 
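+ // For example, S1.u = 0x003 (bits 0 and 1) reports true for any NaN input, + // while S1.u = 0x060 (bits 5 and 6) reports true only for +/-0.0. Each + // active lane tests its own S0 value against its own S1 mask; lanes whose + // value matches none of the selected classes are not written by the loop + // below.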
+ void + Inst_VOPC__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (bits(src1[lane], 0) || bits(src1[lane], 1)) { + // is NaN + if (std::isnan(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 2)) { + // is -infinity + if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 3)) { + // is -normal + if (std::isnormal(src0[lane]) + && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 4)) { + // is -denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 5)) { + // is -zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 6)) { + // is +zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 7)) { + // is +denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 8)) { + // is +normal + if (std::isnormal(src0[lane]) + && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 9)) { + // is +infinity + if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_CLASS_F32 class methods --- + + Inst_VOPC__V_CMPX_CLASS_F32::Inst_VOPC__V_CMPX_CLASS_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_class_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_CLASS_F32 + + Inst_VOPC__V_CMPX_CLASS_F32::~Inst_VOPC__V_CMPX_CLASS_F32() + { + } // ~Inst_VOPC__V_CMPX_CLASS_F32 + + // --- description from .arch file --- + // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on + // S0.f The function reports true if the floating point value is *any* of + // the numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. + // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. 
+ void + Inst_VOPC__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (bits(src1[lane], 0) || bits(src1[lane], 1)) { + // is NaN + if (std::isnan(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 2)) { + // is -infinity + if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 3)) { + // is -normal + if (std::isnormal(src0[lane]) + && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 4)) { + // is -denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 5)) { + // is -zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 6)) { + // is +zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 7)) { + // is +denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 8)) { + // is +normal + if (std::isnormal(src0[lane]) + && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 9)) { + // is +infinity + if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMP_CLASS_F64 class methods --- + + Inst_VOPC__V_CMP_CLASS_F64::Inst_VOPC__V_CMP_CLASS_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_class_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_CLASS_F64 + + Inst_VOPC__V_CMP_CLASS_F64::~Inst_VOPC__V_CMP_CLASS_F64() + { + } // ~Inst_VOPC__V_CMP_CLASS_F64 + + // --- description from .arch file --- + // VCC = IEEE numeric class function specified in S1.u, performed on S0.d + // The function reports true if the floating point value is *any* of the + // --- numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. + // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. 
+    void
+    Inst_VOPC__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
+        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
+        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
+
+        src0.readSrc();
+        src1.read();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
+                    // is NaN
+                    if (std::isnan(src0[lane])) {
+                        vcc.setBit(lane, 1);
+                        continue;
+                    }
+                }
+                if (bits(src1[lane], 2)) {
+                    // is -infinity
+                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
+                        vcc.setBit(lane, 1);
+                        continue;
+                    }
+                }
+                if (bits(src1[lane], 3)) {
+                    // is -normal
+                    if (std::isnormal(src0[lane])
+                        && std::signbit(src0[lane])) {
+                        vcc.setBit(lane, 1);
+                        continue;
+                    }
+                }
+                if (bits(src1[lane], 4)) {
+                    // is -denormal
+                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
+                        && std::signbit(src0[lane])) {
+                        vcc.setBit(lane, 1);
+                        continue;
+                    }
+                }
+                if (bits(src1[lane], 5)) {
+                    // is -zero
+                    if (std::fpclassify(src0[lane]) == FP_ZERO
+                        && std::signbit(src0[lane])) {
+                        vcc.setBit(lane, 1);
+                        continue;
+                    }
+                }
+                if (bits(src1[lane], 6)) {
+                    // is +zero
+                    if (std::fpclassify(src0[lane]) == FP_ZERO
+                        && !std::signbit(src0[lane])) {
+                        vcc.setBit(lane, 1);
+                        continue;
+                    }
+                }
+                if (bits(src1[lane], 7)) {
+                    // is +denormal
+                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
+                        && !std::signbit(src0[lane])) {
+                        vcc.setBit(lane, 1);
+                        continue;
+                    }
+                }
+                if (bits(src1[lane], 8)) {
+                    // is +normal
+                    if (std::isnormal(src0[lane])
+                        && !std::signbit(src0[lane])) {
+                        vcc.setBit(lane, 1);
+                        continue;
+                    }
+                }
+                if (bits(src1[lane], 9)) {
+                    // is +infinity
+                    if (std::isinf(src0[lane])
+                        && !std::signbit(src0[lane])) {
+                        vcc.setBit(lane, 1);
+                        continue;
+                    }
+                }
+            }
+        }
+
+        vcc.write();
+    } // execute
+    // --- Inst_VOPC__V_CMPX_CLASS_F64 class methods ---
+
+    Inst_VOPC__V_CMPX_CLASS_F64::Inst_VOPC__V_CMPX_CLASS_F64(InFmt_VOPC *iFmt)
+        : Inst_VOPC(iFmt, "v_cmpx_class_f64")
+    {
+        setFlag(ALU);
+        setFlag(F64);
+        setFlag(WritesEXEC);
+    } // Inst_VOPC__V_CMPX_CLASS_F64
+
+    Inst_VOPC__V_CMPX_CLASS_F64::~Inst_VOPC__V_CMPX_CLASS_F64()
+    {
+    } // ~Inst_VOPC__V_CMPX_CLASS_F64
+
+    // --- description from .arch file ---
+    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
+    // S0.d The function reports true if the floating point value is *any* of
+    // the numeric types selected in S1.u according to the following list:
+    // S1.u[0] -- value is a signaling NaN.
+    // S1.u[1] -- value is a quiet NaN.
+    // S1.u[2] -- value is negative infinity.
+    // S1.u[3] -- value is a negative normal value.
+    // S1.u[4] -- value is a negative denormal value.
+    // S1.u[5] -- value is negative zero.
+    // S1.u[6] -- value is positive zero.
+    // S1.u[7] -- value is a positive denormal value.
+    // S1.u[8] -- value is a positive normal value.
+    // S1.u[9] -- value is positive infinity.
+    void
+    Inst_VOPC__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
+        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
+        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
+
+        src0.readSrc();
+        src1.read();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
+                    // is NaN
+                    if (std::isnan(src0[lane])) {
+                        vcc.setBit(lane, 1);
+                        continue;
+                    }
+                }
+                if (bits(src1[lane], 2)) {
+                    // is -infinity
+                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
+                        vcc.setBit(lane, 1);
+                        continue;
+                    }
+                }
+                if (bits(src1[lane], 3)) {
+                    // is -normal
+                    if (std::isnormal(src0[lane])
+                        && std::signbit(src0[lane])) {
+                        vcc.setBit(lane, 1);
+                        continue;
+                    }
+                }
+                if (bits(src1[lane], 4)) {
+                    // is -denormal
+                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
+                        && std::signbit(src0[lane])) {
+                        vcc.setBit(lane, 1);
+                        continue;
+                    }
+                }
+                if (bits(src1[lane], 5)) {
+                    // is -zero
+                    if (std::fpclassify(src0[lane]) == FP_ZERO
+                        && std::signbit(src0[lane])) {
+                        vcc.setBit(lane, 1);
+                        continue;
+                    }
+                }
+                if (bits(src1[lane], 6)) {
+                    // is +zero
+                    if (std::fpclassify(src0[lane]) == FP_ZERO
+                        && !std::signbit(src0[lane])) {
+                        vcc.setBit(lane, 1);
+                        continue;
+                    }
+                }
+                if (bits(src1[lane], 7)) {
+                    // is +denormal
+                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
+                        && !std::signbit(src0[lane])) {
+                        vcc.setBit(lane, 1);
+                        continue;
+                    }
+                }
+                if (bits(src1[lane], 8)) {
+                    // is +normal
+                    if (std::isnormal(src0[lane])
+                        && !std::signbit(src0[lane])) {
+                        vcc.setBit(lane, 1);
+                        continue;
+                    }
+                }
+                if (bits(src1[lane], 9)) {
+                    // is +infinity
+                    if (std::isinf(src0[lane])
+                        && !std::signbit(src0[lane])) {
+                        vcc.setBit(lane, 1);
+                        continue;
+                    }
+                }
+            }
+        }
+
+        vcc.write();
+        wf->execMask() = vcc.rawData();
+    } // execute
+    // --- Inst_VOPC__V_CMP_CLASS_F16 class methods ---
+
+    Inst_VOPC__V_CMP_CLASS_F16::Inst_VOPC__V_CMP_CLASS_F16(InFmt_VOPC *iFmt)
+        : Inst_VOPC(iFmt, "v_cmp_class_f16")
+    {
+        setFlag(ALU);
+        setFlag(F16);
+    } // Inst_VOPC__V_CMP_CLASS_F16
+
+    Inst_VOPC__V_CMP_CLASS_F16::~Inst_VOPC__V_CMP_CLASS_F16()
+    {
+    } // ~Inst_VOPC__V_CMP_CLASS_F16
+
+    // --- description from .arch file ---
+    // VCC = IEEE numeric class function specified in S1.u, performed on S0.f16
+    // The function reports true if the floating point value is *any* of the
+    // --- numeric types selected in S1.u according to the following list:
+    // S1.u[0] -- value is a signaling NaN.
+    // S1.u[1] -- value is a quiet NaN.
+    // S1.u[2] -- value is negative infinity.
+    // S1.u[3] -- value is a negative normal value.
+    // S1.u[4] -- value is a negative denormal value.
+    // S1.u[5] -- value is negative zero.
+    // S1.u[6] -- value is positive zero.
+    // S1.u[7] -- value is a positive denormal value.
+    // S1.u[8] -- value is a positive normal value.
+    // S1.u[9] -- value is positive infinity.
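+    // V_CMP_CLASS_F16 below, and the rest of the F16 compares that follow,
+    // are currently stubs that call panicUnimplemented(). A minimal sketch
+    // of what an implementation might look like is given here for reference
+    // only; it assumes a 16-bit float vector operand type (a hypothetical
+    // ConstVecOperandF16 that converts to a host float), which this change
+    // does not provide:
+    //
+    //     Wavefront *wf = gpuDynInst->wavefront();
+    //     ConstVecOperandF16 src0(gpuDynInst, instData.SRC0); // hypothetical
+    //     ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
+    //     ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
+    //
+    //     src0.readSrc();
+    //     src1.read();
+    //
+    //     for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+    //         if (wf->execMask(lane)) {
+    //             float val = src0[lane];
+    //             // apply the same ten class-bit tests as the F32 version
+    //             // above to val, setting vcc.setBit(lane, 1) on a match
+    //         }
+    //     }
+    //
+    //     vcc.write();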
+ void + Inst_VOPC__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_CLASS_F16 class methods --- + + Inst_VOPC__V_CMPX_CLASS_F16::Inst_VOPC__V_CMPX_CLASS_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_class_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_CLASS_F16 + + Inst_VOPC__V_CMPX_CLASS_F16::~Inst_VOPC__V_CMPX_CLASS_F16() + { + } // ~Inst_VOPC__V_CMPX_CLASS_F16 + + // --- description from .arch file --- + // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on + // --- S0.f16 + // The function reports true if the floating point value is *any* of the + // --- numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. + // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. + void + Inst_VOPC__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_F_F16 class methods --- + + Inst_VOPC__V_CMP_F_F16::Inst_VOPC__V_CMP_F_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_f_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_F_F16 + + Inst_VOPC__V_CMP_F_F16::~Inst_VOPC__V_CMP_F_F16() + { + } // ~Inst_VOPC__V_CMP_F_F16 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_LT_F16 class methods --- + + Inst_VOPC__V_CMP_LT_F16::Inst_VOPC__V_CMP_LT_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lt_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_LT_F16 + + Inst_VOPC__V_CMP_LT_F16::~Inst_VOPC__V_CMP_LT_F16() + { + } // ~Inst_VOPC__V_CMP_LT_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_EQ_F16 class methods --- + + Inst_VOPC__V_CMP_EQ_F16::Inst_VOPC__V_CMP_EQ_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_eq_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_EQ_F16 + + Inst_VOPC__V_CMP_EQ_F16::~Inst_VOPC__V_CMP_EQ_F16() + { + } // ~Inst_VOPC__V_CMP_EQ_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_LE_F16 class methods --- + + Inst_VOPC__V_CMP_LE_F16::Inst_VOPC__V_CMP_LE_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_le_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_LE_F16 + + Inst_VOPC__V_CMP_LE_F16::~Inst_VOPC__V_CMP_LE_F16() + { + } // ~Inst_VOPC__V_CMP_LE_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_GT_F16 class methods --- + + Inst_VOPC__V_CMP_GT_F16::Inst_VOPC__V_CMP_GT_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_gt_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_GT_F16 + + Inst_VOPC__V_CMP_GT_F16::~Inst_VOPC__V_CMP_GT_F16() + { + } // ~Inst_VOPC__V_CMP_GT_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_LG_F16 class methods --- + + Inst_VOPC__V_CMP_LG_F16::Inst_VOPC__V_CMP_LG_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lg_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_LG_F16 + + Inst_VOPC__V_CMP_LG_F16::~Inst_VOPC__V_CMP_LG_F16() + { + } // ~Inst_VOPC__V_CMP_LG_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_GE_F16 class methods --- + + Inst_VOPC__V_CMP_GE_F16::Inst_VOPC__V_CMP_GE_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ge_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_GE_F16 + + Inst_VOPC__V_CMP_GE_F16::~Inst_VOPC__V_CMP_GE_F16() + { + } // ~Inst_VOPC__V_CMP_GE_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_O_F16 class methods --- + + Inst_VOPC__V_CMP_O_F16::Inst_VOPC__V_CMP_O_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_o_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_O_F16 + + Inst_VOPC__V_CMP_O_F16::~Inst_VOPC__V_CMP_O_F16() + { + } // ~Inst_VOPC__V_CMP_O_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_U_F16 class methods --- + + Inst_VOPC__V_CMP_U_F16::Inst_VOPC__V_CMP_U_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_u_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_U_F16 + + Inst_VOPC__V_CMP_U_F16::~Inst_VOPC__V_CMP_U_F16() + { + } // ~Inst_VOPC__V_CMP_U_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_NGE_F16 class methods --- + + Inst_VOPC__V_CMP_NGE_F16::Inst_VOPC__V_CMP_NGE_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nge_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_NGE_F16 + + Inst_VOPC__V_CMP_NGE_F16::~Inst_VOPC__V_CMP_NGE_F16() + { + } // ~Inst_VOPC__V_CMP_NGE_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_NLG_F16 class methods --- + + Inst_VOPC__V_CMP_NLG_F16::Inst_VOPC__V_CMP_NLG_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nlg_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_NLG_F16 + + Inst_VOPC__V_CMP_NLG_F16::~Inst_VOPC__V_CMP_NLG_F16() + { + } // ~Inst_VOPC__V_CMP_NLG_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_NGT_F16 class methods --- + + Inst_VOPC__V_CMP_NGT_F16::Inst_VOPC__V_CMP_NGT_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ngt_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_NGT_F16 + + Inst_VOPC__V_CMP_NGT_F16::~Inst_VOPC__V_CMP_NGT_F16() + { + } // ~Inst_VOPC__V_CMP_NGT_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_NLE_F16 class methods --- + + Inst_VOPC__V_CMP_NLE_F16::Inst_VOPC__V_CMP_NLE_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nle_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_NLE_F16 + + Inst_VOPC__V_CMP_NLE_F16::~Inst_VOPC__V_CMP_NLE_F16() + { + } // ~Inst_VOPC__V_CMP_NLE_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_NEQ_F16 class methods --- + + Inst_VOPC__V_CMP_NEQ_F16::Inst_VOPC__V_CMP_NEQ_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_neq_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_NEQ_F16 + + Inst_VOPC__V_CMP_NEQ_F16::~Inst_VOPC__V_CMP_NEQ_F16() + { + } // ~Inst_VOPC__V_CMP_NEQ_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_NLT_F16 class methods --- + + Inst_VOPC__V_CMP_NLT_F16::Inst_VOPC__V_CMP_NLT_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nlt_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_NLT_F16 + + Inst_VOPC__V_CMP_NLT_F16::~Inst_VOPC__V_CMP_NLT_F16() + { + } // ~Inst_VOPC__V_CMP_NLT_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_TRU_F16 class methods --- + + Inst_VOPC__V_CMP_TRU_F16::Inst_VOPC__V_CMP_TRU_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_tru_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_TRU_F16 + + Inst_VOPC__V_CMP_TRU_F16::~Inst_VOPC__V_CMP_TRU_F16() + { + } // ~Inst_VOPC__V_CMP_TRU_F16 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_F_F16 class methods --- + + Inst_VOPC__V_CMPX_F_F16::Inst_VOPC__V_CMPX_F_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_f_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_F_F16 + + Inst_VOPC__V_CMPX_F_F16::~Inst_VOPC__V_CMPX_F_F16() + { + } // ~Inst_VOPC__V_CMPX_F_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_LT_F16 class methods --- + + Inst_VOPC__V_CMPX_LT_F16::Inst_VOPC__V_CMPX_LT_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lt_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LT_F16 + + Inst_VOPC__V_CMPX_LT_F16::~Inst_VOPC__V_CMPX_LT_F16() + { + } // ~Inst_VOPC__V_CMPX_LT_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_EQ_F16 class methods --- + + Inst_VOPC__V_CMPX_EQ_F16::Inst_VOPC__V_CMPX_EQ_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_eq_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_EQ_F16 + + Inst_VOPC__V_CMPX_EQ_F16::~Inst_VOPC__V_CMPX_EQ_F16() + { + } // ~Inst_VOPC__V_CMPX_EQ_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_LE_F16 class methods --- + + Inst_VOPC__V_CMPX_LE_F16::Inst_VOPC__V_CMPX_LE_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_le_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LE_F16 + + Inst_VOPC__V_CMPX_LE_F16::~Inst_VOPC__V_CMPX_LE_F16() + { + } // ~Inst_VOPC__V_CMPX_LE_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_GT_F16 class methods --- + + Inst_VOPC__V_CMPX_GT_F16::Inst_VOPC__V_CMPX_GT_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_gt_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GT_F16 + + Inst_VOPC__V_CMPX_GT_F16::~Inst_VOPC__V_CMPX_GT_F16() + { + } // ~Inst_VOPC__V_CMPX_GT_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_LG_F16 class methods --- + + Inst_VOPC__V_CMPX_LG_F16::Inst_VOPC__V_CMPX_LG_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lg_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LG_F16 + + Inst_VOPC__V_CMPX_LG_F16::~Inst_VOPC__V_CMPX_LG_F16() + { + } // ~Inst_VOPC__V_CMPX_LG_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_GE_F16 class methods --- + + Inst_VOPC__V_CMPX_GE_F16::Inst_VOPC__V_CMPX_GE_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ge_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GE_F16 + + Inst_VOPC__V_CMPX_GE_F16::~Inst_VOPC__V_CMPX_GE_F16() + { + } // ~Inst_VOPC__V_CMPX_GE_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_O_F16 class methods --- + + Inst_VOPC__V_CMPX_O_F16::Inst_VOPC__V_CMPX_O_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_o_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_O_F16 + + Inst_VOPC__V_CMPX_O_F16::~Inst_VOPC__V_CMPX_O_F16() + { + } // ~Inst_VOPC__V_CMPX_O_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC + // encoding. + void + Inst_VOPC__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_U_F16 class methods --- + + Inst_VOPC__V_CMPX_U_F16::Inst_VOPC__V_CMPX_U_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_u_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_U_F16 + + Inst_VOPC__V_CMPX_U_F16::~Inst_VOPC__V_CMPX_U_F16() + { + } // ~Inst_VOPC__V_CMPX_U_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC + // encoding. + void + Inst_VOPC__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_NGE_F16 class methods --- + + Inst_VOPC__V_CMPX_NGE_F16::Inst_VOPC__V_CMPX_NGE_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nge_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NGE_F16 + + Inst_VOPC__V_CMPX_NGE_F16::~Inst_VOPC__V_CMPX_NGE_F16() + { + } // ~Inst_VOPC__V_CMPX_NGE_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_NLG_F16 class methods --- + + Inst_VOPC__V_CMPX_NLG_F16::Inst_VOPC__V_CMPX_NLG_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nlg_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NLG_F16 + + Inst_VOPC__V_CMPX_NLG_F16::~Inst_VOPC__V_CMPX_NLG_F16() + { + } // ~Inst_VOPC__V_CMPX_NLG_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_NGT_F16 class methods --- + + Inst_VOPC__V_CMPX_NGT_F16::Inst_VOPC__V_CMPX_NGT_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ngt_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NGT_F16 + + Inst_VOPC__V_CMPX_NGT_F16::~Inst_VOPC__V_CMPX_NGT_F16() + { + } // ~Inst_VOPC__V_CMPX_NGT_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_NLE_F16 class methods --- + + Inst_VOPC__V_CMPX_NLE_F16::Inst_VOPC__V_CMPX_NLE_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nle_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NLE_F16 + + Inst_VOPC__V_CMPX_NLE_F16::~Inst_VOPC__V_CMPX_NLE_F16() + { + } // ~Inst_VOPC__V_CMPX_NLE_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_NEQ_F16 class methods --- + + Inst_VOPC__V_CMPX_NEQ_F16::Inst_VOPC__V_CMPX_NEQ_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_neq_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NEQ_F16 + + Inst_VOPC__V_CMPX_NEQ_F16::~Inst_VOPC__V_CMPX_NEQ_F16() + { + } // ~Inst_VOPC__V_CMPX_NEQ_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_NLT_F16 class methods --- + + Inst_VOPC__V_CMPX_NLT_F16::Inst_VOPC__V_CMPX_NLT_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nlt_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NLT_F16 + + Inst_VOPC__V_CMPX_NLT_F16::~Inst_VOPC__V_CMPX_NLT_F16() + { + } // ~Inst_VOPC__V_CMPX_NLT_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_TRU_F16 class methods --- + + Inst_VOPC__V_CMPX_TRU_F16::Inst_VOPC__V_CMPX_TRU_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_tru_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_TRU_F16 + + Inst_VOPC__V_CMPX_TRU_F16::~Inst_VOPC__V_CMPX_TRU_F16() + { + } // ~Inst_VOPC__V_CMPX_TRU_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_F_F32 class methods --- + + Inst_VOPC__V_CMP_F_F32::Inst_VOPC__V_CMP_F_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_f_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_F_F32 + + Inst_VOPC__V_CMP_F_F32::~Inst_VOPC__V_CMP_F_F32() + { + } // ~Inst_VOPC__V_CMP_F_F32 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
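+    // The implemented F32 (and later F64) compares below all share one
+    // pattern: read both sources, evaluate the predicate for every active
+    // lane, record the result in the matching VCC bit, and commit with
+    // vcc.write(); the CMPX forms additionally copy the mask into EXEC.
+    // V_CMP_F and V_CMP_TRU are the degenerate cases that ignore their
+    // sources and write a constant 0 or 1 for each active lane. As an
+    // illustration only (not part of this change), the boilerplate could be
+    // factored into a helper along these lines:
+    //
+    //     template<typename VecOp, typename Pred>
+    //     static void
+    //     vopcCompare(GPUDynInstPtr gpuDynInst, int srcIdx0, int srcIdx1,
+    //                 Pred pred)
+    //     {
+    //         Wavefront *wf = gpuDynInst->wavefront();
+    //         VecOp src0(gpuDynInst, srcIdx0);
+    //         VecOp src1(gpuDynInst, srcIdx1);
+    //         ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
+    //
+    //         src0.readSrc();
+    //         src1.read();
+    //
+    //         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+    //             if (wf->execMask(lane)) {
+    //                 vcc.setBit(lane, pred(src0[lane], src1[lane]) ? 1 : 0);
+    //             }
+    //         }
+    //
+    //         vcc.write();
+    //     }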
+ void + Inst_VOPC__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LT_F32 class methods --- + + Inst_VOPC__V_CMP_LT_F32::Inst_VOPC__V_CMP_LT_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lt_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_LT_F32 + + Inst_VOPC__V_CMP_LT_F32::~Inst_VOPC__V_CMP_LT_F32() + { + } // ~Inst_VOPC__V_CMP_LT_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_EQ_F32 class methods --- + + Inst_VOPC__V_CMP_EQ_F32::Inst_VOPC__V_CMP_EQ_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_eq_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_EQ_F32 + + Inst_VOPC__V_CMP_EQ_F32::~Inst_VOPC__V_CMP_EQ_F32() + { + } // ~Inst_VOPC__V_CMP_EQ_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LE_F32 class methods --- + + Inst_VOPC__V_CMP_LE_F32::Inst_VOPC__V_CMP_LE_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_le_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_LE_F32 + + Inst_VOPC__V_CMP_LE_F32::~Inst_VOPC__V_CMP_LE_F32() + { + } // ~Inst_VOPC__V_CMP_LE_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GT_F32 class methods --- + + Inst_VOPC__V_CMP_GT_F32::Inst_VOPC__V_CMP_GT_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_gt_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_GT_F32 + + Inst_VOPC__V_CMP_GT_F32::~Inst_VOPC__V_CMP_GT_F32() + { + } // ~Inst_VOPC__V_CMP_GT_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LG_F32 class methods --- + + Inst_VOPC__V_CMP_LG_F32::Inst_VOPC__V_CMP_LG_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lg_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_LG_F32 + + Inst_VOPC__V_CMP_LG_F32::~Inst_VOPC__V_CMP_LG_F32() + { + } // ~Inst_VOPC__V_CMP_LG_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GE_F32 class methods --- + + Inst_VOPC__V_CMP_GE_F32::Inst_VOPC__V_CMP_GE_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ge_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_GE_F32 + + Inst_VOPC__V_CMP_GE_F32::~Inst_VOPC__V_CMP_GE_F32() + { + } // ~Inst_VOPC__V_CMP_GE_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_O_F32 class methods --- + + Inst_VOPC__V_CMP_O_F32::Inst_VOPC__V_CMP_O_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_o_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_O_F32 + + Inst_VOPC__V_CMP_O_F32::~Inst_VOPC__V_CMP_O_F32() + { + } // ~Inst_VOPC__V_CMP_O_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (!std::isnan(src0[lane]) + && !std::isnan(src1[lane])) ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_U_F32 class methods --- + + Inst_VOPC__V_CMP_U_F32::Inst_VOPC__V_CMP_U_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_u_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_U_F32 + + Inst_VOPC__V_CMP_U_F32::~Inst_VOPC__V_CMP_U_F32() + { + } // ~Inst_VOPC__V_CMP_U_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (std::isnan(src0[lane]) + || std::isnan(src1[lane])) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NGE_F32 class methods --- + + Inst_VOPC__V_CMP_NGE_F32::Inst_VOPC__V_CMP_NGE_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nge_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_NGE_F32 + + Inst_VOPC__V_CMP_NGE_F32::~Inst_VOPC__V_CMP_NGE_F32() + { + } // ~Inst_VOPC__V_CMP_NGE_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NLG_F32 class methods --- + + Inst_VOPC__V_CMP_NLG_F32::Inst_VOPC__V_CMP_NLG_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nlg_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_NLG_F32 + + Inst_VOPC__V_CMP_NLG_F32::~Inst_VOPC__V_CMP_NLG_F32() + { + } // ~Inst_VOPC__V_CMP_NLG_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NGT_F32 class methods --- + + Inst_VOPC__V_CMP_NGT_F32::Inst_VOPC__V_CMP_NGT_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ngt_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_NGT_F32 + + Inst_VOPC__V_CMP_NGT_F32::~Inst_VOPC__V_CMP_NGT_F32() + { + } // ~Inst_VOPC__V_CMP_NGT_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
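+    // Note on the negated compares (NGE, NLG, NGT, NLE, NEQ, NLT): they are
+    // implemented as the logical negation of the ordered predicate rather
+    // than as the complementary ordered compare. With IEEE floats,
+    // !(a > b) is true when either operand is NaN while (a <= b) is false,
+    // so e.g. v_cmp_ngt_f32 is not interchangeable with v_cmp_le_f32 once
+    // NaNs are involved.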
+ void + Inst_VOPC__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NLE_F32 class methods --- + + Inst_VOPC__V_CMP_NLE_F32::Inst_VOPC__V_CMP_NLE_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nle_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_NLE_F32 + + Inst_VOPC__V_CMP_NLE_F32::~Inst_VOPC__V_CMP_NLE_F32() + { + } // ~Inst_VOPC__V_CMP_NLE_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NEQ_F32 class methods --- + + Inst_VOPC__V_CMP_NEQ_F32::Inst_VOPC__V_CMP_NEQ_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_neq_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_NEQ_F32 + + Inst_VOPC__V_CMP_NEQ_F32::~Inst_VOPC__V_CMP_NEQ_F32() + { + } // ~Inst_VOPC__V_CMP_NEQ_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NLT_F32 class methods --- + + Inst_VOPC__V_CMP_NLT_F32::Inst_VOPC__V_CMP_NLT_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nlt_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_NLT_F32 + + Inst_VOPC__V_CMP_NLT_F32::~Inst_VOPC__V_CMP_NLT_F32() + { + } // ~Inst_VOPC__V_CMP_NLT_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_TRU_F32 class methods --- + + Inst_VOPC__V_CMP_TRU_F32::Inst_VOPC__V_CMP_TRU_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_tru_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_TRU_F32 + + Inst_VOPC__V_CMP_TRU_F32::~Inst_VOPC__V_CMP_TRU_F32() + { + } // ~Inst_VOPC__V_CMP_TRU_F32 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_F_F32 class methods --- + + Inst_VOPC__V_CMPX_F_F32::Inst_VOPC__V_CMPX_F_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_f_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_F_F32 + + Inst_VOPC__V_CMPX_F_F32::~Inst_VOPC__V_CMPX_F_F32() + { + } // ~Inst_VOPC__V_CMPX_F_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_LT_F32 class methods --- + + Inst_VOPC__V_CMPX_LT_F32::Inst_VOPC__V_CMPX_LT_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lt_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LT_F32 + + Inst_VOPC__V_CMPX_LT_F32::~Inst_VOPC__V_CMPX_LT_F32() + { + } // ~Inst_VOPC__V_CMPX_LT_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_EQ_F32 class methods --- + + Inst_VOPC__V_CMPX_EQ_F32::Inst_VOPC__V_CMPX_EQ_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_eq_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_EQ_F32 + + Inst_VOPC__V_CMPX_EQ_F32::~Inst_VOPC__V_CMPX_EQ_F32() + { + } // ~Inst_VOPC__V_CMPX_EQ_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_LE_F32 class methods --- + + Inst_VOPC__V_CMPX_LE_F32::Inst_VOPC__V_CMPX_LE_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_le_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LE_F32 + + Inst_VOPC__V_CMPX_LE_F32::~Inst_VOPC__V_CMPX_LE_F32() + { + } // ~Inst_VOPC__V_CMPX_LE_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_GT_F32 class methods --- + + Inst_VOPC__V_CMPX_GT_F32::Inst_VOPC__V_CMPX_GT_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_gt_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GT_F32 + + Inst_VOPC__V_CMPX_GT_F32::~Inst_VOPC__V_CMPX_GT_F32() + { + } // ~Inst_VOPC__V_CMPX_GT_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_LG_F32 class methods --- + + Inst_VOPC__V_CMPX_LG_F32::Inst_VOPC__V_CMPX_LG_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lg_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LG_F32 + + Inst_VOPC__V_CMPX_LG_F32::~Inst_VOPC__V_CMPX_LG_F32() + { + } // ~Inst_VOPC__V_CMPX_LG_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_GE_F32 class methods --- + + Inst_VOPC__V_CMPX_GE_F32::Inst_VOPC__V_CMPX_GE_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ge_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GE_F32 + + Inst_VOPC__V_CMPX_GE_F32::~Inst_VOPC__V_CMPX_GE_F32() + { + } // ~Inst_VOPC__V_CMPX_GE_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_O_F32 class methods --- + + Inst_VOPC__V_CMPX_O_F32::Inst_VOPC__V_CMPX_O_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_o_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_O_F32 + + Inst_VOPC__V_CMPX_O_F32::~Inst_VOPC__V_CMPX_O_F32() + { + } // ~Inst_VOPC__V_CMPX_O_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC + // encoding. + void + Inst_VOPC__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (!std::isnan(src0[lane]) + && !std::isnan(src1[lane])) ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_U_F32 class methods --- + + Inst_VOPC__V_CMPX_U_F32::Inst_VOPC__V_CMPX_U_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_u_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_U_F32 + + Inst_VOPC__V_CMPX_U_F32::~Inst_VOPC__V_CMPX_U_F32() + { + } // ~Inst_VOPC__V_CMPX_U_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC + // encoding. + void + Inst_VOPC__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (std::isnan(src0[lane]) + || std::isnan(src1[lane])) ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_NGE_F32 class methods --- + + Inst_VOPC__V_CMPX_NGE_F32::Inst_VOPC__V_CMPX_NGE_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nge_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NGE_F32 + + Inst_VOPC__V_CMPX_NGE_F32::~Inst_VOPC__V_CMPX_NGE_F32() + { + } // ~Inst_VOPC__V_CMPX_NGE_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 
1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_NLG_F32 class methods --- + + Inst_VOPC__V_CMPX_NLG_F32::Inst_VOPC__V_CMPX_NLG_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nlg_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NLG_F32 + + Inst_VOPC__V_CMPX_NLG_F32::~Inst_VOPC__V_CMPX_NLG_F32() + { + } // ~Inst_VOPC__V_CMPX_NLG_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_NGT_F32 class methods --- + + Inst_VOPC__V_CMPX_NGT_F32::Inst_VOPC__V_CMPX_NGT_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ngt_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NGT_F32 + + Inst_VOPC__V_CMPX_NGT_F32::~Inst_VOPC__V_CMPX_NGT_F32() + { + } // ~Inst_VOPC__V_CMPX_NGT_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_NLE_F32 class methods --- + + Inst_VOPC__V_CMPX_NLE_F32::Inst_VOPC__V_CMPX_NLE_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nle_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NLE_F32 + + Inst_VOPC__V_CMPX_NLE_F32::~Inst_VOPC__V_CMPX_NLE_F32() + { + } // ~Inst_VOPC__V_CMPX_NLE_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 
1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_NEQ_F32 class methods --- + + Inst_VOPC__V_CMPX_NEQ_F32::Inst_VOPC__V_CMPX_NEQ_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_neq_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NEQ_F32 + + Inst_VOPC__V_CMPX_NEQ_F32::~Inst_VOPC__V_CMPX_NEQ_F32() + { + } // ~Inst_VOPC__V_CMPX_NEQ_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] == src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_NLT_F32 class methods --- + + Inst_VOPC__V_CMPX_NLT_F32::Inst_VOPC__V_CMPX_NLT_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nlt_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NLT_F32 + + Inst_VOPC__V_CMPX_NLT_F32::~Inst_VOPC__V_CMPX_NLT_F32() + { + } // ~Inst_VOPC__V_CMPX_NLT_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_TRU_F32 class methods --- + + Inst_VOPC__V_CMPX_TRU_F32::Inst_VOPC__V_CMPX_TRU_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_tru_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_TRU_F32 + + Inst_VOPC__V_CMPX_TRU_F32::~Inst_VOPC__V_CMPX_TRU_F32() + { + } // ~Inst_VOPC__V_CMPX_TRU_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMP_F_F64 class methods --- + + Inst_VOPC__V_CMP_F_F64::Inst_VOPC__V_CMP_F_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_f_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_F_F64 + + Inst_VOPC__V_CMP_F_F64::~Inst_VOPC__V_CMP_F_F64() + { + } // ~Inst_VOPC__V_CMP_F_F64 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
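+    // The F64 compares below mirror the F32 ones above, using 64-bit vector
+    // source operands; V_CMP_F_F64 and V_CMP_TRU_F64 again ignore their
+    // sources and write a constant 0 / 1 bit per active lane.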
+ void + Inst_VOPC__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LT_F64 class methods --- + + Inst_VOPC__V_CMP_LT_F64::Inst_VOPC__V_CMP_LT_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lt_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_LT_F64 + + Inst_VOPC__V_CMP_LT_F64::~Inst_VOPC__V_CMP_LT_F64() + { + } // ~Inst_VOPC__V_CMP_LT_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_EQ_F64 class methods --- + + Inst_VOPC__V_CMP_EQ_F64::Inst_VOPC__V_CMP_EQ_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_eq_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_EQ_F64 + + Inst_VOPC__V_CMP_EQ_F64::~Inst_VOPC__V_CMP_EQ_F64() + { + } // ~Inst_VOPC__V_CMP_EQ_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LE_F64 class methods --- + + Inst_VOPC__V_CMP_LE_F64::Inst_VOPC__V_CMP_LE_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_le_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_LE_F64 + + Inst_VOPC__V_CMP_LE_F64::~Inst_VOPC__V_CMP_LE_F64() + { + } // ~Inst_VOPC__V_CMP_LE_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GT_F64 class methods --- + + Inst_VOPC__V_CMP_GT_F64::Inst_VOPC__V_CMP_GT_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_gt_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_GT_F64 + + Inst_VOPC__V_CMP_GT_F64::~Inst_VOPC__V_CMP_GT_F64() + { + } // ~Inst_VOPC__V_CMP_GT_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LG_F64 class methods --- + + Inst_VOPC__V_CMP_LG_F64::Inst_VOPC__V_CMP_LG_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lg_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_LG_F64 + + Inst_VOPC__V_CMP_LG_F64::~Inst_VOPC__V_CMP_LG_F64() + { + } // ~Inst_VOPC__V_CMP_LG_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GE_F64 class methods --- + + Inst_VOPC__V_CMP_GE_F64::Inst_VOPC__V_CMP_GE_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ge_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_GE_F64 + + Inst_VOPC__V_CMP_GE_F64::~Inst_VOPC__V_CMP_GE_F64() + { + } // ~Inst_VOPC__V_CMP_GE_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_O_F64 class methods --- + + Inst_VOPC__V_CMP_O_F64::Inst_VOPC__V_CMP_O_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_o_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_O_F64 + + Inst_VOPC__V_CMP_O_F64::~Inst_VOPC__V_CMP_O_F64() + { + } // ~Inst_VOPC__V_CMP_O_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (!std::isnan(src0[lane]) + && !std::isnan(src1[lane])) ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_U_F64 class methods --- + + Inst_VOPC__V_CMP_U_F64::Inst_VOPC__V_CMP_U_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_u_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_U_F64 + + Inst_VOPC__V_CMP_U_F64::~Inst_VOPC__V_CMP_U_F64() + { + } // ~Inst_VOPC__V_CMP_U_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (std::isnan(src0[lane]) + || std::isnan(src1[lane])) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NGE_F64 class methods --- + + Inst_VOPC__V_CMP_NGE_F64::Inst_VOPC__V_CMP_NGE_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nge_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_NGE_F64 + + Inst_VOPC__V_CMP_NGE_F64::~Inst_VOPC__V_CMP_NGE_F64() + { + } // ~Inst_VOPC__V_CMP_NGE_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NLG_F64 class methods --- + + Inst_VOPC__V_CMP_NLG_F64::Inst_VOPC__V_CMP_NLG_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nlg_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_NLG_F64 + + Inst_VOPC__V_CMP_NLG_F64::~Inst_VOPC__V_CMP_NLG_F64() + { + } // ~Inst_VOPC__V_CMP_NLG_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NGT_F64 class methods --- + + Inst_VOPC__V_CMP_NGT_F64::Inst_VOPC__V_CMP_NGT_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ngt_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_NGT_F64 + + Inst_VOPC__V_CMP_NGT_F64::~Inst_VOPC__V_CMP_NGT_F64() + { + } // ~Inst_VOPC__V_CMP_NGT_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
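// NaN handling is what separates the plain predicates from the "N"-prefixed
// ones. The ordered compares (lt, eq, le, gt, ge, lg) rely on native C++
// floating-point comparisons, which are false whenever either input is NaN,
// and v_cmp_o/v_cmp_u test orderedness explicitly with std::isnan. The
// negated forms (nge, nlg, ngt, nle, neq, nlt) therefore come out true for
// NaN inputs. For example, with src0[lane] = NaN:
//
//     src0[lane] > src1[lane]      // false -> v_cmp_gt_f64 writes 0
//     !(src0[lane] > src1[lane])   // true  -> v_cmp_ngt_f64 writes 1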
+ void + Inst_VOPC__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NLE_F64 class methods --- + + Inst_VOPC__V_CMP_NLE_F64::Inst_VOPC__V_CMP_NLE_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nle_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_NLE_F64 + + Inst_VOPC__V_CMP_NLE_F64::~Inst_VOPC__V_CMP_NLE_F64() + { + } // ~Inst_VOPC__V_CMP_NLE_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NEQ_F64 class methods --- + + Inst_VOPC__V_CMP_NEQ_F64::Inst_VOPC__V_CMP_NEQ_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_neq_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_NEQ_F64 + + Inst_VOPC__V_CMP_NEQ_F64::~Inst_VOPC__V_CMP_NEQ_F64() + { + } // ~Inst_VOPC__V_CMP_NEQ_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NLT_F64 class methods --- + + Inst_VOPC__V_CMP_NLT_F64::Inst_VOPC__V_CMP_NLT_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nlt_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_NLT_F64 + + Inst_VOPC__V_CMP_NLT_F64::~Inst_VOPC__V_CMP_NLT_F64() + { + } // ~Inst_VOPC__V_CMP_NLT_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_TRU_F64 class methods --- + + Inst_VOPC__V_CMP_TRU_F64::Inst_VOPC__V_CMP_TRU_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_tru_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_TRU_F64 + + Inst_VOPC__V_CMP_TRU_F64::~Inst_VOPC__V_CMP_TRU_F64() + { + } // ~Inst_VOPC__V_CMP_TRU_F64 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_F_F64 class methods --- + + Inst_VOPC__V_CMPX_F_F64::Inst_VOPC__V_CMPX_F_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_f_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_F_F64 + + Inst_VOPC__V_CMPX_F_F64::~Inst_VOPC__V_CMPX_F_F64() + { + } // ~Inst_VOPC__V_CMPX_F_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_LT_F64 class methods --- + + Inst_VOPC__V_CMPX_LT_F64::Inst_VOPC__V_CMPX_LT_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lt_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LT_F64 + + Inst_VOPC__V_CMPX_LT_F64::~Inst_VOPC__V_CMPX_LT_F64() + { + } // ~Inst_VOPC__V_CMPX_LT_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_EQ_F64 class methods --- + + Inst_VOPC__V_CMPX_EQ_F64::Inst_VOPC__V_CMPX_EQ_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_eq_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_EQ_F64 + + Inst_VOPC__V_CMPX_EQ_F64::~Inst_VOPC__V_CMPX_EQ_F64() + { + } // ~Inst_VOPC__V_CMPX_EQ_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_LE_F64 class methods --- + + Inst_VOPC__V_CMPX_LE_F64::Inst_VOPC__V_CMPX_LE_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_le_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LE_F64 + + Inst_VOPC__V_CMPX_LE_F64::~Inst_VOPC__V_CMPX_LE_F64() + { + } // ~Inst_VOPC__V_CMPX_LE_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GT_F64 class methods --- + + Inst_VOPC__V_CMPX_GT_F64::Inst_VOPC__V_CMPX_GT_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_gt_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GT_F64 + + Inst_VOPC__V_CMPX_GT_F64::~Inst_VOPC__V_CMPX_GT_F64() + { + } // ~Inst_VOPC__V_CMPX_GT_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LG_F64 class methods --- + + Inst_VOPC__V_CMPX_LG_F64::Inst_VOPC__V_CMPX_LG_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lg_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LG_F64 + + Inst_VOPC__V_CMPX_LG_F64::~Inst_VOPC__V_CMPX_LG_F64() + { + } // ~Inst_VOPC__V_CMPX_LG_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GE_F64 class methods --- + + Inst_VOPC__V_CMPX_GE_F64::Inst_VOPC__V_CMPX_GE_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ge_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GE_F64 + + Inst_VOPC__V_CMPX_GE_F64::~Inst_VOPC__V_CMPX_GE_F64() + { + } // ~Inst_VOPC__V_CMPX_GE_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
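// The CMPX variants differ from their CMP counterparts only in that the
// staged lane mask is also copied into the wavefront's EXEC mask via
// wf->execMask() = vcc.rawData(), so subsequent vector instructions execute
// only in lanes where the comparison held. Since the copy reads the locally
// staged value, placing it before or after vcc.write() makes no difference,
// and both orderings appear in this file.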
+ void + Inst_VOPC__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_O_F64 class methods --- + + Inst_VOPC__V_CMPX_O_F64::Inst_VOPC__V_CMPX_O_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_o_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_O_F64 + + Inst_VOPC__V_CMPX_O_F64::~Inst_VOPC__V_CMPX_O_F64() + { + } // ~Inst_VOPC__V_CMPX_O_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC + // encoding. + void + Inst_VOPC__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (!std::isnan(src0[lane]) + && !std::isnan(src1[lane])) ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_U_F64 class methods --- + + Inst_VOPC__V_CMPX_U_F64::Inst_VOPC__V_CMPX_U_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_u_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_U_F64 + + Inst_VOPC__V_CMPX_U_F64::~Inst_VOPC__V_CMPX_U_F64() + { + } // ~Inst_VOPC__V_CMPX_U_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC + // encoding. + void + Inst_VOPC__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (std::isnan(src0[lane]) + || std::isnan(src1[lane])) ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NGE_F64 class methods --- + + Inst_VOPC__V_CMPX_NGE_F64::Inst_VOPC__V_CMPX_NGE_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nge_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NGE_F64 + + Inst_VOPC__V_CMPX_NGE_F64::~Inst_VOPC__V_CMPX_NGE_F64() + { + } // ~Inst_VOPC__V_CMPX_NGE_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 
1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NLG_F64 class methods --- + + Inst_VOPC__V_CMPX_NLG_F64::Inst_VOPC__V_CMPX_NLG_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nlg_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NLG_F64 + + Inst_VOPC__V_CMPX_NLG_F64::~Inst_VOPC__V_CMPX_NLG_F64() + { + } // ~Inst_VOPC__V_CMPX_NLG_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NGT_F64 class methods --- + + Inst_VOPC__V_CMPX_NGT_F64::Inst_VOPC__V_CMPX_NGT_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ngt_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NGT_F64 + + Inst_VOPC__V_CMPX_NGT_F64::~Inst_VOPC__V_CMPX_NGT_F64() + { + } // ~Inst_VOPC__V_CMPX_NGT_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NLE_F64 class methods --- + + Inst_VOPC__V_CMPX_NLE_F64::Inst_VOPC__V_CMPX_NLE_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nle_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NLE_F64 + + Inst_VOPC__V_CMPX_NLE_F64::~Inst_VOPC__V_CMPX_NLE_F64() + { + } // ~Inst_VOPC__V_CMPX_NLE_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 
1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NEQ_F64 class methods --- + + Inst_VOPC__V_CMPX_NEQ_F64::Inst_VOPC__V_CMPX_NEQ_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_neq_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NEQ_F64 + + Inst_VOPC__V_CMPX_NEQ_F64::~Inst_VOPC__V_CMPX_NEQ_F64() + { + } // ~Inst_VOPC__V_CMPX_NEQ_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NLT_F64 class methods --- + + Inst_VOPC__V_CMPX_NLT_F64::Inst_VOPC__V_CMPX_NLT_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nlt_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NLT_F64 + + Inst_VOPC__V_CMPX_NLT_F64::~Inst_VOPC__V_CMPX_NLT_F64() + { + } // ~Inst_VOPC__V_CMPX_NLT_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_TRU_F64 class methods --- + + Inst_VOPC__V_CMPX_TRU_F64::Inst_VOPC__V_CMPX_TRU_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_tru_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_TRU_F64 + + Inst_VOPC__V_CMPX_TRU_F64::~Inst_VOPC__V_CMPX_TRU_F64() + { + } // ~Inst_VOPC__V_CMPX_TRU_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_F_I16 class methods --- + + Inst_VOPC__V_CMP_F_I16::Inst_VOPC__V_CMP_F_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_f_i16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_F_I16 + + Inst_VOPC__V_CMP_F_I16::~Inst_VOPC__V_CMP_F_I16() + { + } // ~Inst_VOPC__V_CMP_F_I16 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
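// The integer compares below reuse the same lane-mask pattern; only the
// element type of the vector operands changes. ConstVecOperandI16 gives
// signed 16-bit comparisons and ConstVecOperandU16 unsigned ones, so for a
// lane whose raw bits are 0xffff compared against 0:
//
//     signed view   (-1)    -> v_cmp_lt_i16 writes 1
//     unsigned view (65535) -> v_cmp_lt_u16 writes 0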
+ void + Inst_VOPC__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LT_I16 class methods --- + + Inst_VOPC__V_CMP_LT_I16::Inst_VOPC__V_CMP_LT_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lt_i16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LT_I16 + + Inst_VOPC__V_CMP_LT_I16::~Inst_VOPC__V_CMP_LT_I16() + { + } // ~Inst_VOPC__V_CMP_LT_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_EQ_I16 class methods --- + + Inst_VOPC__V_CMP_EQ_I16::Inst_VOPC__V_CMP_EQ_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_eq_i16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_EQ_I16 + + Inst_VOPC__V_CMP_EQ_I16::~Inst_VOPC__V_CMP_EQ_I16() + { + } // ~Inst_VOPC__V_CMP_EQ_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LE_I16 class methods --- + + Inst_VOPC__V_CMP_LE_I16::Inst_VOPC__V_CMP_LE_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_le_i16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LE_I16 + + Inst_VOPC__V_CMP_LE_I16::~Inst_VOPC__V_CMP_LE_I16() + { + } // ~Inst_VOPC__V_CMP_LE_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GT_I16 class methods --- + + Inst_VOPC__V_CMP_GT_I16::Inst_VOPC__V_CMP_GT_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_gt_i16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GT_I16 + + Inst_VOPC__V_CMP_GT_I16::~Inst_VOPC__V_CMP_GT_I16() + { + } // ~Inst_VOPC__V_CMP_GT_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NE_I16 class methods --- + + Inst_VOPC__V_CMP_NE_I16::Inst_VOPC__V_CMP_NE_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ne_i16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_NE_I16 + + Inst_VOPC__V_CMP_NE_I16::~Inst_VOPC__V_CMP_NE_I16() + { + } // ~Inst_VOPC__V_CMP_NE_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GE_I16 class methods --- + + Inst_VOPC__V_CMP_GE_I16::Inst_VOPC__V_CMP_GE_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ge_i16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GE_I16 + + Inst_VOPC__V_CMP_GE_I16::~Inst_VOPC__V_CMP_GE_I16() + { + } // ~Inst_VOPC__V_CMP_GE_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_T_I16 class methods --- + + Inst_VOPC__V_CMP_T_I16::Inst_VOPC__V_CMP_T_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_t_i16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_T_I16 + + Inst_VOPC__V_CMP_T_I16::~Inst_VOPC__V_CMP_T_I16() + { + } // ~Inst_VOPC__V_CMP_T_I16 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_F_U16 class methods --- + + Inst_VOPC__V_CMP_F_U16::Inst_VOPC__V_CMP_F_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_f_u16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_F_U16 + + Inst_VOPC__V_CMP_F_U16::~Inst_VOPC__V_CMP_F_U16() + { + } // ~Inst_VOPC__V_CMP_F_U16 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LT_U16 class methods --- + + Inst_VOPC__V_CMP_LT_U16::Inst_VOPC__V_CMP_LT_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lt_u16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LT_U16 + + Inst_VOPC__V_CMP_LT_U16::~Inst_VOPC__V_CMP_LT_U16() + { + } // ~Inst_VOPC__V_CMP_LT_U16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_EQ_U16 class methods --- + + Inst_VOPC__V_CMP_EQ_U16::Inst_VOPC__V_CMP_EQ_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_eq_u16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_EQ_U16 + + Inst_VOPC__V_CMP_EQ_U16::~Inst_VOPC__V_CMP_EQ_U16() + { + } // ~Inst_VOPC__V_CMP_EQ_U16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LE_U16 class methods --- + + Inst_VOPC__V_CMP_LE_U16::Inst_VOPC__V_CMP_LE_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_le_u16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LE_U16 + + Inst_VOPC__V_CMP_LE_U16::~Inst_VOPC__V_CMP_LE_U16() + { + } // ~Inst_VOPC__V_CMP_LE_U16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GT_U16 class methods --- + + Inst_VOPC__V_CMP_GT_U16::Inst_VOPC__V_CMP_GT_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_gt_u16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GT_U16 + + Inst_VOPC__V_CMP_GT_U16::~Inst_VOPC__V_CMP_GT_U16() + { + } // ~Inst_VOPC__V_CMP_GT_U16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NE_U16 class methods --- + + Inst_VOPC__V_CMP_NE_U16::Inst_VOPC__V_CMP_NE_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ne_u16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_NE_U16 + + Inst_VOPC__V_CMP_NE_U16::~Inst_VOPC__V_CMP_NE_U16() + { + } // ~Inst_VOPC__V_CMP_NE_U16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GE_U16 class methods --- + + Inst_VOPC__V_CMP_GE_U16::Inst_VOPC__V_CMP_GE_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ge_u16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GE_U16 + + Inst_VOPC__V_CMP_GE_U16::~Inst_VOPC__V_CMP_GE_U16() + { + } // ~Inst_VOPC__V_CMP_GE_U16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_T_U16 class methods --- + + Inst_VOPC__V_CMP_T_U16::Inst_VOPC__V_CMP_T_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_t_u16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_T_U16 + + Inst_VOPC__V_CMP_T_U16::~Inst_VOPC__V_CMP_T_U16() + { + } // ~Inst_VOPC__V_CMP_T_U16 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_F_I16 class methods --- + + Inst_VOPC__V_CMPX_F_I16::Inst_VOPC__V_CMPX_F_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_f_i16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_F_I16 + + Inst_VOPC__V_CMPX_F_I16::~Inst_VOPC__V_CMPX_F_I16() + { + } // ~Inst_VOPC__V_CMPX_F_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. 
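// The CMPX forms of the integer compares update EXEC exactly as the
// floating-point ones do. In the degenerate v_cmpx_f_* case below, every
// active lane's bit is cleared before the mask is copied into EXEC, which
// switches off all currently active lanes in the wavefront.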
+ void + Inst_VOPC__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LT_I16 class methods --- + + Inst_VOPC__V_CMPX_LT_I16::Inst_VOPC__V_CMPX_LT_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lt_i16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LT_I16 + + Inst_VOPC__V_CMPX_LT_I16::~Inst_VOPC__V_CMPX_LT_I16() + { + } // ~Inst_VOPC__V_CMPX_LT_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_EQ_I16 class methods --- + + Inst_VOPC__V_CMPX_EQ_I16::Inst_VOPC__V_CMPX_EQ_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_eq_i16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_EQ_I16 + + Inst_VOPC__V_CMPX_EQ_I16::~Inst_VOPC__V_CMPX_EQ_I16() + { + } // ~Inst_VOPC__V_CMPX_EQ_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LE_I16 class methods --- + + Inst_VOPC__V_CMPX_LE_I16::Inst_VOPC__V_CMPX_LE_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_le_i16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LE_I16 + + Inst_VOPC__V_CMPX_LE_I16::~Inst_VOPC__V_CMPX_LE_I16() + { + } // ~Inst_VOPC__V_CMPX_LE_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GT_I16 class methods --- + + Inst_VOPC__V_CMPX_GT_I16::Inst_VOPC__V_CMPX_GT_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_gt_i16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GT_I16 + + Inst_VOPC__V_CMPX_GT_I16::~Inst_VOPC__V_CMPX_GT_I16() + { + } // ~Inst_VOPC__V_CMPX_GT_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NE_I16 class methods --- + + Inst_VOPC__V_CMPX_NE_I16::Inst_VOPC__V_CMPX_NE_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ne_i16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NE_I16 + + Inst_VOPC__V_CMPX_NE_I16::~Inst_VOPC__V_CMPX_NE_I16() + { + } // ~Inst_VOPC__V_CMPX_NE_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GE_I16 class methods --- + + Inst_VOPC__V_CMPX_GE_I16::Inst_VOPC__V_CMPX_GE_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ge_i16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GE_I16 + + Inst_VOPC__V_CMPX_GE_I16::~Inst_VOPC__V_CMPX_GE_I16() + { + } // ~Inst_VOPC__V_CMPX_GE_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_T_I16 class methods --- + + Inst_VOPC__V_CMPX_T_I16::Inst_VOPC__V_CMPX_T_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_t_i16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_T_I16 + + Inst_VOPC__V_CMPX_T_I16::~Inst_VOPC__V_CMPX_T_I16() + { + } // ~Inst_VOPC__V_CMPX_T_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_F_U16 class methods --- + + Inst_VOPC__V_CMPX_F_U16::Inst_VOPC__V_CMPX_F_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_f_u16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_F_U16 + + Inst_VOPC__V_CMPX_F_U16::~Inst_VOPC__V_CMPX_F_U16() + { + } // ~Inst_VOPC__V_CMPX_F_U16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LT_U16 class methods --- + + Inst_VOPC__V_CMPX_LT_U16::Inst_VOPC__V_CMPX_LT_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lt_u16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LT_U16 + + Inst_VOPC__V_CMPX_LT_U16::~Inst_VOPC__V_CMPX_LT_U16() + { + } // ~Inst_VOPC__V_CMPX_LT_U16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_EQ_U16 class methods --- + + Inst_VOPC__V_CMPX_EQ_U16::Inst_VOPC__V_CMPX_EQ_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_eq_u16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_EQ_U16 + + Inst_VOPC__V_CMPX_EQ_U16::~Inst_VOPC__V_CMPX_EQ_U16() + { + } // ~Inst_VOPC__V_CMPX_EQ_U16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LE_U16 class methods --- + + Inst_VOPC__V_CMPX_LE_U16::Inst_VOPC__V_CMPX_LE_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_le_u16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LE_U16 + + Inst_VOPC__V_CMPX_LE_U16::~Inst_VOPC__V_CMPX_LE_U16() + { + } // ~Inst_VOPC__V_CMPX_LE_U16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GT_U16 class methods --- + + Inst_VOPC__V_CMPX_GT_U16::Inst_VOPC__V_CMPX_GT_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_gt_u16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GT_U16 + + Inst_VOPC__V_CMPX_GT_U16::~Inst_VOPC__V_CMPX_GT_U16() + { + } // ~Inst_VOPC__V_CMPX_GT_U16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NE_U16 class methods --- + + Inst_VOPC__V_CMPX_NE_U16::Inst_VOPC__V_CMPX_NE_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ne_u16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NE_U16 + + Inst_VOPC__V_CMPX_NE_U16::~Inst_VOPC__V_CMPX_NE_U16() + { + } // ~Inst_VOPC__V_CMPX_NE_U16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GE_U16 class methods --- + + Inst_VOPC__V_CMPX_GE_U16::Inst_VOPC__V_CMPX_GE_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ge_u16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GE_U16 + + Inst_VOPC__V_CMPX_GE_U16::~Inst_VOPC__V_CMPX_GE_U16() + { + } // ~Inst_VOPC__V_CMPX_GE_U16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_T_U16 class methods --- + + Inst_VOPC__V_CMPX_T_U16::Inst_VOPC__V_CMPX_T_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_t_u16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_T_U16 + + Inst_VOPC__V_CMPX_T_U16::~Inst_VOPC__V_CMPX_T_U16() + { + } // ~Inst_VOPC__V_CMPX_T_U16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_F_I32 class methods --- + + Inst_VOPC__V_CMP_F_I32::Inst_VOPC__V_CMP_F_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_f_i32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_F_I32 + + Inst_VOPC__V_CMP_F_I32::~Inst_VOPC__V_CMP_F_I32() + { + } // ~Inst_VOPC__V_CMP_F_I32 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LT_I32 class methods --- + + Inst_VOPC__V_CMP_LT_I32::Inst_VOPC__V_CMP_LT_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lt_i32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LT_I32 + + Inst_VOPC__V_CMP_LT_I32::~Inst_VOPC__V_CMP_LT_I32() + { + } // ~Inst_VOPC__V_CMP_LT_I32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_EQ_I32 class methods --- + + Inst_VOPC__V_CMP_EQ_I32::Inst_VOPC__V_CMP_EQ_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_eq_i32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_EQ_I32 + + Inst_VOPC__V_CMP_EQ_I32::~Inst_VOPC__V_CMP_EQ_I32() + { + } // ~Inst_VOPC__V_CMP_EQ_I32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LE_I32 class methods --- + + Inst_VOPC__V_CMP_LE_I32::Inst_VOPC__V_CMP_LE_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_le_i32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LE_I32 + + Inst_VOPC__V_CMP_LE_I32::~Inst_VOPC__V_CMP_LE_I32() + { + } // ~Inst_VOPC__V_CMP_LE_I32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GT_I32 class methods --- + + Inst_VOPC__V_CMP_GT_I32::Inst_VOPC__V_CMP_GT_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_gt_i32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GT_I32 + + Inst_VOPC__V_CMP_GT_I32::~Inst_VOPC__V_CMP_GT_I32() + { + } // ~Inst_VOPC__V_CMP_GT_I32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NE_I32 class methods --- + + Inst_VOPC__V_CMP_NE_I32::Inst_VOPC__V_CMP_NE_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ne_i32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_NE_I32 + + Inst_VOPC__V_CMP_NE_I32::~Inst_VOPC__V_CMP_NE_I32() + { + } // ~Inst_VOPC__V_CMP_NE_I32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GE_I32 class methods --- + + Inst_VOPC__V_CMP_GE_I32::Inst_VOPC__V_CMP_GE_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ge_i32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GE_I32 + + Inst_VOPC__V_CMP_GE_I32::~Inst_VOPC__V_CMP_GE_I32() + { + } // ~Inst_VOPC__V_CMP_GE_I32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
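// The 32-bit integer compares complete the pattern with ConstVecOperandI32
// and ConstVecOperandU32 element types. In compiler-generated code the VCC
// mask these plain CMP opcodes produce is typically consumed by a conditional
// select (v_cndmask_b32) or by scalar EXEC manipulation such as
// s_and_saveexec_b64 when lowering divergent branches; EXEC itself is left
// untouched here, unlike in the CMPX forms.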
+ void + Inst_VOPC__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_T_I32 class methods --- + + Inst_VOPC__V_CMP_T_I32::Inst_VOPC__V_CMP_T_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_t_i32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_T_I32 + + Inst_VOPC__V_CMP_T_I32::~Inst_VOPC__V_CMP_T_I32() + { + } // ~Inst_VOPC__V_CMP_T_I32 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_F_U32 class methods --- + + Inst_VOPC__V_CMP_F_U32::Inst_VOPC__V_CMP_F_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_f_u32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_F_U32 + + Inst_VOPC__V_CMP_F_U32::~Inst_VOPC__V_CMP_F_U32() + { + } // ~Inst_VOPC__V_CMP_F_U32 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LT_U32 class methods --- + + Inst_VOPC__V_CMP_LT_U32::Inst_VOPC__V_CMP_LT_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lt_u32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LT_U32 + + Inst_VOPC__V_CMP_LT_U32::~Inst_VOPC__V_CMP_LT_U32() + { + } // ~Inst_VOPC__V_CMP_LT_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_EQ_U32 class methods --- + + Inst_VOPC__V_CMP_EQ_U32::Inst_VOPC__V_CMP_EQ_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_eq_u32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_EQ_U32 + + Inst_VOPC__V_CMP_EQ_U32::~Inst_VOPC__V_CMP_EQ_U32() + { + } // ~Inst_VOPC__V_CMP_EQ_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
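+ // Note: the unsigned 32-bit compares in this group mirror the signed
+ // variants above; only the operand element type changes
+ // (ConstVecOperandU32 instead of ConstVecOperandI32), so the per-lane
+ // comparison is performed on unsigned values.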
+ void + Inst_VOPC__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LE_U32 class methods --- + + Inst_VOPC__V_CMP_LE_U32::Inst_VOPC__V_CMP_LE_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_le_u32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LE_U32 + + Inst_VOPC__V_CMP_LE_U32::~Inst_VOPC__V_CMP_LE_U32() + { + } // ~Inst_VOPC__V_CMP_LE_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GT_U32 class methods --- + + Inst_VOPC__V_CMP_GT_U32::Inst_VOPC__V_CMP_GT_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_gt_u32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GT_U32 + + Inst_VOPC__V_CMP_GT_U32::~Inst_VOPC__V_CMP_GT_U32() + { + } // ~Inst_VOPC__V_CMP_GT_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NE_U32 class methods --- + + Inst_VOPC__V_CMP_NE_U32::Inst_VOPC__V_CMP_NE_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ne_u32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_NE_U32 + + Inst_VOPC__V_CMP_NE_U32::~Inst_VOPC__V_CMP_NE_U32() + { + } // ~Inst_VOPC__V_CMP_NE_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GE_U32 class methods --- + + Inst_VOPC__V_CMP_GE_U32::Inst_VOPC__V_CMP_GE_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ge_u32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GE_U32 + + Inst_VOPC__V_CMP_GE_U32::~Inst_VOPC__V_CMP_GE_U32() + { + } // ~Inst_VOPC__V_CMP_GE_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_T_U32 class methods --- + + Inst_VOPC__V_CMP_T_U32::Inst_VOPC__V_CMP_T_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_t_u32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_T_U32 + + Inst_VOPC__V_CMP_T_U32::~Inst_VOPC__V_CMP_T_U32() + { + } // ~Inst_VOPC__V_CMP_T_U32 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_F_I32 class methods --- + + Inst_VOPC__V_CMPX_F_I32::Inst_VOPC__V_CMPX_F_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_f_i32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_F_I32 + + Inst_VOPC__V_CMPX_F_I32::~Inst_VOPC__V_CMPX_F_I32() + { + } // ~Inst_VOPC__V_CMPX_F_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LT_I32 class methods --- + + Inst_VOPC__V_CMPX_LT_I32::Inst_VOPC__V_CMPX_LT_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lt_i32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LT_I32 + + Inst_VOPC__V_CMPX_LT_I32::~Inst_VOPC__V_CMPX_LT_I32() + { + } // ~Inst_VOPC__V_CMPX_LT_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_EQ_I32 class methods --- + + Inst_VOPC__V_CMPX_EQ_I32::Inst_VOPC__V_CMPX_EQ_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_eq_i32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_EQ_I32 + + Inst_VOPC__V_CMPX_EQ_I32::~Inst_VOPC__V_CMPX_EQ_I32() + { + } // ~Inst_VOPC__V_CMPX_EQ_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LE_I32 class methods --- + + Inst_VOPC__V_CMPX_LE_I32::Inst_VOPC__V_CMPX_LE_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_le_i32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LE_I32 + + Inst_VOPC__V_CMPX_LE_I32::~Inst_VOPC__V_CMPX_LE_I32() + { + } // ~Inst_VOPC__V_CMPX_LE_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GT_I32 class methods --- + + Inst_VOPC__V_CMPX_GT_I32::Inst_VOPC__V_CMPX_GT_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_gt_i32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GT_I32 + + Inst_VOPC__V_CMPX_GT_I32::~Inst_VOPC__V_CMPX_GT_I32() + { + } // ~Inst_VOPC__V_CMPX_GT_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NE_I32 class methods --- + + Inst_VOPC__V_CMPX_NE_I32::Inst_VOPC__V_CMPX_NE_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ne_i32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NE_I32 + + Inst_VOPC__V_CMPX_NE_I32::~Inst_VOPC__V_CMPX_NE_I32() + { + } // ~Inst_VOPC__V_CMPX_NE_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
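+ // Note: "<>" in the .arch pseudocode denotes "not equal"; it is
+ // implemented below with the C++ != operator.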
+ void + Inst_VOPC__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GE_I32 class methods --- + + Inst_VOPC__V_CMPX_GE_I32::Inst_VOPC__V_CMPX_GE_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ge_i32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GE_I32 + + Inst_VOPC__V_CMPX_GE_I32::~Inst_VOPC__V_CMPX_GE_I32() + { + } // ~Inst_VOPC__V_CMPX_GE_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_T_I32 class methods --- + + Inst_VOPC__V_CMPX_T_I32::Inst_VOPC__V_CMPX_T_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_t_i32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_T_I32 + + Inst_VOPC__V_CMPX_T_I32::~Inst_VOPC__V_CMPX_T_I32() + { + } // ~Inst_VOPC__V_CMPX_T_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_F_U32 class methods --- + + Inst_VOPC__V_CMPX_F_U32::Inst_VOPC__V_CMPX_F_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_f_u32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_F_U32 + + Inst_VOPC__V_CMPX_F_U32::~Inst_VOPC__V_CMPX_F_U32() + { + } // ~Inst_VOPC__V_CMPX_F_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LT_U32 class methods --- + + Inst_VOPC__V_CMPX_LT_U32::Inst_VOPC__V_CMPX_LT_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lt_u32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LT_U32 + + Inst_VOPC__V_CMPX_LT_U32::~Inst_VOPC__V_CMPX_LT_U32() + { + } // ~Inst_VOPC__V_CMPX_LT_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
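+ // Note: as with every CMPX variant here, the per-lane result mask is
+ // committed to EXEC (wf->execMask() = vcc.rawData()) in addition to
+ // VCC, so lanes whose comparison fails are disabled for subsequent
+ // instructions.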
+ void + Inst_VOPC__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_EQ_U32 class methods --- + + Inst_VOPC__V_CMPX_EQ_U32::Inst_VOPC__V_CMPX_EQ_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_eq_u32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_EQ_U32 + + Inst_VOPC__V_CMPX_EQ_U32::~Inst_VOPC__V_CMPX_EQ_U32() + { + } // ~Inst_VOPC__V_CMPX_EQ_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LE_U32 class methods --- + + Inst_VOPC__V_CMPX_LE_U32::Inst_VOPC__V_CMPX_LE_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_le_u32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LE_U32 + + Inst_VOPC__V_CMPX_LE_U32::~Inst_VOPC__V_CMPX_LE_U32() + { + } // ~Inst_VOPC__V_CMPX_LE_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GT_U32 class methods --- + + Inst_VOPC__V_CMPX_GT_U32::Inst_VOPC__V_CMPX_GT_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_gt_u32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GT_U32 + + Inst_VOPC__V_CMPX_GT_U32::~Inst_VOPC__V_CMPX_GT_U32() + { + } // ~Inst_VOPC__V_CMPX_GT_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NE_U32 class methods --- + + Inst_VOPC__V_CMPX_NE_U32::Inst_VOPC__V_CMPX_NE_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ne_u32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NE_U32 + + Inst_VOPC__V_CMPX_NE_U32::~Inst_VOPC__V_CMPX_NE_U32() + { + } // ~Inst_VOPC__V_CMPX_NE_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GE_U32 class methods --- + + Inst_VOPC__V_CMPX_GE_U32::Inst_VOPC__V_CMPX_GE_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ge_u32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GE_U32 + + Inst_VOPC__V_CMPX_GE_U32::~Inst_VOPC__V_CMPX_GE_U32() + { + } // ~Inst_VOPC__V_CMPX_GE_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_T_U32 class methods --- + + Inst_VOPC__V_CMPX_T_U32::Inst_VOPC__V_CMPX_T_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_t_u32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_T_U32 + + Inst_VOPC__V_CMPX_T_U32::~Inst_VOPC__V_CMPX_T_U32() + { + } // ~Inst_VOPC__V_CMPX_T_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_F_I64 class methods --- + + Inst_VOPC__V_CMP_F_I64::Inst_VOPC__V_CMP_F_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_f_i64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_F_I64 + + Inst_VOPC__V_CMP_F_I64::~Inst_VOPC__V_CMP_F_I64() + { + } // ~Inst_VOPC__V_CMP_F_I64 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LT_I64 class methods --- + + Inst_VOPC__V_CMP_LT_I64::Inst_VOPC__V_CMP_LT_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lt_i64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LT_I64 + + Inst_VOPC__V_CMP_LT_I64::~Inst_VOPC__V_CMP_LT_I64() + { + } // ~Inst_VOPC__V_CMP_LT_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_EQ_I64 class methods --- + + Inst_VOPC__V_CMP_EQ_I64::Inst_VOPC__V_CMP_EQ_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_eq_i64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_EQ_I64 + + Inst_VOPC__V_CMP_EQ_I64::~Inst_VOPC__V_CMP_EQ_I64() + { + } // ~Inst_VOPC__V_CMP_EQ_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LE_I64 class methods --- + + Inst_VOPC__V_CMP_LE_I64::Inst_VOPC__V_CMP_LE_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_le_i64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LE_I64 + + Inst_VOPC__V_CMP_LE_I64::~Inst_VOPC__V_CMP_LE_I64() + { + } // ~Inst_VOPC__V_CMP_LE_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GT_I64 class methods --- + + Inst_VOPC__V_CMP_GT_I64::Inst_VOPC__V_CMP_GT_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_gt_i64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GT_I64 + + Inst_VOPC__V_CMP_GT_I64::~Inst_VOPC__V_CMP_GT_I64() + { + } // ~Inst_VOPC__V_CMP_GT_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
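+ // Note: the 64-bit compares read ConstVecOperandI64/U64 sources but,
+ // like the 32-bit forms, still produce a single result bit per lane
+ // in the 64-bit VCC mask.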
+ void + Inst_VOPC__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NE_I64 class methods --- + + Inst_VOPC__V_CMP_NE_I64::Inst_VOPC__V_CMP_NE_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ne_i64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_NE_I64 + + Inst_VOPC__V_CMP_NE_I64::~Inst_VOPC__V_CMP_NE_I64() + { + } // ~Inst_VOPC__V_CMP_NE_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GE_I64 class methods --- + + Inst_VOPC__V_CMP_GE_I64::Inst_VOPC__V_CMP_GE_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ge_i64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GE_I64 + + Inst_VOPC__V_CMP_GE_I64::~Inst_VOPC__V_CMP_GE_I64() + { + } // ~Inst_VOPC__V_CMP_GE_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_T_I64 class methods --- + + Inst_VOPC__V_CMP_T_I64::Inst_VOPC__V_CMP_T_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_t_i64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_T_I64 + + Inst_VOPC__V_CMP_T_I64::~Inst_VOPC__V_CMP_T_I64() + { + } // ~Inst_VOPC__V_CMP_T_I64 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_F_U64 class methods --- + + Inst_VOPC__V_CMP_F_U64::Inst_VOPC__V_CMP_F_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_f_u64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_F_U64 + + Inst_VOPC__V_CMP_F_U64::~Inst_VOPC__V_CMP_F_U64() + { + } // ~Inst_VOPC__V_CMP_F_U64 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LT_U64 class methods --- + + Inst_VOPC__V_CMP_LT_U64::Inst_VOPC__V_CMP_LT_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lt_u64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LT_U64 + + Inst_VOPC__V_CMP_LT_U64::~Inst_VOPC__V_CMP_LT_U64() + { + } // ~Inst_VOPC__V_CMP_LT_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_EQ_U64 class methods --- + + Inst_VOPC__V_CMP_EQ_U64::Inst_VOPC__V_CMP_EQ_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_eq_u64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_EQ_U64 + + Inst_VOPC__V_CMP_EQ_U64::~Inst_VOPC__V_CMP_EQ_U64() + { + } // ~Inst_VOPC__V_CMP_EQ_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LE_U64 class methods --- + + Inst_VOPC__V_CMP_LE_U64::Inst_VOPC__V_CMP_LE_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_le_u64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LE_U64 + + Inst_VOPC__V_CMP_LE_U64::~Inst_VOPC__V_CMP_LE_U64() + { + } // ~Inst_VOPC__V_CMP_LE_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GT_U64 class methods --- + + Inst_VOPC__V_CMP_GT_U64::Inst_VOPC__V_CMP_GT_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_gt_u64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GT_U64 + + Inst_VOPC__V_CMP_GT_U64::~Inst_VOPC__V_CMP_GT_U64() + { + } // ~Inst_VOPC__V_CMP_GT_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NE_U64 class methods --- + + Inst_VOPC__V_CMP_NE_U64::Inst_VOPC__V_CMP_NE_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ne_u64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_NE_U64 + + Inst_VOPC__V_CMP_NE_U64::~Inst_VOPC__V_CMP_NE_U64() + { + } // ~Inst_VOPC__V_CMP_NE_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GE_U64 class methods --- + + Inst_VOPC__V_CMP_GE_U64::Inst_VOPC__V_CMP_GE_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ge_u64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GE_U64 + + Inst_VOPC__V_CMP_GE_U64::~Inst_VOPC__V_CMP_GE_U64() + { + } // ~Inst_VOPC__V_CMP_GE_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_T_U64 class methods --- + + Inst_VOPC__V_CMP_T_U64::Inst_VOPC__V_CMP_T_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_t_u64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_T_U64 + + Inst_VOPC__V_CMP_T_U64::~Inst_VOPC__V_CMP_T_U64() + { + } // ~Inst_VOPC__V_CMP_T_U64 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_F_I64 class methods --- + + Inst_VOPC__V_CMPX_F_I64::Inst_VOPC__V_CMPX_F_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_f_i64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_F_I64 + + Inst_VOPC__V_CMPX_F_I64::~Inst_VOPC__V_CMPX_F_I64() + { + } // ~Inst_VOPC__V_CMPX_F_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LT_I64 class methods --- + + Inst_VOPC__V_CMPX_LT_I64::Inst_VOPC__V_CMPX_LT_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lt_i64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LT_I64 + + Inst_VOPC__V_CMPX_LT_I64::~Inst_VOPC__V_CMPX_LT_I64() + { + } // ~Inst_VOPC__V_CMPX_LT_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_EQ_I64 class methods --- + + Inst_VOPC__V_CMPX_EQ_I64::Inst_VOPC__V_CMPX_EQ_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_eq_i64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_EQ_I64 + + Inst_VOPC__V_CMPX_EQ_I64::~Inst_VOPC__V_CMPX_EQ_I64() + { + } // ~Inst_VOPC__V_CMPX_EQ_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LE_I64 class methods --- + + Inst_VOPC__V_CMPX_LE_I64::Inst_VOPC__V_CMPX_LE_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_le_i64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LE_I64 + + Inst_VOPC__V_CMPX_LE_I64::~Inst_VOPC__V_CMPX_LE_I64() + { + } // ~Inst_VOPC__V_CMPX_LE_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GT_I64 class methods --- + + Inst_VOPC__V_CMPX_GT_I64::Inst_VOPC__V_CMPX_GT_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_gt_i64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GT_I64 + + Inst_VOPC__V_CMPX_GT_I64::~Inst_VOPC__V_CMPX_GT_I64() + { + } // ~Inst_VOPC__V_CMPX_GT_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NE_I64 class methods --- + + Inst_VOPC__V_CMPX_NE_I64::Inst_VOPC__V_CMPX_NE_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ne_i64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NE_I64 + + Inst_VOPC__V_CMPX_NE_I64::~Inst_VOPC__V_CMPX_NE_I64() + { + } // ~Inst_VOPC__V_CMPX_NE_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GE_I64 class methods --- + + Inst_VOPC__V_CMPX_GE_I64::Inst_VOPC__V_CMPX_GE_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ge_i64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GE_I64 + + Inst_VOPC__V_CMPX_GE_I64::~Inst_VOPC__V_CMPX_GE_I64() + { + } // ~Inst_VOPC__V_CMPX_GE_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_T_I64 class methods --- + + Inst_VOPC__V_CMPX_T_I64::Inst_VOPC__V_CMPX_T_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_t_i64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_T_I64 + + Inst_VOPC__V_CMPX_T_I64::~Inst_VOPC__V_CMPX_T_I64() + { + } // ~Inst_VOPC__V_CMPX_T_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_F_U64 class methods --- + + Inst_VOPC__V_CMPX_F_U64::Inst_VOPC__V_CMPX_F_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_f_u64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_F_U64 + + Inst_VOPC__V_CMPX_F_U64::~Inst_VOPC__V_CMPX_F_U64() + { + } // ~Inst_VOPC__V_CMPX_F_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LT_U64 class methods --- + + Inst_VOPC__V_CMPX_LT_U64::Inst_VOPC__V_CMPX_LT_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lt_u64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LT_U64 + + Inst_VOPC__V_CMPX_LT_U64::~Inst_VOPC__V_CMPX_LT_U64() + { + } // ~Inst_VOPC__V_CMPX_LT_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_EQ_U64 class methods --- + + Inst_VOPC__V_CMPX_EQ_U64::Inst_VOPC__V_CMPX_EQ_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_eq_u64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_EQ_U64 + + Inst_VOPC__V_CMPX_EQ_U64::~Inst_VOPC__V_CMPX_EQ_U64() + { + } // ~Inst_VOPC__V_CMPX_EQ_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LE_U64 class methods --- + + Inst_VOPC__V_CMPX_LE_U64::Inst_VOPC__V_CMPX_LE_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_le_u64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LE_U64 + + Inst_VOPC__V_CMPX_LE_U64::~Inst_VOPC__V_CMPX_LE_U64() + { + } // ~Inst_VOPC__V_CMPX_LE_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GT_U64 class methods --- + + Inst_VOPC__V_CMPX_GT_U64::Inst_VOPC__V_CMPX_GT_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_gt_u64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GT_U64 + + Inst_VOPC__V_CMPX_GT_U64::~Inst_VOPC__V_CMPX_GT_U64() + { + } // ~Inst_VOPC__V_CMPX_GT_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NE_U64 class methods --- + + Inst_VOPC__V_CMPX_NE_U64::Inst_VOPC__V_CMPX_NE_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ne_u64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NE_U64 + + Inst_VOPC__V_CMPX_NE_U64::~Inst_VOPC__V_CMPX_NE_U64() + { + } // ~Inst_VOPC__V_CMPX_NE_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GE_U64 class methods --- + + Inst_VOPC__V_CMPX_GE_U64::Inst_VOPC__V_CMPX_GE_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ge_u64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GE_U64 + + Inst_VOPC__V_CMPX_GE_U64::~Inst_VOPC__V_CMPX_GE_U64() + { + } // ~Inst_VOPC__V_CMPX_GE_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_T_U64 class methods --- + + Inst_VOPC__V_CMPX_T_U64::Inst_VOPC__V_CMPX_T_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_t_u64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_T_U64 + + Inst_VOPC__V_CMPX_T_U64::~Inst_VOPC__V_CMPX_T_U64() + { + } // ~Inst_VOPC__V_CMPX_T_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute +} // namespace VegaISA +} // namespace gem5
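
The VOPC implementations above all follow one pattern: read the two source operands, evaluate the comparison for every lane enabled in the wavefront's EXEC mask, pack the per-lane results into the 64-bit VCC register, and, for the CMPX variants, copy that mask back into EXEC. The standalone sketch below illustrates that pattern outside of gem5; the lane count, the plain arrays, and the function name are hypothetical stand-ins for gem5's Wavefront and operand classes, not part of this patch.

#include <cstdint>
#include <cstdio>

constexpr int NUM_LANES = 64;   // wavefront width assumed for this sketch

// Build a VCC-style bitmask: bit N holds the compare result for lane N.
// Only lanes enabled in execMask are evaluated, matching the
// wf->execMask(lane) guard used in the instructions above.
uint64_t
cmpLtU32(const uint32_t *src0, const uint32_t *src1, uint64_t execMask)
{
    uint64_t vcc = 0;
    for (int lane = 0; lane < NUM_LANES; ++lane) {
        if (execMask & (1ULL << lane)) {
            vcc |= (src0[lane] < src1[lane] ? 1ULL : 0ULL) << lane;
        }
    }
    return vcc;
}

int main()
{
    uint32_t a[NUM_LANES] = {1, 5, 3};
    uint32_t b[NUM_LANES] = {2, 4, 3};
    uint64_t exec = 0x7;          // only lanes 0..2 active
    uint64_t vcc = cmpLtU32(a, b, exec);

    // A CMPX-style compare would additionally commit the mask to EXEC,
    // turning off every active lane whose comparison evaluated false.
    uint64_t execAfterCmpx = vcc;

    std::printf("vcc=0x%llx exec(after cmpx)=0x%llx\n",
                (unsigned long long)vcc,
                (unsigned long long)execAfterCmpx);
    return 0;
}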