gpu-compute: remove index-based operand access
This commit removes the functions that indexed into the vectors that held the operands. Instead, for-each loops are used, each iterating through one of six vectors (src, dst, srcScalar, srcVec, dstScalar, dstVec) that hold various (potentially overlapping) combinations of the operands.

Change-Id: Ia3a857c8f6675be86c51ba2f77e3d85bfea9ffdb
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42212
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
committed by
Matt Sinclair
parent
b40b361bee
commit
2bb8d6bc0c
@@ -168,11 +168,11 @@ namespace Gcn3ISA
|
||||
typedef int64_t VecElemI64;
|
||||
typedef double VecElemF64;
|
||||
|
||||
const int DWORDSize = sizeof(VecElemU32);
|
||||
const int DWordSize = sizeof(VecElemU32);
|
||||
/**
|
||||
* Size of a single-precision register in DWORDs.
|
||||
* Size of a single-precision register in DWords.
|
||||
*/
|
||||
const int RegSizeDWORDs = sizeof(VecElemU32) / DWORDSize;
|
||||
const int RegSizeDWords = sizeof(VecElemU32) / DWordSize;
|
||||
|
||||
// typedefs for the various sizes/types of vector regs
|
||||
using VecRegU8 = ::VecRegT<VecElemU8, NumVecElemPerVecReg, false>;
|
||||
|
||||
@@ -63,20 +63,21 @@ namespace Gcn3ISA
|
||||
|
||||
// Needed because can't take addr of bitfield
|
||||
int reg = instData.SSRC0;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(instData.SSRC0), false, false);
|
||||
opNum++;
|
||||
|
||||
reg = instData.SSRC1;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(instData.SSRC1), false, false);
|
||||
opNum++;
|
||||
|
||||
reg = instData.SDST;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
isScalarReg(instData.SDST), false, false);
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -150,16 +151,26 @@ namespace Gcn3ISA
|
||||
int opNum = 0;
|
||||
|
||||
// Needed because can't take addr of bitfield
|
||||
int reg = instData.SIMM16;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
int reg = instData.SDST;
|
||||
if (numSrcRegOperands() == getNumOperands()) {
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(reg), false, false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
reg = instData.SIMM16;
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, false, true);
|
||||
opNum++;
|
||||
|
||||
reg = instData.SDST;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
isScalarReg(instData.SDST), false, false);
|
||||
if (numDstRegOperands()){
|
||||
reg = instData.SDST;
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
isScalarReg(reg), false, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -237,16 +248,17 @@ namespace Gcn3ISA
|
||||
// Needed because can't take addr of bitfield
|
||||
int reg = instData.SSRC0;
|
||||
if (instData.OP != 0x1C) {
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(instData.SSRC0), false, false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
reg = instData.SDST;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
isScalarReg(instData.SDST), false, false);
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -311,13 +323,14 @@ namespace Gcn3ISA
|
||||
|
||||
// Needed because can't take addr of bitfield
|
||||
int reg = instData.SSRC0;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(instData.SSRC0), false, false);
|
||||
opNum++;
|
||||
|
||||
reg = instData.SSRC1;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(instData.SSRC1), false, false);
|
||||
|
||||
}
|
||||
|
||||
int
|
||||
@@ -385,18 +398,19 @@ namespace Gcn3ISA
|
||||
if (numSrcRegOperands()) {
|
||||
// Needed because can't take addr of bitfield
|
||||
int reg = instData.SIMM16;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, false, true);
|
||||
|
||||
opNum++;
|
||||
|
||||
if (readsVCC()) {
|
||||
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
}
|
||||
}
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -496,22 +510,22 @@ namespace Gcn3ISA
|
||||
if (numSrcRegOperands()) {
|
||||
reg = instData.SDATA;
|
||||
if (numSrcRegOperands() == getNumOperands()) {
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(reg), false, false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
reg = instData.SBASE;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
|
||||
reg = extData.OFFSET;
|
||||
if (instData.IMM) {
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, false, true);
|
||||
} else {
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(reg), false, false);
|
||||
}
|
||||
opNum++;
|
||||
@@ -519,11 +533,12 @@ namespace Gcn3ISA
|
||||
|
||||
if (numDstRegOperands()) {
|
||||
reg = instData.SDATA;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
isScalarReg(reg), false, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -600,35 +615,36 @@ namespace Gcn3ISA
|
||||
|
||||
// Needed because can't take addr of bitfield
|
||||
int reg = instData.SRC0;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(reg), isVectorReg(reg), false);
|
||||
opNum++;
|
||||
|
||||
reg = instData.VSRC1;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
|
||||
// VCC read
|
||||
if (readsVCC()) {
|
||||
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
// VDST
|
||||
reg = instData.VDST;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
|
||||
// VCC write
|
||||
if (writesVCC()) {
|
||||
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
|
||||
true, false, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -735,18 +751,19 @@ namespace Gcn3ISA
|
||||
int reg = instData.SRC0;
|
||||
|
||||
if (numSrcRegOperands()) {
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(reg), isVectorReg(reg), false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
if (numDstRegOperands()) {
|
||||
reg = instData.VDST;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
false, true, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -827,20 +844,21 @@ namespace Gcn3ISA
|
||||
|
||||
// Needed because can't take addr of bitfield
|
||||
int reg = instData.SRC0;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(reg), isVectorReg(reg), false);
|
||||
opNum++;
|
||||
|
||||
reg = instData.VSRC1;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
|
||||
assert(writesVCC());
|
||||
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
|
||||
true, false, false);
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -928,13 +946,13 @@ namespace Gcn3ISA
|
||||
int numDst = numDstRegOperands() - writesVCC();
|
||||
|
||||
for (opNum = 0; opNum < numSrc; opNum++) {
|
||||
operands.emplace_back(srcs[opNum], getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(srcs[opNum], getOperandSize(opNum), true,
|
||||
isScalarReg(srcs[opNum]),
|
||||
isVectorReg(srcs[opNum]), false);
|
||||
}
|
||||
|
||||
if (readsVCC()) {
|
||||
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
}
|
||||
@@ -942,17 +960,18 @@ namespace Gcn3ISA
|
||||
if (numDst) {
|
||||
// Needed because can't take addr of bitfield
|
||||
int reg = instData.VDST;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
sgprDst, !sgprDst, false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
if (writesVCC()) {
|
||||
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
|
||||
true, false, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -1048,13 +1067,13 @@ namespace Gcn3ISA
|
||||
int numDst = numDstRegOperands() - writesVCC();
|
||||
|
||||
for (opNum = 0; opNum < numSrc; opNum++) {
|
||||
operands.emplace_back(srcs[opNum], getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(srcs[opNum], getOperandSize(opNum), true,
|
||||
isScalarReg(srcs[opNum]),
|
||||
isVectorReg(srcs[opNum]), false);
|
||||
}
|
||||
|
||||
if (readsVCC()) {
|
||||
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
}
|
||||
@@ -1062,17 +1081,18 @@ namespace Gcn3ISA
|
||||
if (numDst) {
|
||||
// Needed because can't take addr of bitfield
|
||||
int reg = instData.VDST;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
if (writesVCC()) {
|
||||
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
|
||||
true, false, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -1151,18 +1171,19 @@ namespace Gcn3ISA
|
||||
int opIdx = 0;
|
||||
|
||||
for (opIdx = 0; opIdx < numSrcRegOperands(); opIdx++){
|
||||
operands.emplace_back(srcs[opIdx], getOperandSize(opIdx), true,
|
||||
srcOps.emplace_back(srcs[opIdx], getOperandSize(opIdx), true,
|
||||
false, true, false);
|
||||
}
|
||||
|
||||
if (numDstRegOperands()) {
|
||||
// Needed because can't take addr of bitfield
|
||||
int reg = extData.VDST;
|
||||
operands.emplace_back(reg, getOperandSize(opIdx), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opIdx), false,
|
||||
false, true, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -1241,23 +1262,23 @@ namespace Gcn3ISA
|
||||
if (numSrcRegOperands()) {
|
||||
if (numSrcRegOperands() == getNumOperands()) {
|
||||
reg = extData.VDATA;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
reg = extData.VADDR;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
|
||||
reg = extData.SRSRC;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
|
||||
reg = extData.SOFFSET;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
}
|
||||
@@ -1265,11 +1286,12 @@ namespace Gcn3ISA
|
||||
// extData.VDATA moves in the reg list depending on the instruction
|
||||
if (numDstRegOperands()) {
|
||||
reg = extData.VDATA;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
false, true, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -1331,34 +1353,35 @@ namespace Gcn3ISA
|
||||
|
||||
if (numSrcRegOperands() == getNumOperands()) {
|
||||
reg = extData.VDATA;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
reg = extData.VADDR;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
|
||||
reg = extData.SRSRC;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
|
||||
reg = extData.SOFFSET;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
|
||||
// extData.VDATA moves in the reg list depending on the instruction
|
||||
if (numDstRegOperands()) {
|
||||
reg = extData.VDATA;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
false, true, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -1403,24 +1426,24 @@ namespace Gcn3ISA
|
||||
|
||||
if (numSrcRegOperands() == getNumOperands()) {
|
||||
reg = extData.VDATA;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
reg = extData.VADDR;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
|
||||
reg = extData.SRSRC;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
|
||||
if (getNumOperands() == 4) {
|
||||
reg = extData.SSAMP;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
}
|
||||
@@ -1428,11 +1451,12 @@ namespace Gcn3ISA
|
||||
// extData.VDATA moves in the reg list depending on the instruction
|
||||
if (numDstRegOperands()) {
|
||||
reg = extData.VDATA;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
false, true, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -1468,7 +1492,7 @@ namespace Gcn3ISA
|
||||
extData.VSRC2, extData.VSRC3};
|
||||
|
||||
for (opNum = 0; opNum < 4; opNum++) {
|
||||
operands.emplace_back(srcs[opNum], getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(srcs[opNum], getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
}
|
||||
|
||||
@@ -1520,24 +1544,25 @@ namespace Gcn3ISA
|
||||
assert(isAtomic());
|
||||
|
||||
reg = extData.ADDR;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
|
||||
if (numSrcRegOperands() == 2) {
|
||||
reg = extData.DATA;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
if (numDstRegOperands()) {
|
||||
reg = extData.VDST;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
false, true, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
|
||||
@@ -168,11 +168,11 @@ namespace VegaISA
|
||||
typedef int64_t VecElemI64;
|
||||
typedef double VecElemF64;
|
||||
|
||||
const int DWORDSize = sizeof(VecElemU32);
|
||||
const int DWordSize = sizeof(VecElemU32);
|
||||
/**
|
||||
* Size of a single-precision register in DWORDs.
|
||||
* Size of a single-precision register in DWords.
|
||||
*/
|
||||
const int RegSizeDWORDs = sizeof(VecElemU32) / DWORDSize;
|
||||
const int RegSizeDWords = sizeof(VecElemU32) / DWordSize;
|
||||
|
||||
// typedefs for the various sizes/types of vector regs
|
||||
using VecRegU8 = ::VecRegT<VecElemU8, NumVecElemPerVecReg, false>;
|
||||
|
||||
@@ -63,20 +63,21 @@ namespace VegaISA
|
||||
|
||||
// Needed because can't take addr of bitfield
|
||||
int reg = instData.SSRC0;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(instData.SSRC0), false, false);
|
||||
opNum++;
|
||||
|
||||
reg = instData.SSRC1;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(instData.SSRC1), false, false);
|
||||
opNum++;
|
||||
|
||||
reg = instData.SDST;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
isScalarReg(instData.SDST), false, false);
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -150,16 +151,26 @@ namespace VegaISA
|
||||
int opNum = 0;
|
||||
|
||||
// Needed because can't take addr of bitfield
|
||||
int reg = instData.SIMM16;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
int reg = instData.SDST;
|
||||
if (numSrcRegOperands() == getNumOperands()) {
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(reg), false, false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
reg = instData.SIMM16;
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, false, true);
|
||||
opNum++;
|
||||
|
||||
reg = instData.SDST;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
isScalarReg(instData.SDST), false, false);
|
||||
if (numDstRegOperands()) {
|
||||
reg = instData.SDST;
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
isScalarReg(reg), false, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -237,16 +248,17 @@ namespace VegaISA
|
||||
// Needed because can't take addr of bitfield
|
||||
int reg = instData.SSRC0;
|
||||
if (instData.OP != 0x1C) {
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(instData.SSRC0), false, false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
reg = instData.SDST;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
isScalarReg(instData.SDST), false, false);
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -311,13 +323,14 @@ namespace VegaISA
|
||||
|
||||
// Needed because can't take addr of bitfield
|
||||
int reg = instData.SSRC0;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(instData.SSRC0), false, false);
|
||||
opNum++;
|
||||
|
||||
reg = instData.SSRC1;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(instData.SSRC1), false, false);
|
||||
|
||||
}
|
||||
|
||||
int
|
||||
@@ -385,18 +398,19 @@ namespace VegaISA
|
||||
if (numSrcRegOperands()) {
|
||||
// Needed because can't take addr of bitfield
|
||||
int reg = instData.SIMM16;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, false, true);
|
||||
|
||||
opNum++;
|
||||
|
||||
if (readsVCC()) {
|
||||
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
}
|
||||
}
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -496,22 +510,22 @@ namespace VegaISA
|
||||
if (numSrcRegOperands()) {
|
||||
reg = instData.SDATA;
|
||||
if (numSrcRegOperands() == getNumOperands()) {
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(reg), false, false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
reg = instData.SBASE;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
|
||||
reg = extData.OFFSET;
|
||||
if (instData.IMM) {
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, false, true);
|
||||
} else {
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(reg), false, false);
|
||||
}
|
||||
opNum++;
|
||||
@@ -519,11 +533,12 @@ namespace VegaISA
|
||||
|
||||
if (numDstRegOperands()) {
|
||||
reg = instData.SDATA;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
isScalarReg(reg), false, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -600,35 +615,36 @@ namespace VegaISA
|
||||
|
||||
// Needed because can't take addr of bitfield
|
||||
int reg = instData.SRC0;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(reg), isVectorReg(reg), false);
|
||||
opNum++;
|
||||
|
||||
reg = instData.VSRC1;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
|
||||
// VCC read
|
||||
if (readsVCC()) {
|
||||
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
// VDST
|
||||
reg = instData.VDST;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
|
||||
// VCC write
|
||||
if (writesVCC()) {
|
||||
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
|
||||
true, false, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -735,18 +751,19 @@ namespace VegaISA
|
||||
int reg = instData.SRC0;
|
||||
|
||||
if (numSrcRegOperands()) {
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(reg), isVectorReg(reg), false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
if (numDstRegOperands()) {
|
||||
reg = instData.VDST;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
false, true, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -827,20 +844,21 @@ namespace VegaISA
|
||||
|
||||
// Needed because can't take addr of bitfield
|
||||
int reg = instData.SRC0;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
isScalarReg(reg), isVectorReg(reg), false);
|
||||
opNum++;
|
||||
|
||||
reg = instData.VSRC1;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
|
||||
assert(writesVCC());
|
||||
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
|
||||
true, false, false);
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -928,13 +946,13 @@ namespace VegaISA
|
||||
int numDst = numDstRegOperands() - writesVCC();
|
||||
|
||||
for (opNum = 0; opNum < numSrc; opNum++) {
|
||||
operands.emplace_back(srcs[opNum], getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(srcs[opNum], getOperandSize(opNum), true,
|
||||
isScalarReg(srcs[opNum]),
|
||||
isVectorReg(srcs[opNum]), false);
|
||||
}
|
||||
|
||||
if (readsVCC()) {
|
||||
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
}
|
||||
@@ -942,17 +960,18 @@ namespace VegaISA
|
||||
if (numDst) {
|
||||
// Needed because can't take addr of bitfield
|
||||
int reg = instData.VDST;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
sgprDst, !sgprDst, false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
if (writesVCC()) {
|
||||
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
|
||||
true, false, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -1047,13 +1066,13 @@ namespace VegaISA
|
||||
int numDst = numDstRegOperands() - writesVCC();
|
||||
|
||||
for (opNum = 0; opNum < numSrc; opNum++) {
|
||||
operands.emplace_back(srcs[opNum], getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(srcs[opNum], getOperandSize(opNum), true,
|
||||
isScalarReg(srcs[opNum]),
|
||||
isVectorReg(srcs[opNum]), false);
|
||||
}
|
||||
|
||||
if (readsVCC()) {
|
||||
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
}
|
||||
@@ -1061,17 +1080,18 @@ namespace VegaISA
|
||||
if (numDst) {
|
||||
// Needed because can't take addr of bitfield
|
||||
int reg = instData.VDST;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
if (writesVCC()) {
|
||||
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
|
||||
true, false, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -1150,18 +1170,19 @@ namespace VegaISA
|
||||
int opIdx = 0;
|
||||
|
||||
for (opIdx = 0; opIdx < numSrcRegOperands(); opIdx++){
|
||||
operands.emplace_back(srcs[opIdx], getOperandSize(opIdx), true,
|
||||
srcOps.emplace_back(srcs[opIdx], getOperandSize(opIdx), true,
|
||||
false, true, false);
|
||||
}
|
||||
|
||||
if (numDstRegOperands()) {
|
||||
// Needed because can't take addr of bitfield
|
||||
int reg = extData.VDST;
|
||||
operands.emplace_back(reg, getOperandSize(opIdx), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opIdx), false,
|
||||
false, true, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -1241,23 +1262,23 @@ namespace VegaISA
|
||||
if (numSrcRegOperands()) {
|
||||
if (numSrcRegOperands() == getNumOperands()) {
|
||||
reg = extData.VDATA;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
reg = extData.VADDR;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
|
||||
reg = extData.SRSRC;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
|
||||
reg = extData.SOFFSET;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
}
|
||||
@@ -1265,11 +1286,12 @@ namespace VegaISA
|
||||
// extData.VDATA moves in the reg list depending on the instruction
|
||||
if (numDstRegOperands()) {
|
||||
reg = extData.VDATA;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
false, true, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -1332,34 +1354,35 @@ namespace VegaISA
|
||||
|
||||
if (numSrcRegOperands() == getNumOperands()) {
|
||||
reg = extData.VDATA;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
reg = extData.VADDR;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
|
||||
reg = extData.SRSRC;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
|
||||
reg = extData.SOFFSET;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
|
||||
// extData.VDATA moves in the reg list depending on the instruction
|
||||
if (numDstRegOperands()) {
|
||||
reg = extData.VDATA;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
false, true, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -1405,24 +1428,24 @@ namespace VegaISA
|
||||
|
||||
if (numSrcRegOperands() == getNumOperands()) {
|
||||
reg = extData.VDATA;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
reg = extData.VADDR;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
|
||||
reg = extData.SRSRC;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
|
||||
if (getNumOperands() == 4) {
|
||||
reg = extData.SSAMP;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
true, false, false);
|
||||
opNum++;
|
||||
}
|
||||
@@ -1430,11 +1453,12 @@ namespace VegaISA
|
||||
// extData.VDATA moves in the reg list depending on the instruction
|
||||
if (numDstRegOperands()) {
|
||||
reg = extData.VDATA;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
false, true, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
@@ -1470,7 +1494,7 @@ namespace VegaISA
|
||||
extData.VSRC2, extData.VSRC3};
|
||||
|
||||
for (opNum = 0; opNum < 4; opNum++) {
|
||||
operands.emplace_back(srcs[opNum], getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(srcs[opNum], getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
}
|
||||
|
||||
@@ -1522,24 +1546,25 @@ namespace VegaISA
|
||||
assert(isAtomic());
|
||||
|
||||
reg = extData.ADDR;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
|
||||
if (numSrcRegOperands() == 2) {
|
||||
reg = extData.DATA;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), true,
|
||||
srcOps.emplace_back(reg, getOperandSize(opNum), true,
|
||||
false, true, false);
|
||||
opNum++;
|
||||
}
|
||||
|
||||
if (numDstRegOperands()) {
|
||||
reg = extData.VDST;
|
||||
operands.emplace_back(reg, getOperandSize(opNum), false,
|
||||
dstOps.emplace_back(reg, getOperandSize(opNum), false,
|
||||
false, true, false);
|
||||
}
|
||||
|
||||
assert(operands.size() == getNumOperands());
|
||||
assert(srcOps.size() == numSrcRegOperands());
|
||||
assert(dstOps.size() == numDstRegOperands());
|
||||
}
|
||||
|
||||
int
|
||||
|
||||
@@ -557,7 +557,6 @@ FetchUnit::FetchBufDesc::decodeInsts()
|
||||
wavefront, gpu_static_inst,
|
||||
wavefront->computeUnit->
|
||||
getAndIncSeqNum());
|
||||
gpu_dyn_inst->initOperandInfo();
|
||||
wavefront->instructionBuffer.push_back(gpu_dyn_inst);
|
||||
|
||||
DPRINTF(GPUFetch, "WF[%d][%d]: Id%ld decoded %s (%d bytes). "
|
||||
@@ -598,7 +597,6 @@ FetchUnit::FetchBufDesc::decodeSplitInst()
|
||||
wavefront, gpu_static_inst,
|
||||
wavefront->computeUnit->
|
||||
getAndIncSeqNum());
|
||||
gpu_dyn_inst->initOperandInfo();
|
||||
wavefront->instructionBuffer.push_back(gpu_dyn_inst);
|
||||
|
||||
DPRINTF(GPUFetch, "WF[%d][%d]: Id%d decoded split inst %s (%#x) "
|
||||
|
||||
@@ -45,7 +45,7 @@ GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
|
||||
: GPUExecContext(_cu, _wf), scalarAddr(0), addr(computeUnit()->wfSize(),
|
||||
(Addr)0), numScalarReqs(0), isSaveRestore(false),
|
||||
_staticInst(static_inst), _seqNum(instSeqNum),
|
||||
maxSrcVecRegOpSize(0), maxSrcScalarRegOpSize(0)
|
||||
maxSrcVecRegOpSize(-1), maxSrcScalarRegOpSize(-1)
|
||||
{
|
||||
_staticInst->initOperandInfo();
|
||||
statusVector.assign(TheGpuISA::NumVecElemPerVecReg, 0);
|
||||
@@ -83,108 +83,13 @@ GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
|
||||
wg_id = -1;
|
||||
wfSlotId = -1;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
GPUDynInst::initOperandInfo()
|
||||
{
|
||||
/**
|
||||
* Generate and cache the operand to register mapping information. This
|
||||
* prevents this info from being generated multiple times throughout
|
||||
* the CU pipeline.
|
||||
*/
|
||||
|
||||
DPRINTF(GPUInst, "%s: generating operand info for %d operands\n",
|
||||
disassemble(), getNumOperands());
|
||||
|
||||
for (int op_idx = 0; op_idx < getNumOperands(); ++op_idx) {
|
||||
int virt_idx(-1);
|
||||
int phys_idx(-1);
|
||||
int op_num_dwords(-1);
|
||||
_staticInst->initDynOperandInfo(wavefront(), computeUnit());
|
||||
|
||||
if (isVectorRegister(op_idx)) {
|
||||
virt_idx = getRegisterIndex(op_idx);
|
||||
op_num_dwords = numOpdDWORDs(op_idx);
|
||||
|
||||
if (isSrcOperand(op_idx)) {
|
||||
std::vector<int> virt_indices;
|
||||
std::vector<int> phys_indices;
|
||||
|
||||
if (op_num_dwords > maxSrcVecRegOpSize) {
|
||||
maxSrcVecRegOpSize = op_num_dwords;
|
||||
}
|
||||
|
||||
for (int i = 0; i < op_num_dwords; ++i) {
|
||||
phys_idx = computeUnit()->registerManager->
|
||||
mapVgpr(wavefront(), virt_idx + i);
|
||||
virt_indices.push_back(virt_idx + i);
|
||||
phys_indices.push_back(phys_idx);
|
||||
}
|
||||
DPRINTF(GPUInst, "%s adding vector src (%d->%d) operand "
|
||||
"that uses %d registers.\n", disassemble(),
|
||||
virt_idx, computeUnit()->registerManager->
|
||||
mapVgpr(wavefront(), virt_idx), op_num_dwords);
|
||||
srcVecRegOps.emplace_back(op_idx, op_num_dwords, virt_indices,
|
||||
phys_indices);
|
||||
} else {
|
||||
assert(isDstOperand(op_idx));
|
||||
std::vector<int> virt_indices;
|
||||
std::vector<int> phys_indices;
|
||||
for (int i = 0; i < op_num_dwords; ++i) {
|
||||
phys_idx = computeUnit()->registerManager->
|
||||
mapVgpr(wavefront(), virt_idx + i);
|
||||
virt_indices.push_back(virt_idx + i);
|
||||
phys_indices.push_back(phys_idx);
|
||||
}
|
||||
DPRINTF(GPUInst, "%s adding vector dst (%d->%d) operand "
|
||||
"that uses %d registers.\n", disassemble(),
|
||||
virt_idx, computeUnit()->registerManager->
|
||||
mapVgpr(wavefront(), virt_idx), op_num_dwords);
|
||||
dstVecRegOps.emplace_back(op_idx, op_num_dwords, virt_indices,
|
||||
phys_indices);
|
||||
}
|
||||
} else if (isScalarRegister(op_idx)) {
|
||||
virt_idx = getRegisterIndex(op_idx);
|
||||
op_num_dwords = numOpdDWORDs(op_idx);
|
||||
|
||||
if (isSrcOperand(op_idx)) {
|
||||
std::vector<int> virt_indices;
|
||||
std::vector<int> phys_indices;
|
||||
|
||||
if (op_num_dwords > maxSrcScalarRegOpSize) {
|
||||
maxSrcScalarRegOpSize = op_num_dwords;
|
||||
}
|
||||
|
||||
for (int i = 0; i < op_num_dwords; ++i) {
|
||||
phys_idx = computeUnit()->registerManager->
|
||||
mapSgpr(wavefront(), virt_idx + i);
|
||||
virt_indices.push_back(virt_idx + i);
|
||||
phys_indices.push_back(phys_idx);
|
||||
}
|
||||
DPRINTF(GPUInst, "%s adding scalar src (%d->%d) operand "
|
||||
"that uses %d registers.\n", disassemble(),
|
||||
virt_idx, computeUnit()->registerManager->
|
||||
mapSgpr(wavefront(), virt_idx), op_num_dwords);
|
||||
srcScalarRegOps.emplace_back(op_idx, op_num_dwords,
|
||||
virt_indices, phys_indices);
|
||||
} else {
|
||||
assert(isDstOperand(op_idx));
|
||||
std::vector<int> virt_indices;
|
||||
std::vector<int> phys_indices;
|
||||
for (int i = 0; i < op_num_dwords; ++i) {
|
||||
phys_idx = computeUnit()->registerManager->
|
||||
mapSgpr(wavefront(), virt_idx + i);
|
||||
virt_indices.push_back(virt_idx + i);
|
||||
phys_indices.push_back(phys_idx);
|
||||
}
|
||||
DPRINTF(GPUInst, "%s adding scalar dst (%d->%d) operand "
|
||||
"that uses %d registers.\n", disassemble(),
|
||||
virt_idx, computeUnit()->registerManager->
|
||||
mapSgpr(wavefront(), virt_idx), op_num_dwords);
|
||||
dstScalarRegOps.emplace_back(op_idx, op_num_dwords,
|
||||
virt_indices, phys_indices);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GPUDynInst::~GPUDynInst()
|
||||
@@ -202,6 +107,30 @@ GPUDynInst::execute(GPUDynInstPtr gpuDynInst)
|
||||
_staticInst->execute(gpuDynInst);
|
||||
}
|
||||
|
||||
const std::vector<OperandInfo>&
|
||||
GPUDynInst::srcVecRegOperands() const
|
||||
{
|
||||
return _staticInst->srcVecRegOperands();
|
||||
}
|
||||
|
||||
const std::vector<OperandInfo>&
|
||||
GPUDynInst::dstVecRegOperands() const
|
||||
{
|
||||
return _staticInst->dstVecRegOperands();
|
||||
}
|
||||
|
||||
const std::vector<OperandInfo>&
|
||||
GPUDynInst::srcScalarRegOperands() const
|
||||
{
|
||||
return _staticInst->srcScalarRegOperands();
|
||||
}
|
||||
|
||||
const std::vector<OperandInfo>&
|
||||
GPUDynInst::dstScalarRegOperands() const
|
||||
{
|
||||
return _staticInst->dstScalarRegOperands();
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::numSrcRegOperands()
|
||||
{
|
||||
@@ -217,152 +146,113 @@ GPUDynInst::numDstRegOperands()
|
||||
int
|
||||
GPUDynInst::numSrcVecRegOperands() const
|
||||
{
|
||||
return srcVecRegOps.size();
|
||||
return _staticInst->numSrcVecOperands();
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::numDstVecRegOperands() const
|
||||
{
|
||||
return dstVecRegOps.size();
|
||||
return _staticInst->numDstVecOperands();
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::maxSrcVecRegOperandSize() const
|
||||
GPUDynInst::maxSrcVecRegOperandSize()
|
||||
{
|
||||
if (maxSrcVecRegOpSize != -1)
|
||||
return maxSrcVecRegOpSize;
|
||||
|
||||
maxSrcVecRegOpSize = 0;
|
||||
for (const auto& srcVecOp : srcVecRegOperands())
|
||||
if (srcVecOp.sizeInDWords() > maxSrcVecRegOpSize)
|
||||
maxSrcVecRegOpSize = srcVecOp.sizeInDWords();
|
||||
|
||||
return maxSrcVecRegOpSize;
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::numSrcVecDWords()
|
||||
{
|
||||
return _staticInst->numSrcVecDWords();
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::numDstVecDWords()
|
||||
{
|
||||
return _staticInst->numDstVecDWords();
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::numSrcScalarRegOperands() const
|
||||
{
|
||||
return srcScalarRegOps.size();
|
||||
return _staticInst->numSrcScalarOperands();
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::numDstScalarRegOperands() const
|
||||
{
|
||||
return dstScalarRegOps.size();
|
||||
return _staticInst->numDstScalarOperands();
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::maxSrcScalarRegOperandSize() const
|
||||
GPUDynInst::maxSrcScalarRegOperandSize()
|
||||
{
|
||||
if (maxSrcScalarRegOpSize != -1)
|
||||
return maxSrcScalarRegOpSize;
|
||||
|
||||
maxSrcScalarRegOpSize = 0;
|
||||
for (const auto& srcScOp : srcScalarRegOperands())
|
||||
if (srcScOp.sizeInDWords() > maxSrcScalarRegOpSize)
|
||||
maxSrcScalarRegOpSize = srcScOp.sizeInDWords();
|
||||
|
||||
return maxSrcScalarRegOpSize;
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::numSrcVecDWORDs()
|
||||
GPUDynInst::numSrcScalarDWords()
|
||||
{
|
||||
return _staticInst->numSrcVecDWORDs();
|
||||
return _staticInst->numSrcScalarDWords();
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::numDstVecDWORDs()
|
||||
GPUDynInst::numDstScalarDWords()
|
||||
{
|
||||
return _staticInst->numDstVecDWORDs();
|
||||
return _staticInst->numDstScalarDWords();
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::numOpdDWORDs(int operandIdx)
|
||||
GPUDynInst::maxOperandSize()
|
||||
{
|
||||
return _staticInst->numOpdDWORDs(operandIdx);
|
||||
return _staticInst->maxOperandSize();
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::getNumOperands()
|
||||
GPUDynInst::getNumOperands() const
|
||||
{
|
||||
return _staticInst->getNumOperands();
|
||||
}
|
||||
|
||||
bool
|
||||
GPUDynInst::isVectorRegister(int operandIdx)
|
||||
{
|
||||
return _staticInst->isVectorRegister(operandIdx);
|
||||
}
|
||||
|
||||
bool
|
||||
GPUDynInst::isScalarRegister(int operandIdx)
|
||||
{
|
||||
return _staticInst->isScalarRegister(operandIdx);
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::getRegisterIndex(int operandIdx)
|
||||
{
|
||||
return _staticInst->getRegisterIndex(operandIdx, wf->reservedScalarRegs);
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::getOperandSize(int operandIdx)
|
||||
{
|
||||
return _staticInst->getOperandSize(operandIdx);
|
||||
}
|
||||
|
||||
bool
|
||||
GPUDynInst::isDstOperand(int operandIdx)
|
||||
{
|
||||
return _staticInst->isDstOperand(operandIdx);
|
||||
}
|
||||
|
||||
bool
|
||||
GPUDynInst::isSrcOperand(int operandIdx)
|
||||
{
|
||||
return _staticInst->isSrcOperand(operandIdx);
|
||||
}
|
||||
|
||||
bool
|
||||
GPUDynInst::hasSourceSgpr() const
|
||||
{
|
||||
for (int i = 0; i < _staticInst->getNumOperands(); ++i) {
|
||||
if (_staticInst->isScalarRegister(i) && _staticInst->isSrcOperand(i)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
GPUDynInst::hasSourceVgpr() const
|
||||
{
|
||||
for (int i = 0; i < _staticInst->getNumOperands(); ++i) {
|
||||
if (_staticInst->isVectorRegister(i) && _staticInst->isSrcOperand(i)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
GPUDynInst::hasDestinationSgpr() const
|
||||
{
|
||||
for (int i = 0; i < _staticInst->getNumOperands(); ++i) {
|
||||
if (_staticInst->isScalarRegister(i) && _staticInst->isDstOperand(i)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
GPUDynInst::srcIsVgpr(int index) const
|
||||
{
|
||||
assert(index >= 0 && index < _staticInst->getNumOperands());
|
||||
if (_staticInst->isVectorRegister(index) &&
|
||||
_staticInst->isSrcOperand(index)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
return !srcVecRegOperands().empty();
|
||||
}
|
||||
|
||||
bool
|
||||
GPUDynInst::hasDestinationVgpr() const
|
||||
{
|
||||
for (int i = 0; i < _staticInst->getNumOperands(); ++i) {
|
||||
if (_staticInst->isVectorRegister(i) && _staticInst->isDstOperand(i)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
return !dstVecRegOperands().empty();
|
||||
}
|
||||
|
||||
bool
|
||||
GPUDynInst::hasSourceSgpr() const
|
||||
{
|
||||
return !srcScalarRegOperands().empty();
|
||||
}
|
||||
|
||||
bool
|
||||
GPUDynInst::hasDestinationSgpr() const
|
||||
{
|
||||
return !dstScalarRegOperands().empty();
|
||||
}
|
||||
|
||||
bool
|
||||
@@ -580,12 +470,20 @@ GPUDynInst::writesSCC() const
|
||||
bool
|
||||
GPUDynInst::readsVCC() const
|
||||
{
|
||||
for (const auto& srcOp : _staticInst->srcOperands())
|
||||
if (srcOp.isVcc())
|
||||
return true;
|
||||
|
||||
return _staticInst->readsVCC();
|
||||
}
|
||||
|
||||
bool
|
||||
GPUDynInst::writesVCC() const
|
||||
{
|
||||
for (const auto& dstOp : _staticInst->dstOperands())
|
||||
if (dstOp.isVcc())
|
||||
return true;
|
||||
|
||||
return _staticInst->writesVCC();
|
||||
}
|
||||
|
||||
@@ -602,13 +500,13 @@ GPUDynInst::writesMode() const
|
||||
}
|
||||
|
||||
bool
|
||||
GPUDynInst::readsEXEC() const
|
||||
GPUDynInst::readsExec() const
|
||||
{
|
||||
return _staticInst->readsEXEC();
|
||||
}
|
||||
|
||||
bool
|
||||
GPUDynInst::writesEXEC() const
|
||||
GPUDynInst::writesExec() const
|
||||
{
|
||||
return _staticInst->writesEXEC();
|
||||
}
|
||||
@@ -622,42 +520,40 @@ GPUDynInst::ignoreExec() const
|
||||
bool
|
||||
GPUDynInst::writesExecMask() const
|
||||
{
|
||||
for (int i = 0; i < _staticInst->getNumOperands(); ++i) {
|
||||
return _staticInst->isDstOperand(i) &&
|
||||
_staticInst->isExecMaskRegister(i);
|
||||
}
|
||||
return false;
|
||||
for (const auto& dstOp : _staticInst->dstOperands())
|
||||
if (dstOp.isExec())
|
||||
return true;
|
||||
|
||||
return _staticInst->writesEXEC();
|
||||
}
|
||||
|
||||
bool
|
||||
GPUDynInst::readsExecMask() const
|
||||
{
|
||||
for (int i = 0; i < _staticInst->getNumOperands(); ++i) {
|
||||
return _staticInst->isSrcOperand(i) &&
|
||||
_staticInst->isExecMaskRegister(i);
|
||||
}
|
||||
return false;
|
||||
for (const auto& srcOp : _staticInst->srcOperands())
|
||||
if (srcOp.isExec())
|
||||
return true;
|
||||
|
||||
return _staticInst->readsEXEC();
|
||||
}
|
||||
|
||||
bool
|
||||
GPUDynInst::writesFlatScratch() const
|
||||
{
|
||||
for (int i = 0; i < _staticInst->getNumOperands(); ++i) {
|
||||
if (_staticInst->isScalarRegister(i) && _staticInst->isDstOperand(i)) {
|
||||
return _staticInst->isFlatScratchRegister(i);
|
||||
}
|
||||
}
|
||||
for (const auto& dstScalarOp : dstScalarRegOperands())
|
||||
if (dstScalarOp.isFlatScratch())
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
GPUDynInst::readsFlatScratch() const
|
||||
{
|
||||
for (int i = 0; i < _staticInst->getNumOperands(); ++i) {
|
||||
if (_staticInst->isScalarRegister(i) && _staticInst->isSrcOperand(i)) {
|
||||
return _staticInst->isFlatScratchRegister(i);
|
||||
}
|
||||
}
|
||||
for (const auto& srcScalarOp : srcScalarRegOperands())
|
||||
if (srcScalarOp.isFlatScratch())
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -45,6 +45,7 @@
|
||||
#include "enums/StorageClassType.hh"
|
||||
#include "gpu-compute/compute_unit.hh"
|
||||
#include "gpu-compute/gpu_exec_context.hh"
|
||||
#include "gpu-compute/operand_info.hh"
|
||||
|
||||
class GPUStaticInst;
|
||||
|
||||
@@ -89,7 +90,7 @@ class RegisterOperandInfo
|
||||
/**
|
||||
* The number of registers required to store this operand.
|
||||
*/
|
||||
int numRegisters() const { return numDWORDs / TheGpuISA::RegSizeDWORDs; }
|
||||
int numRegisters() const { return numDWORDs / TheGpuISA::RegSizeDWords; }
|
||||
int operandIdx() const { return opIdx; }
|
||||
/**
|
||||
* We typically only need the first virtual register for the operand
|
||||
@@ -117,65 +118,42 @@ class GPUDynInst : public GPUExecContext
|
||||
GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
|
||||
uint64_t instSeqNum);
|
||||
~GPUDynInst();
|
||||
void initOperandInfo();
|
||||
void execute(GPUDynInstPtr gpuDynInst);
|
||||
|
||||
const std::vector<RegisterOperandInfo>&
|
||||
srcVecRegOperands() const
|
||||
{
|
||||
return srcVecRegOps;
|
||||
}
|
||||
|
||||
const std::vector<RegisterOperandInfo>&
|
||||
dstVecRegOperands() const
|
||||
{
|
||||
return dstVecRegOps;
|
||||
}
|
||||
|
||||
const std::vector<RegisterOperandInfo>&
|
||||
srcScalarRegOperands() const
|
||||
{
|
||||
return srcScalarRegOps;
|
||||
}
|
||||
|
||||
const std::vector<RegisterOperandInfo>&
|
||||
dstScalarRegOperands() const
|
||||
{
|
||||
return dstScalarRegOps;
|
||||
}
|
||||
|
||||
int numSrcVecRegOperands() const;
|
||||
int numDstVecRegOperands() const;
|
||||
int maxSrcVecRegOperandSize() const;
|
||||
int numSrcScalarRegOperands() const;
|
||||
int numDstScalarRegOperands() const;
|
||||
int maxSrcScalarRegOperandSize() const;
|
||||
const std::vector<OperandInfo>& srcVecRegOperands() const;
|
||||
const std::vector<OperandInfo>& dstVecRegOperands() const;
|
||||
const std::vector<OperandInfo>& srcScalarRegOperands() const;
|
||||
const std::vector<OperandInfo>& dstScalarRegOperands() const;
|
||||
|
||||
int numSrcRegOperands();
|
||||
int numDstRegOperands();
|
||||
int numSrcVecDWORDs();
|
||||
int numDstVecDWORDs();
|
||||
int numOpdDWORDs(int operandIdx);
|
||||
int getNumOperands();
|
||||
bool isVectorRegister(int operandIdx);
|
||||
bool isScalarRegister(int operandIdx);
|
||||
int getRegisterIndex(int operandIdx);
|
||||
int getOperandSize(int operandIdx);
|
||||
bool isDstOperand(int operandIdx);
|
||||
bool isSrcOperand(int operandIdx);
|
||||
|
||||
bool hasDestinationSgpr() const;
|
||||
int numSrcVecRegOperands() const;
|
||||
int numDstVecRegOperands() const;
|
||||
int maxSrcVecRegOperandSize();
|
||||
int numSrcVecDWords();
|
||||
int numDstVecDWords();
|
||||
|
||||
int numSrcScalarRegOperands() const;
|
||||
int numDstScalarRegOperands() const;
|
||||
int maxSrcScalarRegOperandSize();
|
||||
int numSrcScalarDWords();
|
||||
int numDstScalarDWords();
|
||||
|
||||
int maxOperandSize();
|
||||
|
||||
int getNumOperands() const;
|
||||
|
||||
bool hasSourceSgpr() const;
|
||||
bool hasDestinationVgpr() const;
|
||||
bool hasDestinationSgpr() const;
|
||||
bool hasSourceVgpr() const;
|
||||
bool hasDestinationVgpr() const;
|
||||
|
||||
// returns true if the string "opcodeStr" is found in the
|
||||
// opcode of the instruction
|
||||
bool isOpcode(const std::string& opcodeStr) const;
|
||||
bool isOpcode(const std::string& opcodeStr,
|
||||
const std::string& extStr) const;
|
||||
// returns true if source operand at "index" is a vector register
|
||||
bool srcIsVgpr(int index) const;
|
||||
|
||||
const std::string &disassemble() const;
|
||||
|
||||
@@ -264,8 +242,8 @@ class GPUDynInst : public GPUExecContext
|
||||
bool writesSCC() const;
|
||||
bool readsVCC() const;
|
||||
bool writesVCC() const;
|
||||
bool readsEXEC() const;
|
||||
bool writesEXEC() const;
|
||||
bool readsExec() const;
|
||||
bool writesExec() const;
|
||||
bool readsMode() const;
|
||||
bool writesMode() const;
|
||||
bool ignoreExec() const;
|
||||
@@ -509,12 +487,6 @@ class GPUDynInst : public GPUExecContext
|
||||
// hold each cache block address for the instruction and a vector
|
||||
// to hold the tick when the block arrives at certain hop points
|
||||
std::map<Addr, std::vector<Tick>> lineAddressTime;
|
||||
|
||||
// Operand info.
|
||||
std::vector<RegisterOperandInfo> srcVecRegOps;
|
||||
std::vector<RegisterOperandInfo> dstVecRegOps;
|
||||
std::vector<RegisterOperandInfo> srcScalarRegOps;
|
||||
std::vector<RegisterOperandInfo> dstScalarRegOps;
|
||||
};
|
||||
|
||||
#endif // __GPU_DYN_INST_HH__
|
||||
|
||||
@@ -33,10 +33,12 @@
|
||||
|
||||
#include "gpu-compute/gpu_static_inst.hh"
|
||||
|
||||
#include "debug/GPUInst.hh"
|
||||
|
||||
GPUStaticInst::GPUStaticInst(const std::string &opcode)
|
||||
: executed_as(Enums::SC_NONE), _opcode(opcode),
|
||||
_instNum(0), _instAddr(0), srcVecOperands(-1), dstVecOperands(-1),
|
||||
srcVecDWORDs(-1), dstVecDWORDs(-1)
|
||||
_instNum(0), _instAddr(0), srcVecDWords(-1), dstVecDWords(-1),
|
||||
srcScalarDWords(-1), dstScalarDWords(-1), maxOpSize(-1)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -51,79 +53,160 @@ GPUStaticInst::disassemble()
|
||||
return disassembly;
|
||||
}
|
||||
|
||||
void
|
||||
GPUStaticInst::initDynOperandInfo(Wavefront *wf, ComputeUnit *cu)
|
||||
{
|
||||
// Lambda function, as this is only ever used here
|
||||
auto generateVirtToPhysMap = [&](OperandInfo& op,
|
||||
std::vector<OperandInfo>& opVec,
|
||||
MapRegFn mapFn, OpType opType)
|
||||
{
|
||||
std::vector<int> virt_idxs;
|
||||
std::vector<int> phys_idxs;
|
||||
|
||||
int num_dwords = op.sizeInDWords();
|
||||
int virt_idx = op.registerIndex(wf->reservedScalarRegs);
|
||||
|
||||
int phys_idx = -1;
|
||||
for (int i = 0; i < num_dwords; i++){
|
||||
phys_idx = (cu->registerManager->*mapFn)(wf, virt_idx + i);
|
||||
virt_idxs.push_back(virt_idx + i);
|
||||
phys_idxs.push_back(phys_idx);
|
||||
}
|
||||
DPRINTF(GPUInst, "%s adding %s %s (%d->%d) operand that uses "
|
||||
"%d registers.\n", disassemble(),
|
||||
(opType == OpType::SRC_VEC || opType == OpType::DST_VEC) ?
|
||||
"vector" : "scalar",
|
||||
(opType == OpType::SRC_VEC || opType == OpType::SRC_SCALAR) ?
|
||||
"src" : "dst", virt_idxs[0], phys_idxs[0], num_dwords);
|
||||
|
||||
op.setVirtToPhysMapping(virt_idxs, phys_idxs);
|
||||
|
||||
opVec.emplace_back(op);
|
||||
};
|
||||
|
||||
for (auto& srcOp : srcOps) {
|
||||
if (srcOp.isVectorReg()) {
|
||||
generateVirtToPhysMap(srcOp, srcVecRegOps,
|
||||
&RegisterManager::mapVgpr, OpType::SRC_VEC);
|
||||
} else if (srcOp.isScalarReg()) {
|
||||
generateVirtToPhysMap(srcOp, srcScalarRegOps,
|
||||
&RegisterManager::mapSgpr, OpType::SRC_SCALAR);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& dstOp : dstOps) {
|
||||
if (dstOp.isVectorReg()) {
|
||||
generateVirtToPhysMap(dstOp, dstVecRegOps,
|
||||
&RegisterManager::mapVgpr, OpType::DST_VEC);
|
||||
} else if (dstOp.isScalarReg()) {
|
||||
generateVirtToPhysMap(dstOp, dstScalarRegOps,
|
||||
&RegisterManager::mapSgpr, OpType::DST_SCALAR);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
GPUStaticInst::numSrcVecOperands()
|
||||
{
|
||||
if (srcVecOperands > -1)
|
||||
return srcVecOperands;
|
||||
|
||||
srcVecOperands = 0;
|
||||
if (!isScalar()) {
|
||||
for (int k = 0; k < getNumOperands(); ++k) {
|
||||
if (isVectorRegister(k) && isSrcOperand(k))
|
||||
srcVecOperands++;
|
||||
}
|
||||
}
|
||||
return srcVecOperands;
|
||||
return srcVecRegOps.size();
|
||||
}
|
||||
|
||||
int
|
||||
GPUStaticInst::numDstVecOperands()
|
||||
{
|
||||
if (dstVecOperands > -1)
|
||||
return dstVecOperands;
|
||||
|
||||
dstVecOperands = 0;
|
||||
if (!isScalar()) {
|
||||
for (int k = 0; k < getNumOperands(); ++k) {
|
||||
if (isVectorRegister(k) && isDstOperand(k))
|
||||
dstVecOperands++;
|
||||
}
|
||||
}
|
||||
return dstVecOperands;
|
||||
return dstVecRegOps.size();
|
||||
}
|
||||
|
||||
int
|
||||
GPUStaticInst::numSrcVecDWORDs()
|
||||
GPUStaticInst::numSrcVecDWords()
|
||||
{
|
||||
if (srcVecDWORDs > -1) {
|
||||
return srcVecDWORDs;
|
||||
if (srcVecDWords != -1) {
|
||||
return srcVecDWords;
|
||||
}
|
||||
|
||||
srcVecDWORDs = 0;
|
||||
if (!isScalar()) {
|
||||
for (int i = 0; i < getNumOperands(); i++) {
|
||||
if (isVectorRegister(i) && isSrcOperand(i)) {
|
||||
int dwords = numOpdDWORDs(i);
|
||||
srcVecDWORDs += dwords;
|
||||
}
|
||||
}
|
||||
}
|
||||
return srcVecDWORDs;
|
||||
srcVecDWords = 0;
|
||||
|
||||
for (const auto& srcOp : srcOps)
|
||||
if (srcOp.isVectorReg())
|
||||
srcVecDWords += srcOp.sizeInDWords();
|
||||
|
||||
return srcVecDWords;
|
||||
}
|
||||
|
||||
int
|
||||
GPUStaticInst::numDstVecDWORDs()
|
||||
GPUStaticInst::numDstVecDWords()
|
||||
{
|
||||
if (dstVecDWORDs > -1) {
|
||||
return dstVecDWORDs;
|
||||
if (dstVecDWords != -1) {
|
||||
return dstVecDWords;
|
||||
}
|
||||
|
||||
dstVecDWORDs = 0;
|
||||
if (!isScalar()) {
|
||||
for (int i = 0; i < getNumOperands(); i++) {
|
||||
if (isVectorRegister(i) && isDstOperand(i)) {
|
||||
int dwords = numOpdDWORDs(i);
|
||||
dstVecDWORDs += dwords;
|
||||
}
|
||||
}
|
||||
}
|
||||
return dstVecDWORDs;
|
||||
dstVecDWords = 0;
|
||||
|
||||
for (const auto& dstOp : dstOps)
|
||||
if (dstOp.isVectorReg())
|
||||
dstVecDWords += dstOp.sizeInDWords();
|
||||
|
||||
return dstVecDWords;
|
||||
}
|
||||
|
||||
int
|
||||
GPUStaticInst::numOpdDWORDs(int operandIdx)
|
||||
GPUStaticInst::numSrcScalarOperands()
|
||||
{
|
||||
return getOperandSize(operandIdx) <= 4 ? 1
|
||||
: getOperandSize(operandIdx) / 4;
|
||||
return srcScalarRegOps.size();
|
||||
}
|
||||
|
||||
int
|
||||
GPUStaticInst::numDstScalarOperands()
|
||||
{
|
||||
return dstScalarRegOps.size();
|
||||
}
|
||||
|
||||
int
|
||||
GPUStaticInst::numSrcScalarDWords()
|
||||
{
|
||||
if (srcScalarDWords != -1)
|
||||
return srcScalarDWords;
|
||||
|
||||
srcScalarDWords = 0;
|
||||
|
||||
for (const auto& srcOp : srcOps)
|
||||
if (srcOp.isScalarReg())
|
||||
srcScalarDWords += srcOp.sizeInDWords();
|
||||
|
||||
return srcScalarDWords;
|
||||
}
|
||||
|
||||
int
|
||||
GPUStaticInst::numDstScalarDWords()
|
||||
{
|
||||
if (dstScalarDWords != -1)
|
||||
return dstScalarDWords;
|
||||
|
||||
dstScalarDWords = 0;
|
||||
|
||||
for (const auto& dstOp : dstOps)
|
||||
if (dstOp.isScalarReg())
|
||||
dstScalarDWords += dstOp.sizeInDWords();
|
||||
|
||||
return dstScalarDWords;
|
||||
}
|
||||
|
||||
int
|
||||
GPUStaticInst::maxOperandSize()
|
||||
{
|
||||
if (maxOpSize != -1)
|
||||
return maxOpSize;
|
||||
|
||||
maxOpSize = 0;
|
||||
|
||||
for (const auto& dstOp : dstOps)
|
||||
if (dstOp.size() > maxOpSize)
|
||||
maxOpSize = dstOp.size();
|
||||
|
||||
for (const auto& srcOp : srcOps)
|
||||
if (srcOp.size() > maxOpSize)
|
||||
maxOpSize = srcOp.size();
|
||||
|
||||
return maxOpSize;
|
||||
}
|
||||
|
||||
@@ -52,10 +52,10 @@
|
||||
#include "gpu-compute/gpu_dyn_inst.hh"
|
||||
#include "gpu-compute/misc.hh"
|
||||
#include "gpu-compute/operand_info.hh"
|
||||
#include "gpu-compute/wavefront.hh"
|
||||
|
||||
class BaseOperand;
|
||||
class BaseRegOperand;
|
||||
class Wavefront;
|
||||
|
||||
class GPUStaticInst : public GPUStaticInstFlags
|
||||
{
|
||||
@@ -76,41 +76,33 @@ class GPUStaticInst : public GPUStaticInstFlags
|
||||
|
||||
virtual TheGpuISA::ScalarRegU32 srcLiteral() const { return 0; }
|
||||
|
||||
void initDynOperandInfo(Wavefront *wf, ComputeUnit *cu);
|
||||
|
||||
virtual void initOperandInfo() = 0;
|
||||
virtual void execute(GPUDynInstPtr gpuDynInst) = 0;
|
||||
virtual void generateDisassembly() = 0;
|
||||
const std::string& disassemble();
|
||||
virtual int getNumOperands() = 0;
|
||||
bool isScalarRegister(int operandIndex)
|
||||
{ return operands[operandIndex].isScalarReg(); }
|
||||
|
||||
bool isVectorRegister(int operandIndex)
|
||||
{ return operands[operandIndex].isVectorReg(); }
|
||||
|
||||
bool isSrcOperand(int operandIndex)
|
||||
{ return operands[operandIndex].isSrc(); }
|
||||
|
||||
bool isDstOperand(int operandIndex)
|
||||
{ return operands[operandIndex].isDst(); }
|
||||
|
||||
virtual bool isFlatScratchRegister(int opIdx) = 0;
|
||||
virtual bool isExecMaskRegister(int opIdx) = 0;
|
||||
virtual int getOperandSize(int operandIndex) = 0;
|
||||
|
||||
int getRegisterIndex(int operandIndex, int num_scalar_regs)
|
||||
{ return operands[operandIndex].registerIndex(num_scalar_regs); }
|
||||
|
||||
virtual int numDstRegOperands() = 0;
|
||||
virtual int numSrcRegOperands() = 0;
|
||||
|
||||
virtual int coalescerTokenCount() const { return 0; }
|
||||
|
||||
int numDstVecOperands();
|
||||
int numSrcVecOperands();
|
||||
int numDstVecDWORDs();
|
||||
int numSrcVecDWORDs();
|
||||
int numDstVecOperands();
|
||||
int numSrcVecDWords();
|
||||
int numDstVecDWords();
|
||||
|
||||
int numOpdDWORDs(int operandIdx);
|
||||
int numSrcScalarOperands();
|
||||
int numDstScalarOperands();
|
||||
int numSrcScalarDWords();
|
||||
int numDstScalarDWords();
|
||||
|
||||
int maxOperandSize();
|
||||
|
||||
virtual int coalescerTokenCount() const { return 0; }
|
||||
|
||||
bool isALU() const { return _flags[ALU]; }
|
||||
bool isBranch() const { return _flags[Branch]; }
|
||||
@@ -268,18 +260,58 @@ class GPUStaticInst : public GPUStaticInstFlags
|
||||
}
|
||||
const std::string& opcode() const { return _opcode; }
|
||||
|
||||
const std::vector<OperandInfo>& srcOperands() const { return srcOps; }
|
||||
const std::vector<OperandInfo>& dstOperands() const { return dstOps; }
|
||||
|
||||
const std::vector<OperandInfo>&
|
||||
srcVecRegOperands() const
|
||||
{
|
||||
return srcVecRegOps;
|
||||
}
|
||||
|
||||
const std::vector<OperandInfo>&
|
||||
dstVecRegOperands() const
|
||||
{
|
||||
return dstVecRegOps;
|
||||
}
|
||||
|
||||
const std::vector<OperandInfo>&
|
||||
srcScalarRegOperands() const
|
||||
{
|
||||
return srcScalarRegOps;
|
||||
}
|
||||
|
||||
const std::vector<OperandInfo>&
|
||||
dstScalarRegOperands() const
|
||||
{
|
||||
return dstScalarRegOps;
|
||||
}
|
||||
|
||||
// These next 2 lines are used in initDynOperandInfo to let the lambda
|
||||
// function work
|
||||
typedef int (RegisterManager::*MapRegFn)(Wavefront *, int);
|
||||
enum OpType { SRC_VEC, SRC_SCALAR, DST_VEC, DST_SCALAR };
|
||||
|
||||
protected:
|
||||
const std::string _opcode;
|
||||
std::string disassembly;
|
||||
int _instNum;
|
||||
int _instAddr;
|
||||
std::vector<OperandInfo> operands;
|
||||
std::vector<OperandInfo> srcOps;
|
||||
std::vector<OperandInfo> dstOps;
|
||||
|
||||
private:
|
||||
int srcVecOperands;
|
||||
int dstVecOperands;
|
||||
int srcVecDWORDs;
|
||||
int dstVecDWORDs;
|
||||
int srcVecDWords;
|
||||
int dstVecDWords;
|
||||
int srcScalarDWords;
|
||||
int dstScalarDWords;
|
||||
int maxOpSize;
|
||||
|
||||
std::vector<OperandInfo> srcVecRegOps;
|
||||
std::vector<OperandInfo> dstVecRegOps;
|
||||
std::vector<OperandInfo> srcScalarRegOps;
|
||||
std::vector<OperandInfo> dstScalarRegOps;
|
||||
|
||||
/**
|
||||
* Identifier of the immediate post-dominator instruction.
|
||||
*/
|
||||
|
||||
@@ -44,7 +44,8 @@ class OperandInfo
|
||||
OperandInfo() = delete;
|
||||
OperandInfo(int opSelectorVal, int size, bool src, bool scalar_reg,
|
||||
bool vector_reg, bool imm)
|
||||
: _opSelectorVal(opSelectorVal), _size(size)
|
||||
: _opSelectorVal(opSelectorVal), _size(size),
|
||||
_numDWords(size <= 4 ? 1 : size / 4)
|
||||
{
|
||||
if (src)
|
||||
flags.set(SRC);
|
||||
@@ -68,7 +69,14 @@ class OperandInfo
|
||||
flags.set(POS_CONST);
|
||||
}
|
||||
|
||||
// Number of registers this operand occupies: its DWord count divided by the
// ISA's register size in DWords (integer division).
int numRegisters() const { return _numDWords / TheGpuISA::RegSizeDWords; }
|
||||
int sizeInDWords() const { return _numDWords; }
|
||||
|
||||
int size() const { return _size; }
|
||||
// Certain opIdx's get changed in calls to opSelectorToRegIdx
|
||||
// This avoids that by returning the exact value
|
||||
int rawRegisterIndex() const { return _opSelectorVal; }
|
||||
|
||||
int
|
||||
registerIndex(int numScalarRegs) const
|
||||
{
|
||||
@@ -88,6 +96,41 @@ class OperandInfo
|
||||
bool isExec() const { return flags.isSet(EXEC); }
|
||||
bool isFlatScratch() const { return flags.isSet(FLAT); }
|
||||
|
||||
void
|
||||
setVirtToPhysMapping(std::vector<int> v, std::vector<int> p)
|
||||
{
|
||||
_virtIndices = v;
|
||||
_physIndices = p;
|
||||
|
||||
assert(_virtIndices.size() == _physIndices.size());
|
||||
assert(_numDWords == _physIndices.size());
|
||||
}
|
||||
|
||||
/**
|
||||
* We typically only need the first virtual register for the operand
|
||||
* regardless of its size.
|
||||
*/
|
||||
// Virtual index of the operand's reg_num-th register (the first by default).
// Uses vector::at, so an out-of-range reg_num throws std::out_of_range.
int virtIdx(int reg_num=0) const { return _virtIndices.at(reg_num); }
|
||||
// Physical index of the operand's reg_num-th register (the first by default).
// Uses vector::at, so an out-of-range reg_num throws std::out_of_range.
int physIdx(int reg_num=0) const { return _physIndices.at(reg_num); }
|
||||
|
||||
const std::vector<int>&
|
||||
virtIndices() const
|
||||
{
|
||||
return _virtIndices;
|
||||
}
|
||||
|
||||
const std::vector<int>&
|
||||
physIndices() const
|
||||
{
|
||||
return _physIndices;
|
||||
}
|
||||
|
||||
std::vector<int>&
|
||||
bankReadCounts() const
|
||||
{
|
||||
return _bankReadCounts;
|
||||
}
|
||||
|
||||
typedef uint32_t FlagsType;
|
||||
typedef ::Flags<FlagsType> Flags;
|
||||
|
||||
@@ -128,7 +171,7 @@ class OperandInfo
|
||||
Flags flags;
|
||||
|
||||
/**
|
||||
* Index of the operand as used in registers.cc functions
|
||||
* Value of the operand as used in registers.cc functions
|
||||
*/
|
||||
const int _opSelectorVal;
|
||||
|
||||
@@ -136,6 +179,19 @@ class OperandInfo
|
||||
* Size of the operand in bytes
|
||||
*/
|
||||
const int _size;
|
||||
|
||||
/**
|
||||
* Size of operand in DWords
|
||||
*/
|
||||
const int _numDWords;
|
||||
|
||||
std::vector<int> _virtIndices;
|
||||
std::vector<int> _physIndices;
|
||||
|
||||
/**
|
||||
* The number of reads this operand will make to each bank.
|
||||
*/
|
||||
mutable std::vector<int> _bankReadCounts;
|
||||
};
|
||||
|
||||
#endif // __GPU_COMPUTE_OPERAND_INFO_H__
|
||||
|
||||
@@ -50,51 +50,26 @@ ScalarRegisterFile::ScalarRegisterFile(const ScalarRegisterFileParams &p)
|
||||
bool
|
||||
ScalarRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const
|
||||
{
|
||||
for (int i = 0; i < ii->getNumOperands(); ++i) {
|
||||
if (ii->isScalarRegister(i) && ii->isSrcOperand(i)) {
|
||||
for (const auto& srcScalarOp : ii->srcScalarRegOperands()) {
|
||||
for (const auto& physIdx : srcScalarOp.physIndices()) {
|
||||
if (regBusy(physIdx))
|
||||
DPRINTF(GPUSRF, "RAW stall: WV[%d]: %s: physReg[%d]\n",
|
||||
w->wfDynId, ii->disassemble(), physIdx);
|
||||
w->stats.numTimesBlockedDueRAWDependencies++;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
int sgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
|
||||
ii->getOperandSize(i) / 4;
|
||||
|
||||
for (int j = 0; j < nRegs; ++j) {
|
||||
int pSgpr =
|
||||
computeUnit->registerManager->mapSgpr(w, sgprIdx + j);
|
||||
|
||||
if (regBusy(pSgpr)) {
|
||||
if (ii->isDstOperand(i)) {
|
||||
w->stats.numTimesBlockedDueWAXDependencies++;
|
||||
} else if (ii->isSrcOperand(i)) {
|
||||
DPRINTF(GPUSRF, "RAW stall: WV[%d]: %s: physReg[%d]\n",
|
||||
w->wfDynId, ii->disassemble(), pSgpr);
|
||||
w->stats.numTimesBlockedDueRAWDependencies++;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
} // nRegs
|
||||
} // isScalar
|
||||
} // operand
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
ScalarRegisterFile::scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii)
|
||||
{
|
||||
// iterate over all register destination operands
|
||||
for (int i = 0; i < ii->getNumOperands(); ++i) {
|
||||
if (ii->isScalarRegister(i) && ii->isDstOperand(i)) {
|
||||
|
||||
int sgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
|
||||
ii->getOperandSize(i) / 4;
|
||||
|
||||
for (int j = 0; j < nRegs; ++j) {
|
||||
int physReg =
|
||||
computeUnit->registerManager->mapSgpr(w, sgprIdx + j);
|
||||
|
||||
// mark the destination scalar register as busy
|
||||
markReg(physReg, true);
|
||||
}
|
||||
for (const auto& dstScalarOp : ii->dstScalarRegOperands()) {
|
||||
for (const auto& physIdx : dstScalarOp.physIndices()) {
|
||||
// mark the destination scalar register as busy
|
||||
markReg(physIdx, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -102,32 +77,19 @@ ScalarRegisterFile::scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii)
|
||||
void
|
||||
ScalarRegisterFile::waveExecuteInst(Wavefront *w, GPUDynInstPtr ii)
|
||||
{
|
||||
for (int i = 0; i < ii->getNumOperands(); i++) {
|
||||
if (ii->isScalarRegister(i) && ii->isSrcOperand(i)) {
|
||||
int DWORDs = ii->getOperandSize(i) <= 4 ? 1
|
||||
: ii->getOperandSize(i) / 4;
|
||||
stats.registerReads += DWORDs;
|
||||
}
|
||||
}
|
||||
stats.registerReads += ii->numSrcScalarDWords();
|
||||
|
||||
if (!ii->isLoad() && !(ii->isAtomic() || ii->isMemSync())) {
|
||||
Cycles delay(computeUnit->scalarPipeLength());
|
||||
Tick tickDelay = computeUnit->cyclesToTicks(delay);
|
||||
|
||||
for (int i = 0; i < ii->getNumOperands(); i++) {
|
||||
if (ii->isScalarRegister(i) && ii->isDstOperand(i)) {
|
||||
int sgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1
|
||||
: ii->getOperandSize(i) / 4;
|
||||
for (int j = 0; j < nRegs; j++) {
|
||||
int physReg = computeUnit->registerManager->
|
||||
mapSgpr(w, sgprIdx + j);
|
||||
enqRegFreeEvent(physReg, tickDelay);
|
||||
}
|
||||
|
||||
stats.registerWrites += nRegs;
|
||||
for (const auto& dstScalarOp : ii->dstScalarRegOperands()) {
|
||||
for (const auto& physIdx : dstScalarOp.physIndices()) {
|
||||
enqRegFreeEvent(physIdx, tickDelay);
|
||||
}
|
||||
}
|
||||
|
||||
stats.registerWrites += ii->numDstScalarDWords();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -136,20 +98,11 @@ ScalarRegisterFile::scheduleWriteOperandsFromLoad(Wavefront *w,
|
||||
GPUDynInstPtr ii)
|
||||
{
|
||||
assert(ii->isLoad() || ii->isAtomicRet());
|
||||
for (int i = 0; i < ii->getNumOperands(); ++i) {
|
||||
if (ii->isScalarRegister(i) && ii->isDstOperand(i)) {
|
||||
|
||||
int sgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
|
||||
ii->getOperandSize(i) / 4;
|
||||
|
||||
for (int j = 0; j < nRegs; ++j) {
|
||||
int physReg = computeUnit->registerManager->
|
||||
mapSgpr(w, sgprIdx + j);
|
||||
enqRegFreeEvent(physReg, computeUnit->clockPeriod());
|
||||
}
|
||||
|
||||
stats.registerWrites += nRegs;
|
||||
for (const auto& dstScalarOp : ii->dstScalarRegOperands()) {
|
||||
for (const auto& physIdx : dstScalarOp.physIndices()) {
|
||||
enqRegFreeEvent(physIdx, computeUnit->clockPeriod());
|
||||
}
|
||||
}
|
||||
|
||||
stats.registerWrites += ii->numDstScalarDWords();
|
||||
}
|
||||
|
||||
@@ -57,63 +57,38 @@ VectorRegisterFile::VectorRegisterFile(const VectorRegisterFileParams &p)
|
||||
bool
|
||||
VectorRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const
|
||||
{
|
||||
for (int i = 0; i < ii->getNumOperands(); ++i) {
|
||||
if (ii->isVectorRegister(i) && ii->isSrcOperand(i)) {
|
||||
int vgprIdx = ii->getRegisterIndex(i);
|
||||
|
||||
// determine number of registers
|
||||
int nRegs =
|
||||
ii->getOperandSize(i) <= 4 ? 1 : ii->getOperandSize(i) / 4;
|
||||
for (int j = 0; j < nRegs; j++) {
|
||||
int pVgpr = computeUnit->registerManager
|
||||
->mapVgpr(w, vgprIdx + j);
|
||||
if (regBusy(pVgpr)) {
|
||||
if (ii->isDstOperand(i)) {
|
||||
w->stats.numTimesBlockedDueWAXDependencies++;
|
||||
} else if (ii->isSrcOperand(i)) {
|
||||
DPRINTF(GPUVRF, "RAW stall: WV[%d]: %s: physReg[%d]\n",
|
||||
w->wfDynId, ii->disassemble(), pVgpr);
|
||||
w->stats.numTimesBlockedDueRAWDependencies++;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
for (const auto& srcVecOp : ii->srcVecRegOperands()) {
|
||||
for (const auto& physIdx : srcVecOp.physIndices()) {
|
||||
if (regBusy(physIdx)) {
|
||||
DPRINTF(GPUVRF, "RAW stall: WV[%d]: %s: physReg[%d]\n",
|
||||
w->wfDynId, ii->disassemble(), physIdx);
|
||||
w->stats.numTimesBlockedDueRAWDependencies++;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
VectorRegisterFile::scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii)
|
||||
{
|
||||
// iterate over all register destination operands
|
||||
for (int i = 0; i < ii->getNumOperands(); ++i) {
|
||||
if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
|
||||
int vgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
|
||||
ii->getOperandSize(i) / 4;
|
||||
|
||||
for (int j = 0; j < nRegs; ++j) {
|
||||
int physReg = computeUnit->registerManager
|
||||
->mapVgpr(w, vgprIdx + j);
|
||||
|
||||
// If instruction is atomic instruction and
|
||||
// the atomics do not return value, then
|
||||
// do not mark this reg as busy.
|
||||
if (!(ii->isAtomic() && !ii->isAtomicRet())) {
|
||||
/**
|
||||
* if the instruction is a load with EXEC = 0, then
|
||||
* we do not mark the reg. we do this to avoid a
|
||||
* deadlock that can occur because a load reserves
|
||||
* its destination regs before checking its exec mask,
|
||||
* and in the case it is 0, it will not send/recv any
|
||||
* packets, and therefore it will never free its dest
|
||||
* reg(s).
|
||||
*/
|
||||
if (!ii->isLoad() || (ii->isLoad()
|
||||
&& ii->exec_mask.any())) {
|
||||
markReg(physReg, true);
|
||||
}
|
||||
for (const auto& dstVecOp : ii->dstVecRegOperands()) {
|
||||
for (const auto& physIdx : dstVecOp.physIndices()) {
|
||||
// If the instruction is an atomic instruction and the atomics do
|
||||
// not return value, then do not mark this reg as busy.
|
||||
if (!(ii->isAtomic() && !ii->isAtomicRet())) {
|
||||
/**
|
||||
* if the instruction is a load with EXEC = 0, then we do not
|
||||
* mark the reg. We do this to avoid a deadlock that can
|
||||
* occur because a load reserves its destination regs before
|
||||
* checking its exec mask, and in the case it is 0, it will not
|
||||
* send/recv any packets, and therefore it will never free its
|
||||
* dst reg(s)
|
||||
*/
|
||||
if (!ii->isLoad() || (ii->isLoad() && ii->exec_mask.any())) {
|
||||
markReg(physIdx, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -123,53 +98,42 @@ VectorRegisterFile::scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii)
|
||||
void
|
||||
VectorRegisterFile::waveExecuteInst(Wavefront *w, GPUDynInstPtr ii)
|
||||
{
|
||||
// increment count of number of DWORDs read from VRF
|
||||
int DWORDs = ii->numSrcVecDWORDs();
|
||||
stats.registerReads += (DWORDs * w->execMask().count());
|
||||
// increment count of number of DWords read from VRF
|
||||
int DWords = ii->numSrcVecDWords();
|
||||
stats.registerReads += (DWords * w->execMask().count());
|
||||
|
||||
uint64_t mask = w->execMask().to_ullong();
|
||||
int srams = w->execMask().size() / 4;
|
||||
for (int i = 0; i < srams; i++) {
|
||||
if (mask & 0xF) {
|
||||
stats.sramReads += DWORDs;
|
||||
stats.sramReads += DWords;
|
||||
}
|
||||
mask = mask >> 4;
|
||||
}
|
||||
|
||||
if (!ii->isLoad()
|
||||
&& !(ii->isAtomic() || ii->isMemSync())) {
|
||||
int opSize = 4;
|
||||
for (int i = 0; i < ii->getNumOperands(); i++) {
|
||||
if (ii->getOperandSize(i) > opSize) {
|
||||
opSize = ii->getOperandSize(i);
|
||||
}
|
||||
}
|
||||
// TODO: compute proper delay
|
||||
// For now, it is based on largest operand size
|
||||
int opSize = ii->maxOperandSize();
|
||||
Cycles delay(opSize <= 4 ? computeUnit->spBypassLength()
|
||||
: computeUnit->dpBypassLength());
|
||||
Tick tickDelay = computeUnit->cyclesToTicks(delay);
|
||||
|
||||
for (int i = 0; i < ii->getNumOperands(); i++) {
|
||||
if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
|
||||
int vgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1
|
||||
: ii->getOperandSize(i) / 4;
|
||||
for (int j = 0; j < nRegs; j++) {
|
||||
int physReg = computeUnit->registerManager
|
||||
->mapVgpr(w, vgprIdx + j);
|
||||
enqRegFreeEvent(physReg, tickDelay);
|
||||
}
|
||||
for (const auto& dstVecOp : ii->dstVecRegOperands()) {
|
||||
for (const auto& physIdx : dstVecOp.physIndices()) {
|
||||
enqRegFreeEvent(physIdx, tickDelay);
|
||||
}
|
||||
}
|
||||
|
||||
// increment count of number of DWORDs written to VRF
|
||||
DWORDs = ii->numDstVecDWORDs();
|
||||
stats.registerWrites += (DWORDs * w->execMask().count());
|
||||
// increment count of number of DWords written to VRF
|
||||
DWords = ii->numDstVecDWords();
|
||||
stats.registerWrites += (DWords * w->execMask().count());
|
||||
|
||||
mask = w->execMask().to_ullong();
|
||||
srams = w->execMask().size() / 4;
|
||||
for (int i = 0; i < srams; i++) {
|
||||
if (mask & 0xF) {
|
||||
stats.sramWrites += DWORDs;
|
||||
stats.sramWrites += DWords;
|
||||
}
|
||||
mask = mask >> 4;
|
||||
}
|
||||
@@ -181,28 +145,20 @@ VectorRegisterFile::scheduleWriteOperandsFromLoad(
|
||||
Wavefront *w, GPUDynInstPtr ii)
|
||||
{
|
||||
assert(ii->isLoad() || ii->isAtomicRet());
|
||||
for (int i = 0; i < ii->getNumOperands(); ++i) {
|
||||
if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
|
||||
int vgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
|
||||
ii->getOperandSize(i) / 4;
|
||||
|
||||
for (int j = 0; j < nRegs; ++j) {
|
||||
int physReg = computeUnit->registerManager
|
||||
->mapVgpr(w, vgprIdx + j);
|
||||
enqRegFreeEvent(physReg, computeUnit->clockPeriod());
|
||||
}
|
||||
for (const auto& dstVecOp : ii->dstVecRegOperands()) {
|
||||
for (const auto& physIdx : dstVecOp.physIndices()) {
|
||||
enqRegFreeEvent(physIdx, computeUnit->clockPeriod());
|
||||
}
|
||||
}
|
||||
// increment count of number of DWORDs written to VRF
|
||||
int DWORDs = ii->numDstVecDWORDs();
|
||||
stats.registerWrites += (DWORDs * ii->exec_mask.count());
|
||||
// increment count of number of DWords written to VRF
|
||||
int DWords = ii->numDstVecDWords();
|
||||
stats.registerWrites += (DWords * ii->exec_mask.count());
|
||||
|
||||
uint64_t mask = ii->exec_mask.to_ullong();
|
||||
int srams = ii->exec_mask.size() / 4;
|
||||
for (int i = 0; i < srams; i++) {
|
||||
if (mask & 0xF) {
|
||||
stats.sramWrites += DWORDs;
|
||||
stats.sramWrites += DWords;
|
||||
}
|
||||
mask = mask >> 4;
|
||||
}
|
||||
|
||||
@@ -925,33 +925,30 @@ Wavefront::exec()
|
||||
// number of reads that occur per value written
|
||||
|
||||
// vector RAW dependency tracking
|
||||
for (int i = 0; i < ii->getNumOperands(); i++) {
|
||||
if (ii->isVectorRegister(i)) {
|
||||
int vgpr = ii->getRegisterIndex(i);
|
||||
int nReg = ii->getOperandSize(i) <= 4 ? 1 :
|
||||
ii->getOperandSize(i) / 4;
|
||||
for (int n = 0; n < nReg; n++) {
|
||||
if (ii->isSrcOperand(i)) {
|
||||
// This check should never fail, but to be safe we check
|
||||
if (rawDist.find(vgpr+n) != rawDist.end()) {
|
||||
stats.vecRawDistance.sample(
|
||||
stats.numInstrExecuted.value() - rawDist[vgpr+n]);
|
||||
}
|
||||
// increment number of reads to this register
|
||||
vecReads[vgpr+n]++;
|
||||
} else if (ii->isDstOperand(i)) {
|
||||
// rawDist is set on writes, but will not be set
|
||||
// for the first write to each physical register
|
||||
if (rawDist.find(vgpr+n) != rawDist.end()) {
|
||||
// sample the number of reads that were performed
|
||||
stats.readsPerWrite.sample(vecReads[vgpr+n]);
|
||||
}
|
||||
// on a write, reset count of reads to 0
|
||||
vecReads[vgpr+n] = 0;
|
||||
|
||||
rawDist[vgpr+n] = stats.numInstrExecuted.value();
|
||||
}
|
||||
for (const auto& srcVecOp : ii->srcVecRegOperands()) {
|
||||
for (const auto& virtIdx : srcVecOp.virtIndices()) {
|
||||
// This check should never fail, but to be safe we check
|
||||
if (rawDist.find(virtIdx) != rawDist.end()) {
|
||||
stats.vecRawDistance.sample(stats.numInstrExecuted.value() -
|
||||
rawDist[virtIdx]);
|
||||
}
|
||||
// increment number of reads to this register
|
||||
vecReads[virtIdx]++;
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto& dstVecOp : ii->dstVecRegOperands()) {
|
||||
for (const auto& virtIdx : dstVecOp.virtIndices()) {
|
||||
// rawDist is set on writes, but will not be set for the first
|
||||
// write to each physical register
|
||||
if (rawDist.find(virtIdx) != rawDist.end()) {
|
||||
// Sample the number of reads that were performed
|
||||
stats.readsPerWrite.sample(vecReads[virtIdx]);
|
||||
}
|
||||
// on a write, reset count of reads to 0
|
||||
vecReads[virtIdx] = 0;
|
||||
|
||||
rawDist[virtIdx] = stats.numInstrExecuted.value();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user