gpu-compute: remove index-based operand access

This commit removes the functions that accessed operands
by indexing into the vectors that held them. Instead,
for-each loops are used, each iterating through one of six
vectors (src, dst, srcScalar, srcVec, dstScalar, dstVec),
every one of which holds a (potentially overlapping)
subset of the operands.

Change-Id: Ia3a857c8f6675be86c51ba2f77e3d85bfea9ffdb
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42212
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
Kyle Roarty
2019-07-12 16:31:02 -04:00
committed by Matt Sinclair
parent b40b361bee
commit 2bb8d6bc0c
13 changed files with 682 additions and 689 deletions

View File

@@ -168,11 +168,11 @@ namespace Gcn3ISA
typedef int64_t VecElemI64;
typedef double VecElemF64;
const int DWORDSize = sizeof(VecElemU32);
const int DWordSize = sizeof(VecElemU32);
/**
* Size of a single-precision register in DWORDs.
* Size of a single-precision register in DWords.
*/
const int RegSizeDWORDs = sizeof(VecElemU32) / DWORDSize;
const int RegSizeDWords = sizeof(VecElemU32) / DWordSize;
// typedefs for the various sizes/types of vector regs
using VecRegU8 = ::VecRegT<VecElemU8, NumVecElemPerVecReg, false>;

View File

@@ -63,20 +63,21 @@ namespace Gcn3ISA
// Needed because can't take addr of bitfield
int reg = instData.SSRC0;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(instData.SSRC0), false, false);
opNum++;
reg = instData.SSRC1;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(instData.SSRC1), false, false);
opNum++;
reg = instData.SDST;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
isScalarReg(instData.SDST), false, false);
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -150,16 +151,26 @@ namespace Gcn3ISA
int opNum = 0;
// Needed because can't take addr of bitfield
int reg = instData.SIMM16;
operands.emplace_back(reg, getOperandSize(opNum), true,
int reg = instData.SDST;
if (numSrcRegOperands() == getNumOperands()) {
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(reg), false, false);
opNum++;
}
reg = instData.SIMM16;
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, false, true);
opNum++;
reg = instData.SDST;
operands.emplace_back(reg, getOperandSize(opNum), false,
isScalarReg(instData.SDST), false, false);
if (numDstRegOperands()){
reg = instData.SDST;
dstOps.emplace_back(reg, getOperandSize(opNum), false,
isScalarReg(reg), false, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -237,16 +248,17 @@ namespace Gcn3ISA
// Needed because can't take addr of bitfield
int reg = instData.SSRC0;
if (instData.OP != 0x1C) {
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(instData.SSRC0), false, false);
opNum++;
}
reg = instData.SDST;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
isScalarReg(instData.SDST), false, false);
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -311,13 +323,14 @@ namespace Gcn3ISA
// Needed because can't take addr of bitfield
int reg = instData.SSRC0;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(instData.SSRC0), false, false);
opNum++;
reg = instData.SSRC1;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(instData.SSRC1), false, false);
}
int
@@ -385,18 +398,19 @@ namespace Gcn3ISA
if (numSrcRegOperands()) {
// Needed because can't take addr of bitfield
int reg = instData.SIMM16;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, false, true);
opNum++;
if (readsVCC()) {
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
srcOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
true, false, false);
opNum++;
}
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -496,22 +510,22 @@ namespace Gcn3ISA
if (numSrcRegOperands()) {
reg = instData.SDATA;
if (numSrcRegOperands() == getNumOperands()) {
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(reg), false, false);
opNum++;
}
reg = instData.SBASE;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
true, false, false);
opNum++;
reg = extData.OFFSET;
if (instData.IMM) {
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, false, true);
} else {
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(reg), false, false);
}
opNum++;
@@ -519,11 +533,12 @@ namespace Gcn3ISA
if (numDstRegOperands()) {
reg = instData.SDATA;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
isScalarReg(reg), false, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -600,35 +615,36 @@ namespace Gcn3ISA
// Needed because can't take addr of bitfield
int reg = instData.SRC0;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(reg), isVectorReg(reg), false);
opNum++;
reg = instData.VSRC1;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, true, false);
opNum++;
// VCC read
if (readsVCC()) {
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
srcOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
true, false, false);
opNum++;
}
// VDST
reg = instData.VDST;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
false, true, false);
opNum++;
// VCC write
if (writesVCC()) {
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
dstOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
true, false, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -735,18 +751,19 @@ namespace Gcn3ISA
int reg = instData.SRC0;
if (numSrcRegOperands()) {
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(reg), isVectorReg(reg), false);
opNum++;
}
if (numDstRegOperands()) {
reg = instData.VDST;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
false, true, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -827,20 +844,21 @@ namespace Gcn3ISA
// Needed because can't take addr of bitfield
int reg = instData.SRC0;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(reg), isVectorReg(reg), false);
opNum++;
reg = instData.VSRC1;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, true, false);
opNum++;
assert(writesVCC());
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
dstOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
true, false, false);
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -928,13 +946,13 @@ namespace Gcn3ISA
int numDst = numDstRegOperands() - writesVCC();
for (opNum = 0; opNum < numSrc; opNum++) {
operands.emplace_back(srcs[opNum], getOperandSize(opNum), true,
srcOps.emplace_back(srcs[opNum], getOperandSize(opNum), true,
isScalarReg(srcs[opNum]),
isVectorReg(srcs[opNum]), false);
}
if (readsVCC()) {
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
srcOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
true, false, false);
opNum++;
}
@@ -942,17 +960,18 @@ namespace Gcn3ISA
if (numDst) {
// Needed because can't take addr of bitfield
int reg = instData.VDST;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
sgprDst, !sgprDst, false);
opNum++;
}
if (writesVCC()) {
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
dstOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
true, false, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -1048,13 +1067,13 @@ namespace Gcn3ISA
int numDst = numDstRegOperands() - writesVCC();
for (opNum = 0; opNum < numSrc; opNum++) {
operands.emplace_back(srcs[opNum], getOperandSize(opNum), true,
srcOps.emplace_back(srcs[opNum], getOperandSize(opNum), true,
isScalarReg(srcs[opNum]),
isVectorReg(srcs[opNum]), false);
}
if (readsVCC()) {
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
srcOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
true, false, false);
opNum++;
}
@@ -1062,17 +1081,18 @@ namespace Gcn3ISA
if (numDst) {
// Needed because can't take addr of bitfield
int reg = instData.VDST;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
false, true, false);
opNum++;
}
if (writesVCC()) {
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
dstOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
true, false, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -1151,18 +1171,19 @@ namespace Gcn3ISA
int opIdx = 0;
for (opIdx = 0; opIdx < numSrcRegOperands(); opIdx++){
operands.emplace_back(srcs[opIdx], getOperandSize(opIdx), true,
srcOps.emplace_back(srcs[opIdx], getOperandSize(opIdx), true,
false, true, false);
}
if (numDstRegOperands()) {
// Needed because can't take addr of bitfield
int reg = extData.VDST;
operands.emplace_back(reg, getOperandSize(opIdx), false,
dstOps.emplace_back(reg, getOperandSize(opIdx), false,
false, true, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -1241,23 +1262,23 @@ namespace Gcn3ISA
if (numSrcRegOperands()) {
if (numSrcRegOperands() == getNumOperands()) {
reg = extData.VDATA;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, true, false);
opNum++;
}
reg = extData.VADDR;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, true, false);
opNum++;
reg = extData.SRSRC;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
true, false, false);
opNum++;
reg = extData.SOFFSET;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
true, false, false);
opNum++;
}
@@ -1265,11 +1286,12 @@ namespace Gcn3ISA
// extData.VDATA moves in the reg list depending on the instruction
if (numDstRegOperands()) {
reg = extData.VDATA;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
false, true, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -1331,34 +1353,35 @@ namespace Gcn3ISA
if (numSrcRegOperands() == getNumOperands()) {
reg = extData.VDATA;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, true, false);
opNum++;
}
reg = extData.VADDR;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, true, false);
opNum++;
reg = extData.SRSRC;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
true, false, false);
opNum++;
reg = extData.SOFFSET;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
true, false, false);
opNum++;
// extData.VDATA moves in the reg list depending on the instruction
if (numDstRegOperands()) {
reg = extData.VDATA;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
false, true, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -1403,24 +1426,24 @@ namespace Gcn3ISA
if (numSrcRegOperands() == getNumOperands()) {
reg = extData.VDATA;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, true, false);
opNum++;
}
reg = extData.VADDR;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, true, false);
opNum++;
reg = extData.SRSRC;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
true, false, false);
opNum++;
if (getNumOperands() == 4) {
reg = extData.SSAMP;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
true, false, false);
opNum++;
}
@@ -1428,11 +1451,12 @@ namespace Gcn3ISA
// extData.VDATA moves in the reg list depending on the instruction
if (numDstRegOperands()) {
reg = extData.VDATA;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
false, true, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -1468,7 +1492,7 @@ namespace Gcn3ISA
extData.VSRC2, extData.VSRC3};
for (opNum = 0; opNum < 4; opNum++) {
operands.emplace_back(srcs[opNum], getOperandSize(opNum), true,
srcOps.emplace_back(srcs[opNum], getOperandSize(opNum), true,
false, true, false);
}
@@ -1520,24 +1544,25 @@ namespace Gcn3ISA
assert(isAtomic());
reg = extData.ADDR;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, true, false);
opNum++;
if (numSrcRegOperands() == 2) {
reg = extData.DATA;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, true, false);
opNum++;
}
if (numDstRegOperands()) {
reg = extData.VDST;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
false, true, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int

View File

@@ -168,11 +168,11 @@ namespace VegaISA
typedef int64_t VecElemI64;
typedef double VecElemF64;
const int DWORDSize = sizeof(VecElemU32);
const int DWordSize = sizeof(VecElemU32);
/**
* Size of a single-precision register in DWORDs.
* Size of a single-precision register in DWords.
*/
const int RegSizeDWORDs = sizeof(VecElemU32) / DWORDSize;
const int RegSizeDWords = sizeof(VecElemU32) / DWordSize;
// typedefs for the various sizes/types of vector regs
using VecRegU8 = ::VecRegT<VecElemU8, NumVecElemPerVecReg, false>;

View File

@@ -63,20 +63,21 @@ namespace VegaISA
// Needed because can't take addr of bitfield
int reg = instData.SSRC0;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(instData.SSRC0), false, false);
opNum++;
reg = instData.SSRC1;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(instData.SSRC1), false, false);
opNum++;
reg = instData.SDST;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
isScalarReg(instData.SDST), false, false);
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -150,16 +151,26 @@ namespace VegaISA
int opNum = 0;
// Needed because can't take addr of bitfield
int reg = instData.SIMM16;
operands.emplace_back(reg, getOperandSize(opNum), true,
int reg = instData.SDST;
if (numSrcRegOperands() == getNumOperands()) {
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(reg), false, false);
opNum++;
}
reg = instData.SIMM16;
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, false, true);
opNum++;
reg = instData.SDST;
operands.emplace_back(reg, getOperandSize(opNum), false,
isScalarReg(instData.SDST), false, false);
if (numDstRegOperands()) {
reg = instData.SDST;
dstOps.emplace_back(reg, getOperandSize(opNum), false,
isScalarReg(reg), false, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -237,16 +248,17 @@ namespace VegaISA
// Needed because can't take addr of bitfield
int reg = instData.SSRC0;
if (instData.OP != 0x1C) {
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(instData.SSRC0), false, false);
opNum++;
}
reg = instData.SDST;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
isScalarReg(instData.SDST), false, false);
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -311,13 +323,14 @@ namespace VegaISA
// Needed because can't take addr of bitfield
int reg = instData.SSRC0;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(instData.SSRC0), false, false);
opNum++;
reg = instData.SSRC1;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(instData.SSRC1), false, false);
}
int
@@ -385,18 +398,19 @@ namespace VegaISA
if (numSrcRegOperands()) {
// Needed because can't take addr of bitfield
int reg = instData.SIMM16;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, false, true);
opNum++;
if (readsVCC()) {
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
srcOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
true, false, false);
opNum++;
}
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -496,22 +510,22 @@ namespace VegaISA
if (numSrcRegOperands()) {
reg = instData.SDATA;
if (numSrcRegOperands() == getNumOperands()) {
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(reg), false, false);
opNum++;
}
reg = instData.SBASE;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
true, false, false);
opNum++;
reg = extData.OFFSET;
if (instData.IMM) {
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, false, true);
} else {
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(reg), false, false);
}
opNum++;
@@ -519,11 +533,12 @@ namespace VegaISA
if (numDstRegOperands()) {
reg = instData.SDATA;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
isScalarReg(reg), false, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -600,35 +615,36 @@ namespace VegaISA
// Needed because can't take addr of bitfield
int reg = instData.SRC0;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(reg), isVectorReg(reg), false);
opNum++;
reg = instData.VSRC1;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, true, false);
opNum++;
// VCC read
if (readsVCC()) {
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
srcOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
true, false, false);
opNum++;
}
// VDST
reg = instData.VDST;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
false, true, false);
opNum++;
// VCC write
if (writesVCC()) {
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
dstOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
true, false, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -735,18 +751,19 @@ namespace VegaISA
int reg = instData.SRC0;
if (numSrcRegOperands()) {
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(reg), isVectorReg(reg), false);
opNum++;
}
if (numDstRegOperands()) {
reg = instData.VDST;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
false, true, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -827,20 +844,21 @@ namespace VegaISA
// Needed because can't take addr of bitfield
int reg = instData.SRC0;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
isScalarReg(reg), isVectorReg(reg), false);
opNum++;
reg = instData.VSRC1;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, true, false);
opNum++;
assert(writesVCC());
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
dstOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
true, false, false);
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -928,13 +946,13 @@ namespace VegaISA
int numDst = numDstRegOperands() - writesVCC();
for (opNum = 0; opNum < numSrc; opNum++) {
operands.emplace_back(srcs[opNum], getOperandSize(opNum), true,
srcOps.emplace_back(srcs[opNum], getOperandSize(opNum), true,
isScalarReg(srcs[opNum]),
isVectorReg(srcs[opNum]), false);
}
if (readsVCC()) {
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
srcOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
true, false, false);
opNum++;
}
@@ -942,17 +960,18 @@ namespace VegaISA
if (numDst) {
// Needed because can't take addr of bitfield
int reg = instData.VDST;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
sgprDst, !sgprDst, false);
opNum++;
}
if (writesVCC()) {
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
dstOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
true, false, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -1047,13 +1066,13 @@ namespace VegaISA
int numDst = numDstRegOperands() - writesVCC();
for (opNum = 0; opNum < numSrc; opNum++) {
operands.emplace_back(srcs[opNum], getOperandSize(opNum), true,
srcOps.emplace_back(srcs[opNum], getOperandSize(opNum), true,
isScalarReg(srcs[opNum]),
isVectorReg(srcs[opNum]), false);
}
if (readsVCC()) {
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
srcOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), true,
true, false, false);
opNum++;
}
@@ -1061,17 +1080,18 @@ namespace VegaISA
if (numDst) {
// Needed because can't take addr of bitfield
int reg = instData.VDST;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
false, true, false);
opNum++;
}
if (writesVCC()) {
operands.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
dstOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), false,
true, false, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -1150,18 +1170,19 @@ namespace VegaISA
int opIdx = 0;
for (opIdx = 0; opIdx < numSrcRegOperands(); opIdx++){
operands.emplace_back(srcs[opIdx], getOperandSize(opIdx), true,
srcOps.emplace_back(srcs[opIdx], getOperandSize(opIdx), true,
false, true, false);
}
if (numDstRegOperands()) {
// Needed because can't take addr of bitfield
int reg = extData.VDST;
operands.emplace_back(reg, getOperandSize(opIdx), false,
dstOps.emplace_back(reg, getOperandSize(opIdx), false,
false, true, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -1241,23 +1262,23 @@ namespace VegaISA
if (numSrcRegOperands()) {
if (numSrcRegOperands() == getNumOperands()) {
reg = extData.VDATA;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, true, false);
opNum++;
}
reg = extData.VADDR;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, true, false);
opNum++;
reg = extData.SRSRC;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
true, false, false);
opNum++;
reg = extData.SOFFSET;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
true, false, false);
opNum++;
}
@@ -1265,11 +1286,12 @@ namespace VegaISA
// extData.VDATA moves in the reg list depending on the instruction
if (numDstRegOperands()) {
reg = extData.VDATA;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
false, true, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -1332,34 +1354,35 @@ namespace VegaISA
if (numSrcRegOperands() == getNumOperands()) {
reg = extData.VDATA;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, true, false);
opNum++;
}
reg = extData.VADDR;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, true, false);
opNum++;
reg = extData.SRSRC;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
true, false, false);
opNum++;
reg = extData.SOFFSET;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
true, false, false);
opNum++;
// extData.VDATA moves in the reg list depending on the instruction
if (numDstRegOperands()) {
reg = extData.VDATA;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
false, true, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -1405,24 +1428,24 @@ namespace VegaISA
if (numSrcRegOperands() == getNumOperands()) {
reg = extData.VDATA;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, true, false);
opNum++;
}
reg = extData.VADDR;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, true, false);
opNum++;
reg = extData.SRSRC;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
true, false, false);
opNum++;
if (getNumOperands() == 4) {
reg = extData.SSAMP;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
true, false, false);
opNum++;
}
@@ -1430,11 +1453,12 @@ namespace VegaISA
// extData.VDATA moves in the reg list depending on the instruction
if (numDstRegOperands()) {
reg = extData.VDATA;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
false, true, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int
@@ -1470,7 +1494,7 @@ namespace VegaISA
extData.VSRC2, extData.VSRC3};
for (opNum = 0; opNum < 4; opNum++) {
operands.emplace_back(srcs[opNum], getOperandSize(opNum), true,
srcOps.emplace_back(srcs[opNum], getOperandSize(opNum), true,
false, true, false);
}
@@ -1522,24 +1546,25 @@ namespace VegaISA
assert(isAtomic());
reg = extData.ADDR;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, true, false);
opNum++;
if (numSrcRegOperands() == 2) {
reg = extData.DATA;
operands.emplace_back(reg, getOperandSize(opNum), true,
srcOps.emplace_back(reg, getOperandSize(opNum), true,
false, true, false);
opNum++;
}
if (numDstRegOperands()) {
reg = extData.VDST;
operands.emplace_back(reg, getOperandSize(opNum), false,
dstOps.emplace_back(reg, getOperandSize(opNum), false,
false, true, false);
}
assert(operands.size() == getNumOperands());
assert(srcOps.size() == numSrcRegOperands());
assert(dstOps.size() == numDstRegOperands());
}
int

View File

@@ -557,7 +557,6 @@ FetchUnit::FetchBufDesc::decodeInsts()
wavefront, gpu_static_inst,
wavefront->computeUnit->
getAndIncSeqNum());
gpu_dyn_inst->initOperandInfo();
wavefront->instructionBuffer.push_back(gpu_dyn_inst);
DPRINTF(GPUFetch, "WF[%d][%d]: Id%ld decoded %s (%d bytes). "
@@ -598,7 +597,6 @@ FetchUnit::FetchBufDesc::decodeSplitInst()
wavefront, gpu_static_inst,
wavefront->computeUnit->
getAndIncSeqNum());
gpu_dyn_inst->initOperandInfo();
wavefront->instructionBuffer.push_back(gpu_dyn_inst);
DPRINTF(GPUFetch, "WF[%d][%d]: Id%d decoded split inst %s (%#x) "

View File

@@ -45,7 +45,7 @@ GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
: GPUExecContext(_cu, _wf), scalarAddr(0), addr(computeUnit()->wfSize(),
(Addr)0), numScalarReqs(0), isSaveRestore(false),
_staticInst(static_inst), _seqNum(instSeqNum),
maxSrcVecRegOpSize(0), maxSrcScalarRegOpSize(0)
maxSrcVecRegOpSize(-1), maxSrcScalarRegOpSize(-1)
{
_staticInst->initOperandInfo();
statusVector.assign(TheGpuISA::NumVecElemPerVecReg, 0);
@@ -83,108 +83,13 @@ GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
wg_id = -1;
wfSlotId = -1;
}
}
void
GPUDynInst::initOperandInfo()
{
/**
* Generate and cache the operand to register mapping information. This
* prevents this info from being generated multiple times throughout
* the CU pipeline.
*/
DPRINTF(GPUInst, "%s: generating operand info for %d operands\n",
disassemble(), getNumOperands());
for (int op_idx = 0; op_idx < getNumOperands(); ++op_idx) {
int virt_idx(-1);
int phys_idx(-1);
int op_num_dwords(-1);
_staticInst->initDynOperandInfo(wavefront(), computeUnit());
if (isVectorRegister(op_idx)) {
virt_idx = getRegisterIndex(op_idx);
op_num_dwords = numOpdDWORDs(op_idx);
if (isSrcOperand(op_idx)) {
std::vector<int> virt_indices;
std::vector<int> phys_indices;
if (op_num_dwords > maxSrcVecRegOpSize) {
maxSrcVecRegOpSize = op_num_dwords;
}
for (int i = 0; i < op_num_dwords; ++i) {
phys_idx = computeUnit()->registerManager->
mapVgpr(wavefront(), virt_idx + i);
virt_indices.push_back(virt_idx + i);
phys_indices.push_back(phys_idx);
}
DPRINTF(GPUInst, "%s adding vector src (%d->%d) operand "
"that uses %d registers.\n", disassemble(),
virt_idx, computeUnit()->registerManager->
mapVgpr(wavefront(), virt_idx), op_num_dwords);
srcVecRegOps.emplace_back(op_idx, op_num_dwords, virt_indices,
phys_indices);
} else {
assert(isDstOperand(op_idx));
std::vector<int> virt_indices;
std::vector<int> phys_indices;
for (int i = 0; i < op_num_dwords; ++i) {
phys_idx = computeUnit()->registerManager->
mapVgpr(wavefront(), virt_idx + i);
virt_indices.push_back(virt_idx + i);
phys_indices.push_back(phys_idx);
}
DPRINTF(GPUInst, "%s adding vector dst (%d->%d) operand "
"that uses %d registers.\n", disassemble(),
virt_idx, computeUnit()->registerManager->
mapVgpr(wavefront(), virt_idx), op_num_dwords);
dstVecRegOps.emplace_back(op_idx, op_num_dwords, virt_indices,
phys_indices);
}
} else if (isScalarRegister(op_idx)) {
virt_idx = getRegisterIndex(op_idx);
op_num_dwords = numOpdDWORDs(op_idx);
if (isSrcOperand(op_idx)) {
std::vector<int> virt_indices;
std::vector<int> phys_indices;
if (op_num_dwords > maxSrcScalarRegOpSize) {
maxSrcScalarRegOpSize = op_num_dwords;
}
for (int i = 0; i < op_num_dwords; ++i) {
phys_idx = computeUnit()->registerManager->
mapSgpr(wavefront(), virt_idx + i);
virt_indices.push_back(virt_idx + i);
phys_indices.push_back(phys_idx);
}
DPRINTF(GPUInst, "%s adding scalar src (%d->%d) operand "
"that uses %d registers.\n", disassemble(),
virt_idx, computeUnit()->registerManager->
mapSgpr(wavefront(), virt_idx), op_num_dwords);
srcScalarRegOps.emplace_back(op_idx, op_num_dwords,
virt_indices, phys_indices);
} else {
assert(isDstOperand(op_idx));
std::vector<int> virt_indices;
std::vector<int> phys_indices;
for (int i = 0; i < op_num_dwords; ++i) {
phys_idx = computeUnit()->registerManager->
mapSgpr(wavefront(), virt_idx + i);
virt_indices.push_back(virt_idx + i);
phys_indices.push_back(phys_idx);
}
DPRINTF(GPUInst, "%s adding scalar dst (%d->%d) operand "
"that uses %d registers.\n", disassemble(),
virt_idx, computeUnit()->registerManager->
mapSgpr(wavefront(), virt_idx), op_num_dwords);
dstScalarRegOps.emplace_back(op_idx, op_num_dwords,
virt_indices, phys_indices);
}
}
}
}
GPUDynInst::~GPUDynInst()
@@ -202,6 +107,30 @@ GPUDynInst::execute(GPUDynInstPtr gpuDynInst)
_staticInst->execute(gpuDynInst);
}
/**
 * Source vector-register operands of this instruction. The list is held
 * by the static instruction; this accessor only forwards to it.
 */
const std::vector<OperandInfo>&
GPUDynInst::srcVecRegOperands() const
{
return _staticInst->srcVecRegOperands();
}
/**
 * Destination vector-register operands of this instruction. The list is
 * held by the static instruction; this accessor only forwards to it.
 */
const std::vector<OperandInfo>&
GPUDynInst::dstVecRegOperands() const
{
return _staticInst->dstVecRegOperands();
}
/**
 * Source scalar-register operands of this instruction. The list is held
 * by the static instruction; this accessor only forwards to it.
 */
const std::vector<OperandInfo>&
GPUDynInst::srcScalarRegOperands() const
{
return _staticInst->srcScalarRegOperands();
}
/**
 * Destination scalar-register operands of this instruction. The list is
 * held by the static instruction; this accessor only forwards to it.
 */
const std::vector<OperandInfo>&
GPUDynInst::dstScalarRegOperands() const
{
return _staticInst->dstScalarRegOperands();
}
int
GPUDynInst::numSrcRegOperands()
{
@@ -217,152 +146,113 @@ GPUDynInst::numDstRegOperands()
int
GPUDynInst::numSrcVecRegOperands() const
{
return srcVecRegOps.size();
return _staticInst->numSrcVecOperands();
}
int
GPUDynInst::numDstVecRegOperands() const
{
return dstVecRegOps.size();
return _staticInst->numDstVecOperands();
}
int
GPUDynInst::maxSrcVecRegOperandSize() const
GPUDynInst::maxSrcVecRegOperandSize()
{
if (maxSrcVecRegOpSize != -1)
return maxSrcVecRegOpSize;
maxSrcVecRegOpSize = 0;
for (const auto& srcVecOp : srcVecRegOperands())
if (srcVecOp.sizeInDWords() > maxSrcVecRegOpSize)
maxSrcVecRegOpSize = srcVecOp.sizeInDWords();
return maxSrcVecRegOpSize;
}
/**
 * Number of DWords covered by the source vector-register operands, as
 * reported by the static instruction.
 */
int
GPUDynInst::numSrcVecDWords()
{
return _staticInst->numSrcVecDWords();
}
/**
 * Number of DWords covered by the destination vector-register operands,
 * as reported by the static instruction.
 */
int
GPUDynInst::numDstVecDWords()
{
return _staticInst->numDstVecDWords();
}
int
GPUDynInst::numSrcScalarRegOperands() const
{
return srcScalarRegOps.size();
return _staticInst->numSrcScalarOperands();
}
int
GPUDynInst::numDstScalarRegOperands() const
{
return dstScalarRegOps.size();
return _staticInst->numDstScalarOperands();
}
int
GPUDynInst::maxSrcScalarRegOperandSize() const
GPUDynInst::maxSrcScalarRegOperandSize()
{
if (maxSrcScalarRegOpSize != -1)
return maxSrcScalarRegOpSize;
maxSrcScalarRegOpSize = 0;
for (const auto& srcScOp : srcScalarRegOperands())
if (srcScOp.sizeInDWords() > maxSrcScalarRegOpSize)
maxSrcScalarRegOpSize = srcScOp.sizeInDWords();
return maxSrcScalarRegOpSize;
}
int
GPUDynInst::numSrcVecDWORDs()
GPUDynInst::numSrcScalarDWords()
{
return _staticInst->numSrcVecDWORDs();
return _staticInst->numSrcScalarDWords();
}
int
GPUDynInst::numDstVecDWORDs()
GPUDynInst::numDstScalarDWords()
{
return _staticInst->numDstVecDWORDs();
return _staticInst->numDstScalarDWords();
}
int
GPUDynInst::numOpdDWORDs(int operandIdx)
GPUDynInst::maxOperandSize()
{
return _staticInst->numOpdDWORDs(operandIdx);
return _staticInst->maxOperandSize();
}
int
GPUDynInst::getNumOperands()
GPUDynInst::getNumOperands() const
{
return _staticInst->getNumOperands();
}
bool
GPUDynInst::isVectorRegister(int operandIdx)
{
return _staticInst->isVectorRegister(operandIdx);
}
bool
GPUDynInst::isScalarRegister(int operandIdx)
{
return _staticInst->isScalarRegister(operandIdx);
}
int
GPUDynInst::getRegisterIndex(int operandIdx)
{
return _staticInst->getRegisterIndex(operandIdx, wf->reservedScalarRegs);
}
int
GPUDynInst::getOperandSize(int operandIdx)
{
return _staticInst->getOperandSize(operandIdx);
}
bool
GPUDynInst::isDstOperand(int operandIdx)
{
return _staticInst->isDstOperand(operandIdx);
}
bool
GPUDynInst::isSrcOperand(int operandIdx)
{
return _staticInst->isSrcOperand(operandIdx);
}
bool
GPUDynInst::hasSourceSgpr() const
{
for (int i = 0; i < _staticInst->getNumOperands(); ++i) {
if (_staticInst->isScalarRegister(i) && _staticInst->isSrcOperand(i)) {
return true;
}
}
return false;
}
bool
GPUDynInst::hasSourceVgpr() const
{
for (int i = 0; i < _staticInst->getNumOperands(); ++i) {
if (_staticInst->isVectorRegister(i) && _staticInst->isSrcOperand(i)) {
return true;
}
}
return false;
}
bool
GPUDynInst::hasDestinationSgpr() const
{
for (int i = 0; i < _staticInst->getNumOperands(); ++i) {
if (_staticInst->isScalarRegister(i) && _staticInst->isDstOperand(i)) {
return true;
}
}
return false;
}
bool
GPUDynInst::srcIsVgpr(int index) const
{
assert(index >= 0 && index < _staticInst->getNumOperands());
if (_staticInst->isVectorRegister(index) &&
_staticInst->isSrcOperand(index)) {
return true;
}
return false;
return !srcVecRegOperands().empty();
}
bool
GPUDynInst::hasDestinationVgpr() const
{
for (int i = 0; i < _staticInst->getNumOperands(); ++i) {
if (_staticInst->isVectorRegister(i) && _staticInst->isDstOperand(i)) {
return true;
}
}
return false;
return !dstVecRegOperands().empty();
}
/**
 * True when the instruction has at least one source scalar-register
 * operand, i.e. the source scalar operand list is non-empty.
 */
bool
GPUDynInst::hasSourceSgpr() const
{
return !srcScalarRegOperands().empty();
}
/**
 * True when the instruction has at least one destination scalar-register
 * operand, i.e. the destination scalar operand list is non-empty.
 */
bool
GPUDynInst::hasDestinationSgpr() const
{
return !dstScalarRegOperands().empty();
}
bool
@@ -580,12 +470,20 @@ GPUDynInst::writesSCC() const
/**
 * Whether this instruction reads VCC. An explicit source operand flagged
 * as VCC is sufficient; when no such operand exists the answer comes from
 * the static instruction's own readsVCC().
 */
bool
GPUDynInst::readsVCC() const
{
    for (const auto& operand : _staticInst->srcOperands()) {
        if (operand.isVcc()) {
            return true;
        }
    }

    // No explicit VCC source operand: defer to the static instruction.
    return _staticInst->readsVCC();
}
/**
 * Whether this instruction writes VCC. An explicit destination operand
 * flagged as VCC is sufficient; when no such operand exists the answer
 * comes from the static instruction's own writesVCC().
 */
bool
GPUDynInst::writesVCC() const
{
    for (const auto& operand : _staticInst->dstOperands()) {
        if (operand.isVcc()) {
            return true;
        }
    }

    // No explicit VCC destination operand: defer to the static instruction.
    return _staticInst->writesVCC();
}
@@ -602,13 +500,13 @@ GPUDynInst::writesMode() const
}
bool
GPUDynInst::readsEXEC() const
GPUDynInst::readsExec() const
{
return _staticInst->readsEXEC();
}
bool
GPUDynInst::writesEXEC() const
GPUDynInst::writesExec() const
{
return _staticInst->writesEXEC();
}
@@ -622,42 +520,40 @@ GPUDynInst::ignoreExec() const
bool
GPUDynInst::writesExecMask() const
{
for (int i = 0; i < _staticInst->getNumOperands(); ++i) {
return _staticInst->isDstOperand(i) &&
_staticInst->isExecMaskRegister(i);
}
return false;
for (const auto& dstOp : _staticInst->dstOperands())
if (dstOp.isExec())
return true;
return _staticInst->writesEXEC();
}
bool
GPUDynInst::readsExecMask() const
{
for (int i = 0; i < _staticInst->getNumOperands(); ++i) {
return _staticInst->isSrcOperand(i) &&
_staticInst->isExecMaskRegister(i);
}
return false;
for (const auto& srcOp : _staticInst->srcOperands())
if (srcOp.isExec())
return true;
return _staticInst->readsEXEC();
}
bool
GPUDynInst::writesFlatScratch() const
{
for (int i = 0; i < _staticInst->getNumOperands(); ++i) {
if (_staticInst->isScalarRegister(i) && _staticInst->isDstOperand(i)) {
return _staticInst->isFlatScratchRegister(i);
}
}
for (const auto& dstScalarOp : dstScalarRegOperands())
if (dstScalarOp.isFlatScratch())
return true;
return false;
}
bool
GPUDynInst::readsFlatScratch() const
{
for (int i = 0; i < _staticInst->getNumOperands(); ++i) {
if (_staticInst->isScalarRegister(i) && _staticInst->isSrcOperand(i)) {
return _staticInst->isFlatScratchRegister(i);
}
}
for (const auto& srcScalarOp : srcScalarRegOperands())
if (srcScalarOp.isFlatScratch())
return true;
return false;
}

View File

@@ -45,6 +45,7 @@
#include "enums/StorageClassType.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_exec_context.hh"
#include "gpu-compute/operand_info.hh"
class GPUStaticInst;
@@ -89,7 +90,7 @@ class RegisterOperandInfo
/**
* The number of registers required to store this operand.
*/
int numRegisters() const { return numDWORDs / TheGpuISA::RegSizeDWORDs; }
int numRegisters() const { return numDWORDs / TheGpuISA::RegSizeDWords; }
int operandIdx() const { return opIdx; }
/**
* We typically only need the first virtual register for the operand
@@ -117,65 +118,42 @@ class GPUDynInst : public GPUExecContext
GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
uint64_t instSeqNum);
~GPUDynInst();
void initOperandInfo();
void execute(GPUDynInstPtr gpuDynInst);
const std::vector<RegisterOperandInfo>&
srcVecRegOperands() const
{
return srcVecRegOps;
}
const std::vector<RegisterOperandInfo>&
dstVecRegOperands() const
{
return dstVecRegOps;
}
const std::vector<RegisterOperandInfo>&
srcScalarRegOperands() const
{
return srcScalarRegOps;
}
const std::vector<RegisterOperandInfo>&
dstScalarRegOperands() const
{
return dstScalarRegOps;
}
int numSrcVecRegOperands() const;
int numDstVecRegOperands() const;
int maxSrcVecRegOperandSize() const;
int numSrcScalarRegOperands() const;
int numDstScalarRegOperands() const;
int maxSrcScalarRegOperandSize() const;
const std::vector<OperandInfo>& srcVecRegOperands() const;
const std::vector<OperandInfo>& dstVecRegOperands() const;
const std::vector<OperandInfo>& srcScalarRegOperands() const;
const std::vector<OperandInfo>& dstScalarRegOperands() const;
int numSrcRegOperands();
int numDstRegOperands();
int numSrcVecDWORDs();
int numDstVecDWORDs();
int numOpdDWORDs(int operandIdx);
int getNumOperands();
bool isVectorRegister(int operandIdx);
bool isScalarRegister(int operandIdx);
int getRegisterIndex(int operandIdx);
int getOperandSize(int operandIdx);
bool isDstOperand(int operandIdx);
bool isSrcOperand(int operandIdx);
bool hasDestinationSgpr() const;
int numSrcVecRegOperands() const;
int numDstVecRegOperands() const;
int maxSrcVecRegOperandSize();
int numSrcVecDWords();
int numDstVecDWords();
int numSrcScalarRegOperands() const;
int numDstScalarRegOperands() const;
int maxSrcScalarRegOperandSize();
int numSrcScalarDWords();
int numDstScalarDWords();
int maxOperandSize();
int getNumOperands() const;
bool hasSourceSgpr() const;
bool hasDestinationVgpr() const;
bool hasDestinationSgpr() const;
bool hasSourceVgpr() const;
bool hasDestinationVgpr() const;
// returns true if the string "opcodeStr" is found in the
// opcode of the instruction
bool isOpcode(const std::string& opcodeStr) const;
bool isOpcode(const std::string& opcodeStr,
const std::string& extStr) const;
// returns true if source operand at "index" is a vector register
bool srcIsVgpr(int index) const;
const std::string &disassemble() const;
@@ -264,8 +242,8 @@ class GPUDynInst : public GPUExecContext
bool writesSCC() const;
bool readsVCC() const;
bool writesVCC() const;
bool readsEXEC() const;
bool writesEXEC() const;
bool readsExec() const;
bool writesExec() const;
bool readsMode() const;
bool writesMode() const;
bool ignoreExec() const;
@@ -509,12 +487,6 @@ class GPUDynInst : public GPUExecContext
// hold each cache block address for the instruction and a vector
// to hold the tick when the block arrives at certain hop points
std::map<Addr, std::vector<Tick>> lineAddressTime;
// Operand info.
std::vector<RegisterOperandInfo> srcVecRegOps;
std::vector<RegisterOperandInfo> dstVecRegOps;
std::vector<RegisterOperandInfo> srcScalarRegOps;
std::vector<RegisterOperandInfo> dstScalarRegOps;
};
#endif // __GPU_DYN_INST_HH__

View File

@@ -33,10 +33,12 @@
#include "gpu-compute/gpu_static_inst.hh"
#include "debug/GPUInst.hh"
GPUStaticInst::GPUStaticInst(const std::string &opcode)
: executed_as(Enums::SC_NONE), _opcode(opcode),
_instNum(0), _instAddr(0), srcVecOperands(-1), dstVecOperands(-1),
srcVecDWORDs(-1), dstVecDWORDs(-1)
_instNum(0), _instAddr(0), srcVecDWords(-1), dstVecDWords(-1),
srcScalarDWords(-1), dstScalarDWords(-1), maxOpSize(-1)
{
}
@@ -51,79 +53,160 @@ GPUStaticInst::disassemble()
return disassembly;
}
/**
 * Generate the virtual-to-physical register mapping for every register
 * operand of this instruction and sort the operands into the four
 * per-category lists (src/dst x vector/scalar).
 *
 * NOTE(review): each call appends to the per-category lists without
 * clearing them, so this presumably has to run only once per static
 * instruction object -- confirm against the callers.
 *
 * @param wf wavefront whose register allocation is used for the mapping
 * @param cu compute unit whose register manager performs the mapping
 */
void
GPUStaticInst::initDynOperandInfo(Wavefront *wf, ComputeUnit *cu)
{
    // Local helper (only needed here): map one operand through mapReg,
    // store the mapping in the operand, then append a copy of the operand
    // to regOps.
    auto mapOperand = [&](OperandInfo& operand,
                          std::vector<OperandInfo>& regOps,
                          MapRegFn mapReg, OpType kind)
    {
        const int dword_count = operand.sizeInDWords();
        const int first_virt = operand.registerIndex(wf->reservedScalarRegs);

        std::vector<int> virt_regs;
        std::vector<int> phys_regs;

        // One register per DWord, starting at the operand's first
        // virtual register index.
        for (int offset = 0; offset < dword_count; ++offset) {
            const int virt_reg = first_virt + offset;
            const int phys_reg = (cu->registerManager->*mapReg)(wf, virt_reg);
            virt_regs.push_back(virt_reg);
            phys_regs.push_back(phys_reg);
        }

        DPRINTF(GPUInst, "%s adding %s %s (%d->%d) operand that uses "
                "%d registers.\n", disassemble(),
                (kind == OpType::SRC_VEC || kind == OpType::DST_VEC) ?
                "vector" : "scalar",
                (kind == OpType::SRC_VEC || kind == OpType::SRC_SCALAR) ?
                "src" : "dst", virt_regs[0], phys_regs[0], dword_count);

        operand.setVirtToPhysMapping(virt_regs, phys_regs);
        regOps.emplace_back(operand);
    };

    // Source operands: vector registers map through mapVgpr, scalar
    // registers through mapSgpr; anything else is left alone.
    for (auto& operand : srcOps) {
        if (operand.isVectorReg()) {
            mapOperand(operand, srcVecRegOps,
                       &RegisterManager::mapVgpr, OpType::SRC_VEC);
        } else if (operand.isScalarReg()) {
            mapOperand(operand, srcScalarRegOps,
                       &RegisterManager::mapSgpr, OpType::SRC_SCALAR);
        }
    }

    // Destination operands, handled the same way.
    for (auto& operand : dstOps) {
        if (operand.isVectorReg()) {
            mapOperand(operand, dstVecRegOps,
                       &RegisterManager::mapVgpr, OpType::DST_VEC);
        } else if (operand.isScalarReg()) {
            mapOperand(operand, dstScalarRegOps,
                       &RegisterManager::mapSgpr, OpType::DST_SCALAR);
        }
    }
}
int
GPUStaticInst::numSrcVecOperands()
{
if (srcVecOperands > -1)
return srcVecOperands;
srcVecOperands = 0;
if (!isScalar()) {
for (int k = 0; k < getNumOperands(); ++k) {
if (isVectorRegister(k) && isSrcOperand(k))
srcVecOperands++;
}
}
return srcVecOperands;
return srcVecRegOps.size();
}
int
GPUStaticInst::numDstVecOperands()
{
if (dstVecOperands > -1)
return dstVecOperands;
dstVecOperands = 0;
if (!isScalar()) {
for (int k = 0; k < getNumOperands(); ++k) {
if (isVectorRegister(k) && isDstOperand(k))
dstVecOperands++;
}
}
return dstVecOperands;
return dstVecRegOps.size();
}
int
GPUStaticInst::numSrcVecDWORDs()
GPUStaticInst::numSrcVecDWords()
{
if (srcVecDWORDs > -1) {
return srcVecDWORDs;
if (srcVecDWords != -1) {
return srcVecDWords;
}
srcVecDWORDs = 0;
if (!isScalar()) {
for (int i = 0; i < getNumOperands(); i++) {
if (isVectorRegister(i) && isSrcOperand(i)) {
int dwords = numOpdDWORDs(i);
srcVecDWORDs += dwords;
}
}
}
return srcVecDWORDs;
srcVecDWords = 0;
for (const auto& srcOp : srcOps)
if (srcOp.isVectorReg())
srcVecDWords += srcOp.sizeInDWords();
return srcVecDWords;
}
int
GPUStaticInst::numDstVecDWORDs()
GPUStaticInst::numDstVecDWords()
{
if (dstVecDWORDs > -1) {
return dstVecDWORDs;
if (dstVecDWords != -1) {
return dstVecDWords;
}
dstVecDWORDs = 0;
if (!isScalar()) {
for (int i = 0; i < getNumOperands(); i++) {
if (isVectorRegister(i) && isDstOperand(i)) {
int dwords = numOpdDWORDs(i);
dstVecDWORDs += dwords;
}
}
}
return dstVecDWORDs;
dstVecDWords = 0;
for (const auto& dstOp : dstOps)
if (dstOp.isVectorReg())
dstVecDWords += dstOp.sizeInDWords();
return dstVecDWords;
}
int
GPUStaticInst::numOpdDWORDs(int operandIdx)
GPUStaticInst::numSrcScalarOperands()
{
return getOperandSize(operandIdx) <= 4 ? 1
: getOperandSize(operandIdx) / 4;
return srcScalarRegOps.size();
}
/**
 * Number of entries currently in the destination scalar-register operand
 * list (dstScalarRegOps).
 */
int
GPUStaticInst::numDstScalarOperands()
{
return dstScalarRegOps.size();
}
int
GPUStaticInst::numSrcScalarDWords()
{
if (srcScalarDWords != -1)
return srcScalarDWords;
srcScalarDWords = 0;
for (const auto& srcOp : srcOps)
if (srcOp.isScalarReg())
srcScalarDWords += srcOp.sizeInDWords();
return srcScalarDWords;
}
int
GPUStaticInst::numDstScalarDWords()
{
if (dstScalarDWords != -1)
return dstScalarDWords;
dstScalarDWords = 0;
for (const auto& dstOp : dstOps)
if (dstOp.isScalarReg())
dstScalarDWords += dstOp.sizeInDWords();
return dstScalarDWords;
}
int
GPUStaticInst::maxOperandSize()
{
if (maxOpSize != -1)
return maxOpSize;
maxOpSize = 0;
for (const auto& dstOp : dstOps)
if (dstOp.size() > maxOpSize)
maxOpSize = dstOp.size();
for (const auto& srcOp : srcOps)
if (srcOp.size() > maxOpSize)
maxOpSize = srcOp.size();
return maxOpSize;
}

View File

@@ -52,10 +52,10 @@
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/misc.hh"
#include "gpu-compute/operand_info.hh"
#include "gpu-compute/wavefront.hh"
class BaseOperand;
class BaseRegOperand;
class Wavefront;
class GPUStaticInst : public GPUStaticInstFlags
{
@@ -76,41 +76,33 @@ class GPUStaticInst : public GPUStaticInstFlags
virtual TheGpuISA::ScalarRegU32 srcLiteral() const { return 0; }
void initDynOperandInfo(Wavefront *wf, ComputeUnit *cu);
virtual void initOperandInfo() = 0;
virtual void execute(GPUDynInstPtr gpuDynInst) = 0;
virtual void generateDisassembly() = 0;
const std::string& disassemble();
virtual int getNumOperands() = 0;
bool isScalarRegister(int operandIndex)
{ return operands[operandIndex].isScalarReg(); }
bool isVectorRegister(int operandIndex)
{ return operands[operandIndex].isVectorReg(); }
bool isSrcOperand(int operandIndex)
{ return operands[operandIndex].isSrc(); }
bool isDstOperand(int operandIndex)
{ return operands[operandIndex].isDst(); }
virtual bool isFlatScratchRegister(int opIdx) = 0;
virtual bool isExecMaskRegister(int opIdx) = 0;
virtual int getOperandSize(int operandIndex) = 0;
int getRegisterIndex(int operandIndex, int num_scalar_regs)
{ return operands[operandIndex].registerIndex(num_scalar_regs); }
virtual int numDstRegOperands() = 0;
virtual int numSrcRegOperands() = 0;
virtual int coalescerTokenCount() const { return 0; }
int numDstVecOperands();
int numSrcVecOperands();
int numDstVecDWORDs();
int numSrcVecDWORDs();
int numDstVecOperands();
int numSrcVecDWords();
int numDstVecDWords();
int numOpdDWORDs(int operandIdx);
int numSrcScalarOperands();
int numDstScalarOperands();
int numSrcScalarDWords();
int numDstScalarDWords();
int maxOperandSize();
virtual int coalescerTokenCount() const { return 0; }
bool isALU() const { return _flags[ALU]; }
bool isBranch() const { return _flags[Branch]; }
@@ -268,18 +260,58 @@ class GPUStaticInst : public GPUStaticInstFlags
}
const std::string& opcode() const { return _opcode; }
const std::vector<OperandInfo>& srcOperands() const { return srcOps; }
const std::vector<OperandInfo>& dstOperands() const { return dstOps; }
const std::vector<OperandInfo>&
srcVecRegOperands() const
{
return srcVecRegOps;
}
const std::vector<OperandInfo>&
dstVecRegOperands() const
{
return dstVecRegOps;
}
const std::vector<OperandInfo>&
srcScalarRegOperands() const
{
return srcScalarRegOps;
}
const std::vector<OperandInfo>&
dstScalarRegOperands() const
{
return dstScalarRegOps;
}
// These next 2 lines are used in initDynOperandInfo to let the lambda
// function work
typedef int (RegisterManager::*MapRegFn)(Wavefront *, int);
enum OpType { SRC_VEC, SRC_SCALAR, DST_VEC, DST_SCALAR };
protected:
const std::string _opcode;
std::string disassembly;
int _instNum;
int _instAddr;
std::vector<OperandInfo> operands;
std::vector<OperandInfo> srcOps;
std::vector<OperandInfo> dstOps;
private:
int srcVecOperands;
int dstVecOperands;
int srcVecDWORDs;
int dstVecDWORDs;
int srcVecDWords;
int dstVecDWords;
int srcScalarDWords;
int dstScalarDWords;
int maxOpSize;
std::vector<OperandInfo> srcVecRegOps;
std::vector<OperandInfo> dstVecRegOps;
std::vector<OperandInfo> srcScalarRegOps;
std::vector<OperandInfo> dstScalarRegOps;
/**
* Identifier of the immediate post-dominator instruction.
*/

View File

@@ -44,7 +44,8 @@ class OperandInfo
OperandInfo() = delete;
OperandInfo(int opSelectorVal, int size, bool src, bool scalar_reg,
bool vector_reg, bool imm)
: _opSelectorVal(opSelectorVal), _size(size)
: _opSelectorVal(opSelectorVal), _size(size),
_numDWords(size <= 4 ? 1 : size / 4)
{
if (src)
flags.set(SRC);
@@ -68,7 +69,14 @@ class OperandInfo
flags.set(POS_CONST);
}
// Number of registers needed for this operand: its size in DWords divided
// by the ISA's register size in DWords.
int numRegisters() const { return _numDWords / TheGpuISA::RegSizeDWords; }
// Size of the operand in DWords (1 for operands of 4 bytes or fewer,
// otherwise size / 4, as set up in the constructor).
int sizeInDWords() const { return _numDWords; }
// Size of the operand in bytes.
int size() const { return _size; }
// Certain opIdx's get changed in calls to opSelectorToRegIdx
// This avoids that by returning the exact value
int rawRegisterIndex() const { return _opSelectorVal; }
int
registerIndex(int numScalarRegs) const
{
@@ -88,6 +96,41 @@ class OperandInfo
bool isExec() const { return flags.isSet(EXEC); }
bool isFlatScratch() const { return flags.isSet(FLAT); }
/**
 * Record the virtual and physical register indices backing this operand.
 * Both lists must have the same length, and that length must equal the
 * operand's size in DWords.
 *
 * @param v virtual register indices, one per DWord of the operand
 * @param p physical register indices, parallel to v
 */
void
setVirtToPhysMapping(std::vector<int> v, std::vector<int> p)
{
    // v and p are already private copies (passed by value); take over
    // their storage instead of copying the contents a second time.
    _virtIndices.swap(v);
    _physIndices.swap(p);

    assert(_virtIndices.size() == _physIndices.size());
    assert(_numDWords == _physIndices.size());
}
/**
 * We typically only need the first virtual register for the operand
 * regardless of its size.
 *
 * reg_num selects which register of the operand to return (default: the
 * first). Access goes through std::vector::at(), so an out-of-range
 * reg_num throws std::out_of_range rather than reading past the end.
 */
int virtIdx(int reg_num=0) const { return _virtIndices.at(reg_num); }
int physIdx(int reg_num=0) const { return _physIndices.at(reg_num); }
/**
 * All virtual register indices recorded by setVirtToPhysMapping().
 */
const std::vector<int>&
virtIndices() const
{
return _virtIndices;
}
/**
 * All physical register indices recorded by setVirtToPhysMapping().
 */
const std::vector<int>&
physIndices() const
{
return _physIndices;
}
/**
 * Per-bank read counts for this operand. The reference is non-const even
 * though the accessor is const: _bankReadCounts is declared mutable, so
 * callers can update the counts through a const OperandInfo.
 */
std::vector<int>&
bankReadCounts() const
{
return _bankReadCounts;
}
typedef uint32_t FlagsType;
typedef ::Flags<FlagsType> Flags;
@@ -128,7 +171,7 @@ class OperandInfo
Flags flags;
/**
* Index of the operand as used in registers.cc functions
* Value of the operand as used in registers.cc functions
*/
const int _opSelectorVal;
@@ -136,6 +179,19 @@ class OperandInfo
* Size of the operand in bytes
*/
const int _size;
/**
* Size of operand in DWords
*/
const int _numDWords;
std::vector<int> _virtIndices;
std::vector<int> _physIndices;
/**
* The number of reads this operand will make to each bank.
*/
mutable std::vector<int> _bankReadCounts;
};
#endif // __GPU_COMPUTE_OPERAND_INFO_H__

View File

@@ -50,51 +50,26 @@ ScalarRegisterFile::ScalarRegisterFile(const ScalarRegisterFileParams &p)
bool
ScalarRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const
{
for (int i = 0; i < ii->getNumOperands(); ++i) {
if (ii->isScalarRegister(i) && ii->isSrcOperand(i)) {
for (const auto& srcScalarOp : ii->srcScalarRegOperands()) {
for (const auto& physIdx : srcScalarOp.physIndices()) {
if (regBusy(physIdx))
DPRINTF(GPUSRF, "RAW stall: WV[%d]: %s: physReg[%d]\n",
w->wfDynId, ii->disassemble(), physIdx);
w->stats.numTimesBlockedDueRAWDependencies++;
return false;
}
}
int sgprIdx = ii->getRegisterIndex(i);
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
ii->getOperandSize(i) / 4;
for (int j = 0; j < nRegs; ++j) {
int pSgpr =
computeUnit->registerManager->mapSgpr(w, sgprIdx + j);
if (regBusy(pSgpr)) {
if (ii->isDstOperand(i)) {
w->stats.numTimesBlockedDueWAXDependencies++;
} else if (ii->isSrcOperand(i)) {
DPRINTF(GPUSRF, "RAW stall: WV[%d]: %s: physReg[%d]\n",
w->wfDynId, ii->disassemble(), pSgpr);
w->stats.numTimesBlockedDueRAWDependencies++;
}
return false;
}
} // nRegs
} // isScalar
} // operand
return true;
}
void
ScalarRegisterFile::scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii)
{
// iterate over all register destination operands
for (int i = 0; i < ii->getNumOperands(); ++i) {
if (ii->isScalarRegister(i) && ii->isDstOperand(i)) {
int sgprIdx = ii->getRegisterIndex(i);
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
ii->getOperandSize(i) / 4;
for (int j = 0; j < nRegs; ++j) {
int physReg =
computeUnit->registerManager->mapSgpr(w, sgprIdx + j);
// mark the destination scalar register as busy
markReg(physReg, true);
}
for (const auto& dstScalarOp : ii->dstScalarRegOperands()) {
for (const auto& physIdx : dstScalarOp.physIndices()) {
// mark the destination scalar register as busy
markReg(physIdx, true);
}
}
}
@@ -102,32 +77,19 @@ ScalarRegisterFile::scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii)
void
ScalarRegisterFile::waveExecuteInst(Wavefront *w, GPUDynInstPtr ii)
{
for (int i = 0; i < ii->getNumOperands(); i++) {
if (ii->isScalarRegister(i) && ii->isSrcOperand(i)) {
int DWORDs = ii->getOperandSize(i) <= 4 ? 1
: ii->getOperandSize(i) / 4;
stats.registerReads += DWORDs;
}
}
stats.registerReads += ii->numSrcScalarDWords();
if (!ii->isLoad() && !(ii->isAtomic() || ii->isMemSync())) {
Cycles delay(computeUnit->scalarPipeLength());
Tick tickDelay = computeUnit->cyclesToTicks(delay);
for (int i = 0; i < ii->getNumOperands(); i++) {
if (ii->isScalarRegister(i) && ii->isDstOperand(i)) {
int sgprIdx = ii->getRegisterIndex(i);
int nRegs = ii->getOperandSize(i) <= 4 ? 1
: ii->getOperandSize(i) / 4;
for (int j = 0; j < nRegs; j++) {
int physReg = computeUnit->registerManager->
mapSgpr(w, sgprIdx + j);
enqRegFreeEvent(physReg, tickDelay);
}
stats.registerWrites += nRegs;
for (const auto& dstScalarOp : ii->dstScalarRegOperands()) {
for (const auto& physIdx : dstScalarOp.physIndices()) {
enqRegFreeEvent(physIdx, tickDelay);
}
}
stats.registerWrites += ii->numDstScalarDWords();
}
}
@@ -136,20 +98,11 @@ ScalarRegisterFile::scheduleWriteOperandsFromLoad(Wavefront *w,
GPUDynInstPtr ii)
{
assert(ii->isLoad() || ii->isAtomicRet());
for (int i = 0; i < ii->getNumOperands(); ++i) {
if (ii->isScalarRegister(i) && ii->isDstOperand(i)) {
int sgprIdx = ii->getRegisterIndex(i);
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
ii->getOperandSize(i) / 4;
for (int j = 0; j < nRegs; ++j) {
int physReg = computeUnit->registerManager->
mapSgpr(w, sgprIdx + j);
enqRegFreeEvent(physReg, computeUnit->clockPeriod());
}
stats.registerWrites += nRegs;
for (const auto& dstScalarOp : ii->dstScalarRegOperands()) {
for (const auto& physIdx : dstScalarOp.physIndices()) {
enqRegFreeEvent(physIdx, computeUnit->clockPeriod());
}
}
stats.registerWrites += ii->numDstScalarDWords();
}

View File

@@ -57,63 +57,38 @@ VectorRegisterFile::VectorRegisterFile(const VectorRegisterFileParams &p)
bool
VectorRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const
{
for (int i = 0; i < ii->getNumOperands(); ++i) {
if (ii->isVectorRegister(i) && ii->isSrcOperand(i)) {
int vgprIdx = ii->getRegisterIndex(i);
// determine number of registers
int nRegs =
ii->getOperandSize(i) <= 4 ? 1 : ii->getOperandSize(i) / 4;
for (int j = 0; j < nRegs; j++) {
int pVgpr = computeUnit->registerManager
->mapVgpr(w, vgprIdx + j);
if (regBusy(pVgpr)) {
if (ii->isDstOperand(i)) {
w->stats.numTimesBlockedDueWAXDependencies++;
} else if (ii->isSrcOperand(i)) {
DPRINTF(GPUVRF, "RAW stall: WV[%d]: %s: physReg[%d]\n",
w->wfDynId, ii->disassemble(), pVgpr);
w->stats.numTimesBlockedDueRAWDependencies++;
}
return false;
}
for (const auto& srcVecOp : ii->srcVecRegOperands()) {
for (const auto& physIdx : srcVecOp.physIndices()) {
if (regBusy(physIdx)) {
DPRINTF(GPUVRF, "RAW stall: WV[%d]: %s: physReg[%d]\n",
w->wfDynId, ii->disassemble(), physIdx);
w->stats.numTimesBlockedDueRAWDependencies++;
return false;
}
}
}
return true;
}
void
VectorRegisterFile::scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii)
{
// iterate over all register destination operands
for (int i = 0; i < ii->getNumOperands(); ++i) {
if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
int vgprIdx = ii->getRegisterIndex(i);
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
ii->getOperandSize(i) / 4;
for (int j = 0; j < nRegs; ++j) {
int physReg = computeUnit->registerManager
->mapVgpr(w, vgprIdx + j);
// If instruction is atomic instruction and
// the atomics do not return value, then
// do not mark this reg as busy.
if (!(ii->isAtomic() && !ii->isAtomicRet())) {
/**
* if the instruction is a load with EXEC = 0, then
* we do not mark the reg. we do this to avoid a
* deadlock that can occur because a load reserves
* its destination regs before checking its exec mask,
* and in the case it is 0, it will not send/recv any
* packets, and therefore it will never free its dest
* reg(s).
*/
if (!ii->isLoad() || (ii->isLoad()
&& ii->exec_mask.any())) {
markReg(physReg, true);
}
for (const auto& dstVecOp : ii->dstVecRegOperands()) {
for (const auto& physIdx : dstVecOp.physIndices()) {
// If the instruction is an atomic instruction and the atomic does
// not return a value, then do not mark this reg as busy.
if (!(ii->isAtomic() && !ii->isAtomicRet())) {
/**
* if the instruction is a load with EXEC = 0, then we do not
* mark the reg. We do this to avoid a deadlock that can
* occur because a load reserves its destination regs before
* checking its exec mask, and in the case it is 0, it will not
* send/recv any packets, and therefore it will never free its
* dst reg(s)
*/
if (!ii->isLoad() || (ii->isLoad() && ii->exec_mask.any())) {
markReg(physIdx, true);
}
}
}
@@ -123,53 +98,42 @@ VectorRegisterFile::scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii)
void
VectorRegisterFile::waveExecuteInst(Wavefront *w, GPUDynInstPtr ii)
{
// increment count of number of DWORDs read from VRF
int DWORDs = ii->numSrcVecDWORDs();
stats.registerReads += (DWORDs * w->execMask().count());
// increment count of number of DWords read from VRF
int DWords = ii->numSrcVecDWords();
stats.registerReads += (DWords * w->execMask().count());
uint64_t mask = w->execMask().to_ullong();
int srams = w->execMask().size() / 4;
for (int i = 0; i < srams; i++) {
if (mask & 0xF) {
stats.sramReads += DWORDs;
stats.sramReads += DWords;
}
mask = mask >> 4;
}
if (!ii->isLoad()
&& !(ii->isAtomic() || ii->isMemSync())) {
int opSize = 4;
for (int i = 0; i < ii->getNumOperands(); i++) {
if (ii->getOperandSize(i) > opSize) {
opSize = ii->getOperandSize(i);
}
}
// TODO: compute proper delay
// For now, it is based on largest operand size
int opSize = ii->maxOperandSize();
Cycles delay(opSize <= 4 ? computeUnit->spBypassLength()
: computeUnit->dpBypassLength());
Tick tickDelay = computeUnit->cyclesToTicks(delay);
for (int i = 0; i < ii->getNumOperands(); i++) {
if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
int vgprIdx = ii->getRegisterIndex(i);
int nRegs = ii->getOperandSize(i) <= 4 ? 1
: ii->getOperandSize(i) / 4;
for (int j = 0; j < nRegs; j++) {
int physReg = computeUnit->registerManager
->mapVgpr(w, vgprIdx + j);
enqRegFreeEvent(physReg, tickDelay);
}
for (const auto& dstVecOp : ii->dstVecRegOperands()) {
for (const auto& physIdx : dstVecOp.physIndices()) {
enqRegFreeEvent(physIdx, tickDelay);
}
}
// increment count of number of DWORDs written to VRF
DWORDs = ii->numDstVecDWORDs();
stats.registerWrites += (DWORDs * w->execMask().count());
// increment count of number of DWords written to VRF
DWords = ii->numDstVecDWords();
stats.registerWrites += (DWords * w->execMask().count());
mask = w->execMask().to_ullong();
srams = w->execMask().size() / 4;
for (int i = 0; i < srams; i++) {
if (mask & 0xF) {
stats.sramWrites += DWORDs;
stats.sramWrites += DWords;
}
mask = mask >> 4;
}
@@ -181,28 +145,20 @@ VectorRegisterFile::scheduleWriteOperandsFromLoad(
Wavefront *w, GPUDynInstPtr ii)
{
assert(ii->isLoad() || ii->isAtomicRet());
for (int i = 0; i < ii->getNumOperands(); ++i) {
if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
int vgprIdx = ii->getRegisterIndex(i);
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
ii->getOperandSize(i) / 4;
for (int j = 0; j < nRegs; ++j) {
int physReg = computeUnit->registerManager
->mapVgpr(w, vgprIdx + j);
enqRegFreeEvent(physReg, computeUnit->clockPeriod());
}
for (const auto& dstVecOp : ii->dstVecRegOperands()) {
for (const auto& physIdx : dstVecOp.physIndices()) {
enqRegFreeEvent(physIdx, computeUnit->clockPeriod());
}
}
// increment count of number of DWORDs written to VRF
int DWORDs = ii->numDstVecDWORDs();
stats.registerWrites += (DWORDs * ii->exec_mask.count());
// increment count of number of DWords written to VRF
int DWords = ii->numDstVecDWords();
stats.registerWrites += (DWords * ii->exec_mask.count());
uint64_t mask = ii->exec_mask.to_ullong();
int srams = ii->exec_mask.size() / 4;
for (int i = 0; i < srams; i++) {
if (mask & 0xF) {
stats.sramWrites += DWORDs;
stats.sramWrites += DWords;
}
mask = mask >> 4;
}

View File

@@ -925,33 +925,30 @@ Wavefront::exec()
// number of reads that occur per value written
// vector RAW dependency tracking
for (int i = 0; i < ii->getNumOperands(); i++) {
if (ii->isVectorRegister(i)) {
int vgpr = ii->getRegisterIndex(i);
int nReg = ii->getOperandSize(i) <= 4 ? 1 :
ii->getOperandSize(i) / 4;
for (int n = 0; n < nReg; n++) {
if (ii->isSrcOperand(i)) {
// This check should never fail, but to be safe we check
if (rawDist.find(vgpr+n) != rawDist.end()) {
stats.vecRawDistance.sample(
stats.numInstrExecuted.value() - rawDist[vgpr+n]);
}
// increment number of reads to this register
vecReads[vgpr+n]++;
} else if (ii->isDstOperand(i)) {
// rawDist is set on writes, but will not be set
// for the first write to each physical register
if (rawDist.find(vgpr+n) != rawDist.end()) {
// sample the number of reads that were performed
stats.readsPerWrite.sample(vecReads[vgpr+n]);
}
// on a write, reset count of reads to 0
vecReads[vgpr+n] = 0;
rawDist[vgpr+n] = stats.numInstrExecuted.value();
}
for (const auto& srcVecOp : ii->srcVecRegOperands()) {
for (const auto& virtIdx : srcVecOp.virtIndices()) {
// This check should never fail, but to be safe we check
if (rawDist.find(virtIdx) != rawDist.end()) {
stats.vecRawDistance.sample(stats.numInstrExecuted.value() -
rawDist[virtIdx]);
}
// increment number of reads to this register
vecReads[virtIdx]++;
}
}
for (const auto& dstVecOp : ii->dstVecRegOperands()) {
for (const auto& virtIdx : dstVecOp.virtIndices()) {
// rawDist is set on writes, but will not be set for the first
// write to each physical register
if (rawDist.find(virtIdx) != rawDist.end()) {
// Sample the number of reads that were performed
stats.readsPerWrite.sample(vecReads[virtIdx]);
}
// on a write, reset count of reads to 0
vecReads[virtIdx] = 0;
rawDist[virtIdx] = stats.numInstrExecuted.value();
}
}