gpu-compute, mem-ruby, configs: Add GCN3 ISA support to GPU model
Change-Id: Ibe46970f3ba25d62ca2ade5cbc2054ad746b2254
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29912
Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
committed by Anthony Gutierrez
parent b0eac7857a
commit b8da9abba7
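For context, the diff below reworks the GPUStaticInst flag and operand interface that the rest of the GPU model queries when issuing and routing instructions. A minimal sketch of how downstream code can dispatch on these accessors; Pipe and routeInst() are placeholder names for illustration, not gem5 API:

// Illustrative only: dispatch on the GPUStaticInst flag queries shown in
// this diff. Pipe and routeInst() are placeholders, not gem5 classes.
enum class Pipe { Alu, GlobalMem, LocalMem, Sync };

template <typename Inst>
Pipe
routeInst(const Inst &si)
{
    if (si.isMemSync())
        return Pipe::Sync;          // barrier / sync traffic
    if (si.isMemRef())
        return (si.isFlat() || si.isGlobalSeg()) ? Pipe::GlobalMem
                                                 : Pipe::LocalMem;
    return Pipe::Alu;               // vector/scalar ALU work by default
}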
@@ -59,6 +59,7 @@ class GPUStaticInst : public GPUStaticInstFlags
{
  public:
    GPUStaticInst(const std::string &opcode);
    virtual ~GPUStaticInst() { }
    void instAddr(int inst_addr) { _instAddr = inst_addr; }
    int instAddr() const { return _instAddr; }
    int nextInstAddr() const { return _instAddr + instSize(); }
@@ -71,15 +72,18 @@ class GPUStaticInst : public GPUStaticInstFlags

    int ipdInstNum() const { return _ipdInstNum; }

    virtual TheGpuISA::ScalarRegU32 srcLiteral() const { return 0; }

    virtual void execute(GPUDynInstPtr gpuDynInst) = 0;
    virtual void generateDisassembly() = 0;
    const std::string& disassemble();
    virtual int getNumOperands() = 0;
    virtual bool isCondRegister(int operandIndex) = 0;
    virtual bool isScalarRegister(int operandIndex) = 0;
    virtual bool isVectorRegister(int operandIndex) = 0;
    virtual bool isSrcOperand(int operandIndex) = 0;
    virtual bool isDstOperand(int operandIndex) = 0;
    virtual bool isFlatScratchRegister(int opIdx) = 0;
    virtual bool isExecMaskRegister(int opIdx) = 0;
    virtual int getOperandSize(int operandIndex) = 0;

    virtual int getRegisterIndex(int operandIndex,
@@ -88,12 +92,24 @@ class GPUStaticInst : public GPUStaticInstFlags
    virtual int numDstRegOperands() = 0;
    virtual int numSrcRegOperands() = 0;

    virtual bool isValid() const = 0;
    virtual int coalescerTokenCount() const { return 0; }

    int numDstVecOperands();
    int numSrcVecOperands();
    int numDstVecDWORDs();
    int numSrcVecDWORDs();

    int numOpdDWORDs(int operandIdx);

    bool isALU() const { return _flags[ALU]; }
    bool isBranch() const { return _flags[Branch]; }
    bool isCondBranch() const { return _flags[CondBranch]; }
    bool isNop() const { return _flags[Nop]; }
    bool isReturn() const { return _flags[Return]; }
    bool isEndOfKernel() const { return _flags[EndOfKernel]; }
    bool isKernelLaunch() const { return _flags[KernelLaunch]; }
    bool isSDWAInst() const { return _flags[IsSDWA]; }
    bool isDPPInst() const { return _flags[IsDPP]; }

    bool
    isUnconditionalJump() const
@@ -105,7 +121,7 @@ class GPUStaticInst : public GPUStaticInstFlags
    bool isWaitcnt() const { return _flags[Waitcnt]; }

    bool isBarrier() const { return _flags[MemBarrier]; }
    bool isMemFence() const { return _flags[MemFence]; }
    bool isMemSync() const { return _flags[MemSync]; }
    bool isMemRef() const { return _flags[MemoryRef]; }
    bool isFlat() const { return _flags[Flat]; }
    bool isLoad() const { return _flags[Load]; }
@@ -125,6 +141,13 @@ class GPUStaticInst : public GPUStaticInstFlags
    bool writesSCC() const { return _flags[WritesSCC]; }
    bool readsVCC() const { return _flags[ReadsVCC]; }
    bool writesVCC() const { return _flags[WritesVCC]; }
    // Identify instructions that implicitly read the Execute mask
    // as a source operand but not to dictate which threads execute.
    bool readsEXEC() const { return _flags[ReadsEXEC]; }
    bool writesEXEC() const { return _flags[WritesEXEC]; }
    bool readsMode() const { return _flags[ReadsMode]; }
    bool writesMode() const { return _flags[WritesMode]; }
    bool ignoreExec() const { return _flags[IgnoreExec]; }

    bool isAtomicAnd() const { return _flags[AtomicAnd]; }
    bool isAtomicOr() const { return _flags[AtomicOr]; }
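The new readsEXEC()/writesEXEC() and mode-register queries added above let dependence tracking treat EXEC, VCC, and MODE as implicit operands. A minimal sketch of that idea, using placeholder types rather than gem5's scoreboard classes:

// Illustrative sketch: collect implicit source registers from the flag
// accessors above. RegId and implicitSources() are placeholders, not gem5.
#include <vector>

enum class RegId { Exec, Vcc, Mode };

template <typename Inst>
std::vector<RegId>
implicitSources(const Inst &si)
{
    std::vector<RegId> deps;
    if (si.readsEXEC()) deps.push_back(RegId::Exec);   // implicit EXEC read
    if (si.readsVCC())  deps.push_back(RegId::Vcc);
    if (si.readsMode()) deps.push_back(RegId::Mode);   // rounding/denorm mode
    return deps;
}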
@@ -166,34 +189,29 @@ class GPUStaticInst : public GPUStaticInstFlags
    bool isReadOnlySeg() const { return _flags[ReadOnlySegment]; }
    bool isSpillSeg() const { return _flags[SpillSegment]; }

    bool isWorkitemScope() const { return _flags[WorkitemScope]; }
    bool isWavefrontScope() const { return _flags[WavefrontScope]; }
    bool isWorkgroupScope() const { return _flags[WorkgroupScope]; }
    bool isDeviceScope() const { return _flags[DeviceScope]; }
    bool isSystemScope() const { return _flags[SystemScope]; }
    bool isNoScope() const { return _flags[NoScope]; }

    bool isRelaxedOrder() const { return _flags[RelaxedOrder]; }
    bool isAcquire() const { return _flags[Acquire]; }
    bool isRelease() const { return _flags[Release]; }
    bool isAcquireRelease() const { return _flags[AcquireRelease]; }
    bool isNoOrder() const { return _flags[NoOrder]; }

    /**
     * Coherence domain of a memory instruction. Only valid for
     * machine ISA. The coherence domain specifies where it is
     * possible to perform memory synchronization, e.g., acquire
     * or release, from the shader kernel.
     * Coherence domain of a memory instruction. The coherence domain
     * specifies where it is possible to perform memory synchronization
     * (e.g., acquire or release) from the shader kernel.
     *
     * isGloballyCoherent(): returns true if kernel is sharing memory
     * with other work-items on the same device (GPU)
     * isGloballyCoherent(): returns true if WIs share same device
     * isSystemCoherent(): returns true if WIs or threads in different
     * devices share memory
     *
     * isSystemCoherent(): returns true if kernel is sharing memory
     * with other work-items on a different device (GPU) or the host (CPU)
     */
    bool isGloballyCoherent() const { return _flags[GloballyCoherent]; }
    bool isSystemCoherent() const { return _flags[SystemCoherent]; }

    // Floating-point instructions
    bool isF16() const { return _flags[F16]; }
    bool isF32() const { return _flags[F32]; }
    bool isF64() const { return _flags[F64]; }

    // FMA, MAC, MAD instructions
    bool isFMA() const { return _flags[FMA]; }
    bool isMAC() const { return _flags[MAC]; }
    bool isMAD() const { return _flags[MAD]; }

    virtual int instSize() const = 0;

    // only used for memory instructions
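The reworded comment above distinguishes device-wide from system-wide coherence; a memory pipeline can use the two predicates to decide how far a synchronizing access must be made visible. A minimal sketch with a placeholder scope enum, not a gem5 type:

// Illustrative sketch: map the coherence-domain predicates documented above
// to a propagation scope. SyncScope and syncScopeFor() are placeholders.
enum class SyncScope { Wavefront, Device, System };

template <typename Inst>
SyncScope
syncScopeFor(const Inst &si)
{
    if (si.isSystemCoherent())
        return SyncScope::System;   // visible to other devices and the host CPU
    if (si.isGloballyCoherent())
        return SyncScope::Device;   // visible to other work-items on this GPU
    return SyncScope::Wavefront;    // no cross-workgroup visibility required
}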
@@ -217,37 +235,36 @@ class GPUStaticInst : public GPUStaticInstFlags
    // For flat memory accesses
    Enums::StorageClassType executed_as;

    void setFlag(Flags flag) { _flags[flag] = true; }
    void setFlag(Flags flag) {
        _flags[flag] = true;

    virtual void
    execLdAcq(GPUDynInstPtr gpuDynInst)
    {
        fatal("calling execLdAcq() on a non-load instruction.\n");
    }

    virtual void
    execSt(GPUDynInstPtr gpuDynInst)
    {
        fatal("calling execSt() on a non-store instruction.\n");
    }

    virtual void
    execAtomic(GPUDynInstPtr gpuDynInst)
    {
        fatal("calling execAtomic() on a non-atomic instruction.\n");
    }

    virtual void
    execAtomicAcq(GPUDynInstPtr gpuDynInst)
    {
        fatal("calling execAtomicAcq() on a non-atomic instruction.\n");
        if (isGroupSeg()) {
            executed_as = Enums::SC_GROUP;
        } else if (isGlobalSeg()) {
            executed_as = Enums::SC_GLOBAL;
        } else if (isPrivateSeg()) {
            executed_as = Enums::SC_PRIVATE;
        } else if (isSpillSeg()) {
            executed_as = Enums::SC_SPILL;
        } else if (isReadOnlySeg()) {
            executed_as = Enums::SC_READONLY;
        } else if (isKernArgSeg()) {
            executed_as = Enums::SC_KERNARG;
        } else if (isArgSeg()) {
            executed_as = Enums::SC_ARG;
        }
    }
    const std::string& opcode() const { return _opcode; }

  protected:
    const std::string opcode;
    const std::string _opcode;
    std::string disassembly;
    int _instNum;
    int _instAddr;
    int srcVecOperands;
    int dstVecOperands;
    int srcVecDWORDs;
    int dstVecDWORDs;
    /**
     * Identifier of the immediate post-dominator instruction.
     */
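With this change, setFlag() both records the flag and, for segment flags, resolves executed_as for flat accesses. The precedence follows the if/else-if chain in the hunk above; a small self-contained sketch of the same ordering, using placeholder enums rather than the gem5 types:

// Illustrative, self-contained mirror of the segment -> storage-class
// precedence used by the new setFlag() above.
#include <bitset>
#include <cassert>

enum Seg { Group, Global, Private, Spill, ReadOnly, KernArg, Arg, NumSegs };
enum StorageClass { SC_GROUP, SC_GLOBAL, SC_PRIVATE, SC_SPILL,
                    SC_READONLY, SC_KERNARG, SC_ARG, SC_NONE };

StorageClass
resolveSegment(const std::bitset<NumSegs> &f)
{
    if (f[Group])    return SC_GROUP;      // LDS / group segment wins first
    if (f[Global])   return SC_GLOBAL;
    if (f[Private])  return SC_PRIVATE;
    if (f[Spill])    return SC_SPILL;
    if (f[ReadOnly]) return SC_READONLY;
    if (f[KernArg])  return SC_KERNARG;
    if (f[Arg])      return SC_ARG;
    return SC_NONE;                        // no segment flag set yet
}

int main()
{
    std::bitset<NumSegs> flags;
    flags[Global] = true;
    assert(resolveSegment(flags) == SC_GLOBAL);
    return 0;
}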
@@ -262,9 +279,9 @@ class KernelLaunchStaticInst : public GPUStaticInst
    KernelLaunchStaticInst() : GPUStaticInst("kernel_launch")
    {
        setFlag(Nop);
        setFlag(KernelLaunch);
        setFlag(MemSync);
        setFlag(Scalar);
        setFlag(Acquire);
        setFlag(SystemScope);
        setFlag(GlobalSegment);
    }
@@ -277,11 +294,14 @@ class KernelLaunchStaticInst : public GPUStaticInst
    void
    generateDisassembly() override
    {
        disassembly = opcode;
        disassembly = _opcode;
    }

    int getNumOperands() override { return 0; }
    bool isCondRegister(int operandIndex) override { return false; }
    bool isFlatScratchRegister(int opIdx) override { return false; }
    // return true if the Execute mask is explicitly used as a source
    // register operand
    bool isExecMaskRegister(int opIdx) override { return false; }
    bool isScalarRegister(int operandIndex) override { return false; }
    bool isVectorRegister(int operandIndex) override { return false; }
    bool isSrcOperand(int operandIndex) override { return false; }
@@ -296,7 +316,6 @@ class KernelLaunchStaticInst : public GPUStaticInst

    int numDstRegOperands() override { return 0; }
    int numSrcRegOperands() override { return 0; }
    bool isValid() const override { return true; }
    int instSize() const override { return 0; }
};