cpu: Only acquire needed tokens in PTL tester

The tester currently assumes that one token per lane is needed when checking if an action is ready to be issued. When actually issuing requests, it is possible that a memory location is not valid for various reasons. This was not being considered when checking for tokens causing the tester to acquire more tokens than requests sent. Since tokens are returned when requests are coalesced, this was causing some tokens never to be returned, eventually depleting the token pool and causing a deadlock. Add a new method which determines the number of tokens needed for an action. This is called by both the ready check method and the method to issue the action to ensure they are aligned. Change-Id: Ic1af72085c3b077539eb3fd129331e776ebdffbc Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/56450 Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com> Maintainer: Matt Sinclair <mattdsinclair@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com>
2022-02-03 16:50:18 -06:00
parent 1bc23ca966
commit 5530bd42d5
2 changed files with 47 additions and 6 deletions
--- a/src/cpu/testers/gpu_ruby_test/tester_thread.cc
+++ b/src/cpu/testers/gpu_ruby_test/tester_thread.cc
@@ -152,6 +152,37 @@ TesterThread::issueNewEpisode()
    episodeHistory.push_back(curEpisode);
 }

+int
+TesterThread::getTokensNeeded()
+{
+    if (!tokenPort) {
+        return 0;
+    }
+
+    int tokens_needed = 0;
+    curAction = curEpisode->peekCurAction();
+
+    switch(curAction->getType()) {
+        case Episode::Action::Type::ATOMIC:
+            tokens_needed = numLanes;
+            break;
+        case Episode::Action::Type::LOAD:
+        case Episode::Action::Type::STORE:
+            for (int lane = 0; lane < numLanes; ++lane) {
+                Location loc = curAction->getLocation(lane);
+
+                if (loc != AddressManager::INVALID_LOCATION && loc >= 0) {
+                    tokens_needed++;
+                }
+            }
+            break;
+        default:
+            tokens_needed = 0;
+    }
+
+    return tokens_needed;
+}
+
 bool
 TesterThread::isNextActionReady()
 {
@@ -162,10 +193,13 @@ TesterThread::isNextActionReady()

        // Only GPU wavefront threads have a token port. For all other types
        // of threads evaluate to true.
-        bool haveTokens = tokenPort ? tokenPort->haveTokens(numLanes) : true;
+        bool haveTokens = true;

        switch(curAction->getType()) {
            case Episode::Action::Type::ATOMIC:
+                haveTokens = tokenPort ?
+                    tokenPort->haveTokens(getTokensNeeded()) : true;
+
                // an atomic action must wait for all previous requests
                // to complete
                if (pendingLdStCount == 0 &&
@@ -206,7 +240,7 @@ TesterThread::isNextActionReady()
                assert(pendingAtomicCount == 0);

                // can't issue if there is a pending fence
-                if (pendingFenceCount > 0 || !haveTokens) {
+                if (pendingFenceCount > 0) {
                    return false;
                }

@@ -215,7 +249,7 @@ TesterThread::isNextActionReady()
                for (int lane = 0; lane < numLanes; ++lane) {
                    Location loc = curAction->getLocation(lane);

-                    if (loc != AddressManager::INVALID_LOCATION) {
+                    if (loc != AddressManager::INVALID_LOCATION && loc >= 0) {
                        Addr addr = addrManager->getAddress(loc);

                        if (outstandingLoads.find(addr) !=
@@ -237,6 +271,12 @@ TesterThread::isNextActionReady()
                    }
                }

+                haveTokens = tokenPort ?
+                    tokenPort->haveTokens(getTokensNeeded()) : true;
+                if (!haveTokens) {
+                    return false;
+                }
+
                return true;
            default:
                panic("The tester got an invalid action\n");
@@ -250,7 +290,7 @@ TesterThread::issueNextAction()
    switch(curAction->getType()) {
        case Episode::Action::Type::ATOMIC:
            if (tokenPort) {
-                tokenPort->acquireTokens(numLanes);
+                tokenPort->acquireTokens(getTokensNeeded());
            }
            issueAtomicOps();
            break;
@@ -262,13 +302,13 @@ TesterThread::issueNextAction()
            break;
        case Episode::Action::Type::LOAD:
            if (tokenPort) {
-                tokenPort->acquireTokens(numLanes);
+                tokenPort->acquireTokens(getTokensNeeded());
            }
            issueLoadOps();
            break;
        case Episode::Action::Type::STORE:
            if (tokenPort) {
-                tokenPort->acquireTokens(numLanes);
+                tokenPort->acquireTokens(getTokensNeeded());
            }
            issueStoreOps();
            break;
--- a/src/cpu/testers/gpu_ruby_test/tester_thread.hh
+++ b/src/cpu/testers/gpu_ruby_test/tester_thread.hh
@@ -178,6 +178,7 @@ class TesterThread : public ClockedObject
    // constraints and is ready to issue
    bool isNextActionReady();
    void issueNextAction();
+    int getTokensNeeded();

    // issue Ops to Ruby memory
    // must be implemented by a child class