gpu: Use X86ISA instead of TheISA in src/gpu-compute.
These files are nominally not tied to the X86ISA, but in reality they are, because they reach into the GPU TLB, which is defined unchangeably in the X86ISA namespaces, and uses data structures within it. Rather than try to pretend that these structures are generic, we'll instead just use X86ISA instead of TheISA. If this really does become generic in the future, a base class with the ISA-agnostic essentials defined in it can be used instead, and the ISA-specific TLBs can define their own derived class which has whatever else they need. Really the compute unit shouldn't be communicating with the TLB using sender state since those are supposed to be little notes for the sender to keep with a transaction, not for communicating between entities across a port. Change-Id: Ie6573396f6c77a9a02194f5f4595eefa45d6d66b Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/34174 Reviewed-by: Bobby R. Bruce <bbruce@ucdavis.edu> Maintainer: Bobby R. Bruce <bbruce@ucdavis.edu> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
@@ -1047,8 +1047,8 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt)
|
||||
pkt->senderState = new DTLBPort::SenderState(gpuDynInst, index);
|
||||
|
||||
// This is the senderState needed by the TLB hierarchy to function
|
||||
TheISA::GpuTLB::TranslationState *translation_state =
|
||||
new TheISA::GpuTLB::TranslationState(TLB_mode, shader->gpuTc, false,
|
||||
X86ISA::GpuTLB::TranslationState *translation_state =
|
||||
new X86ISA::GpuTLB::TranslationState(TLB_mode, shader->gpuTc, false,
|
||||
pkt->senderState);
|
||||
|
||||
pkt->senderState = translation_state;
|
||||
@@ -1140,7 +1140,7 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt)
|
||||
delete pkt->senderState;
|
||||
|
||||
// Because it's atomic operation, only need TLB translation state
|
||||
pkt->senderState = new TheISA::GpuTLB::TranslationState(TLB_mode,
|
||||
pkt->senderState = new X86ISA::GpuTLB::TranslationState(TLB_mode,
|
||||
shader->gpuTc);
|
||||
|
||||
tlbPort[tlbPort_index].sendFunctional(pkt);
|
||||
@@ -1161,8 +1161,8 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt)
|
||||
new_pkt->req->getPaddr());
|
||||
|
||||
// safe_cast the senderState
|
||||
TheISA::GpuTLB::TranslationState *sender_state =
|
||||
safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
|
||||
X86ISA::GpuTLB::TranslationState *sender_state =
|
||||
safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState);
|
||||
|
||||
delete sender_state->tlbEntry;
|
||||
delete new_pkt;
|
||||
@@ -1182,7 +1182,7 @@ ComputeUnit::sendScalarRequest(GPUDynInstPtr gpuDynInst, PacketPtr pkt)
|
||||
new ComputeUnit::ScalarDTLBPort::SenderState(gpuDynInst);
|
||||
|
||||
pkt->senderState =
|
||||
new TheISA::GpuTLB::TranslationState(tlb_mode, shader->gpuTc, false,
|
||||
new X86ISA::GpuTLB::TranslationState(tlb_mode, shader->gpuTc, false,
|
||||
pkt->senderState);
|
||||
|
||||
if (scalarDTLBPort.isStalled()) {
|
||||
@@ -1373,8 +1373,8 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt)
|
||||
computeUnit->tlbCycles += curTick();
|
||||
|
||||
// pop off the TLB translation state
|
||||
TheISA::GpuTLB::TranslationState *translation_state =
|
||||
safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
|
||||
X86ISA::GpuTLB::TranslationState *translation_state =
|
||||
safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState);
|
||||
|
||||
// no PageFaults are permitted for data accesses
|
||||
if (!translation_state->tlbEntry) {
|
||||
@@ -1446,8 +1446,8 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt)
|
||||
DPRINTF(GPUPrefetch, "CU[%d][%d][%d][%d]: %#x was last\n",
|
||||
computeUnit->cu_id, simdId, wfSlotId, mp_index, last);
|
||||
|
||||
int stride = last ? (roundDown(vaddr, TheISA::PageBytes) -
|
||||
roundDown(last, TheISA::PageBytes)) >> TheISA::PageShift
|
||||
int stride = last ? (roundDown(vaddr, X86ISA::PageBytes) -
|
||||
roundDown(last, X86ISA::PageBytes)) >> X86ISA::PageShift
|
||||
: 0;
|
||||
|
||||
DPRINTF(GPUPrefetch, "Stride is %d\n", stride);
|
||||
@@ -1467,13 +1467,13 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt)
|
||||
// Prefetch Next few pages atomically
|
||||
for (int pf = 1; pf <= computeUnit->prefetchDepth; ++pf) {
|
||||
DPRINTF(GPUPrefetch, "%d * %d: %#x\n", pf, stride,
|
||||
vaddr+stride*pf*TheISA::PageBytes);
|
||||
vaddr + stride * pf * X86ISA::PageBytes);
|
||||
|
||||
if (!stride)
|
||||
break;
|
||||
|
||||
RequestPtr prefetch_req = std::make_shared<Request>(
|
||||
vaddr + stride * pf * TheISA::PageBytes,
|
||||
vaddr + stride * pf * X86ISA::PageBytes,
|
||||
sizeof(uint8_t), 0,
|
||||
computeUnit->requestorId(),
|
||||
0, 0, nullptr);
|
||||
@@ -1484,15 +1484,15 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt)
|
||||
|
||||
// Because it's atomic operation, only need TLB translation state
|
||||
prefetch_pkt->senderState =
|
||||
new TheISA::GpuTLB::TranslationState(TLB_mode,
|
||||
new X86ISA::GpuTLB::TranslationState(TLB_mode,
|
||||
computeUnit->shader->gpuTc, true);
|
||||
|
||||
// Currently prefetches are zero-latency, hence the sendFunctional
|
||||
sendFunctional(prefetch_pkt);
|
||||
|
||||
/* safe_cast the senderState */
|
||||
TheISA::GpuTLB::TranslationState *tlb_state =
|
||||
safe_cast<TheISA::GpuTLB::TranslationState*>(
|
||||
X86ISA::GpuTLB::TranslationState *tlb_state =
|
||||
safe_cast<X86ISA::GpuTLB::TranslationState*>(
|
||||
prefetch_pkt->senderState);
|
||||
|
||||
|
||||
@@ -1639,8 +1639,8 @@ ComputeUnit::ScalarDTLBPort::recvTimingResp(PacketPtr pkt)
|
||||
{
|
||||
assert(pkt->senderState);
|
||||
|
||||
TheISA::GpuTLB::TranslationState *translation_state =
|
||||
safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
|
||||
X86ISA::GpuTLB::TranslationState *translation_state =
|
||||
safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState);
|
||||
|
||||
// Page faults are not allowed
|
||||
fatal_if(!translation_state->tlbEntry,
|
||||
@@ -1704,8 +1704,8 @@ ComputeUnit::ITLBPort::recvTimingResp(PacketPtr pkt)
|
||||
assert(pkt->senderState);
|
||||
|
||||
// pop off the TLB translation state
|
||||
TheISA::GpuTLB::TranslationState *translation_state
|
||||
= safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
|
||||
X86ISA::GpuTLB::TranslationState *translation_state
|
||||
= safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState);
|
||||
|
||||
bool success = translation_state->tlbEntry != nullptr;
|
||||
delete translation_state->tlbEntry;
|
||||
@@ -2429,7 +2429,7 @@ ComputeUnit::updateInstStats(GPUDynInstPtr gpuDynInst)
|
||||
void
|
||||
ComputeUnit::updatePageDivergenceDist(Addr addr)
|
||||
{
|
||||
Addr virt_page_addr = roundDown(addr, TheISA::PageBytes);
|
||||
Addr virt_page_addr = roundDown(addr, X86ISA::PageBytes);
|
||||
|
||||
if (!pagesTouched.count(virt_page_addr))
|
||||
pagesTouched[virt_page_addr] = 1;
|
||||
|
||||
@@ -94,7 +94,7 @@ namespace X86ISA
|
||||
* @warning: the set-associative version assumes you have a
|
||||
* fixed page size of 4KB.
|
||||
* If the page size is greater than 4KB (as defined in the
|
||||
* TheISA::PageBytes), then there are various issues w/ the current
|
||||
* X86ISA::PageBytes), then there are various issues w/ the current
|
||||
* implementation (you'd have the same 8KB page being replicated in
|
||||
* different sets etc)
|
||||
*/
|
||||
@@ -754,7 +754,7 @@ namespace X86ISA
|
||||
assert(pkt->senderState);
|
||||
|
||||
Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
|
||||
TheISA::PageBytes);
|
||||
X86ISA::PageBytes);
|
||||
|
||||
TranslationState *sender_state =
|
||||
safe_cast<TranslationState*>(pkt->senderState);
|
||||
@@ -1159,7 +1159,7 @@ namespace X86ISA
|
||||
local_entry = new_entry;
|
||||
|
||||
if (allocationPolicy) {
|
||||
Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);
|
||||
Addr virt_page_addr = roundDown(vaddr, X86ISA::PageBytes);
|
||||
|
||||
DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
|
||||
virt_page_addr);
|
||||
@@ -1210,7 +1210,7 @@ namespace X86ISA
|
||||
bool update_stats = !sender_state->prefetch;
|
||||
|
||||
Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
|
||||
TheISA::PageBytes);
|
||||
X86ISA::PageBytes);
|
||||
|
||||
if (update_stats)
|
||||
tlb->updatePageFootprint(virt_page_addr);
|
||||
@@ -1339,7 +1339,7 @@ namespace X86ISA
|
||||
GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
|
||||
{
|
||||
Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
|
||||
TheISA::PageBytes);
|
||||
X86ISA::PageBytes);
|
||||
|
||||
DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
|
||||
virt_page_addr);
|
||||
|
||||
@@ -105,7 +105,7 @@ Shader::mmap(int length)
|
||||
Addr start;
|
||||
|
||||
// round up length to the next page
|
||||
length = roundUp(length, TheISA::PageBytes);
|
||||
length = roundUp(length, X86ISA::PageBytes);
|
||||
|
||||
Process *proc = gpuTc->getProcessPtr();
|
||||
auto mem_state = proc->memState;
|
||||
|
||||
@@ -106,10 +106,10 @@ TLBCoalescer::canCoalesce(PacketPtr incoming_pkt, PacketPtr coalesced_pkt)
|
||||
// Rule 1: Coalesce requests only if they
|
||||
// fall within the same virtual page
|
||||
Addr incoming_virt_page_addr = roundDown(incoming_pkt->req->getVaddr(),
|
||||
TheISA::PageBytes);
|
||||
X86ISA::PageBytes);
|
||||
|
||||
Addr coalesced_virt_page_addr = roundDown(coalesced_pkt->req->getVaddr(),
|
||||
TheISA::PageBytes);
|
||||
X86ISA::PageBytes);
|
||||
|
||||
if (incoming_virt_page_addr != coalesced_virt_page_addr)
|
||||
return false;
|
||||
@@ -139,7 +139,7 @@ TLBCoalescer::canCoalesce(PacketPtr incoming_pkt, PacketPtr coalesced_pkt)
|
||||
void
|
||||
TLBCoalescer::updatePhysAddresses(PacketPtr pkt)
|
||||
{
|
||||
Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
|
||||
Addr virt_page_addr = roundDown(pkt->req->getVaddr(), X86ISA::PageBytes);
|
||||
|
||||
DPRINTF(GPUTLB, "Update phys. addr. for %d coalesced reqs for page %#x\n",
|
||||
issuedTranslationsTable[virt_page_addr].size(), virt_page_addr);
|
||||
@@ -345,7 +345,7 @@ TLBCoalescer::CpuSidePort::recvFunctional(PacketPtr pkt)
|
||||
// print a warning message. This is a temporary caveat of
|
||||
// the current simulator where atomic and timing requests can
|
||||
// coexist. FIXME remove this check/warning in the future.
|
||||
Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
|
||||
Addr virt_page_addr = roundDown(pkt->req->getVaddr(), X86ISA::PageBytes);
|
||||
int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);
|
||||
|
||||
if (map_count) {
|
||||
@@ -430,7 +430,7 @@ TLBCoalescer::processProbeTLBEvent()
|
||||
|
||||
// compute virtual page address for this request
|
||||
Addr virt_page_addr = roundDown(first_packet->req->getVaddr(),
|
||||
TheISA::PageBytes);
|
||||
X86ISA::PageBytes);
|
||||
|
||||
// is there another outstanding request for the same page addr?
|
||||
int pending_reqs =
|
||||
|
||||
Reference in New Issue
Block a user