diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc index c39dec8430..b98e5a98f1 100644 --- a/src/gpu-compute/compute_unit.cc +++ b/src/gpu-compute/compute_unit.cc @@ -1047,8 +1047,8 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt) pkt->senderState = new DTLBPort::SenderState(gpuDynInst, index); // This is the senderState needed by the TLB hierarchy to function - TheISA::GpuTLB::TranslationState *translation_state = - new TheISA::GpuTLB::TranslationState(TLB_mode, shader->gpuTc, false, + X86ISA::GpuTLB::TranslationState *translation_state = + new X86ISA::GpuTLB::TranslationState(TLB_mode, shader->gpuTc, false, pkt->senderState); pkt->senderState = translation_state; @@ -1140,7 +1140,7 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt) delete pkt->senderState; // Because it's atomic operation, only need TLB translation state - pkt->senderState = new TheISA::GpuTLB::TranslationState(TLB_mode, + pkt->senderState = new X86ISA::GpuTLB::TranslationState(TLB_mode, shader->gpuTc); tlbPort[tlbPort_index].sendFunctional(pkt); @@ -1161,8 +1161,8 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt) new_pkt->req->getPaddr()); // safe_cast the senderState - TheISA::GpuTLB::TranslationState *sender_state = - safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState); + X86ISA::GpuTLB::TranslationState *sender_state = + safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState); delete sender_state->tlbEntry; delete new_pkt; @@ -1182,7 +1182,7 @@ ComputeUnit::sendScalarRequest(GPUDynInstPtr gpuDynInst, PacketPtr pkt) new ComputeUnit::ScalarDTLBPort::SenderState(gpuDynInst); pkt->senderState = - new TheISA::GpuTLB::TranslationState(tlb_mode, shader->gpuTc, false, + new X86ISA::GpuTLB::TranslationState(tlb_mode, shader->gpuTc, false, pkt->senderState); if (scalarDTLBPort.isStalled()) { @@ -1373,8 +1373,8 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt) computeUnit->tlbCycles += curTick(); // pop off the TLB 
translation state - TheISA::GpuTLB::TranslationState *translation_state = - safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState); + X86ISA::GpuTLB::TranslationState *translation_state = + safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState); // no PageFaults are permitted for data accesses if (!translation_state->tlbEntry) { @@ -1446,8 +1446,8 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt) DPRINTF(GPUPrefetch, "CU[%d][%d][%d][%d]: %#x was last\n", computeUnit->cu_id, simdId, wfSlotId, mp_index, last); - int stride = last ? (roundDown(vaddr, TheISA::PageBytes) - - roundDown(last, TheISA::PageBytes)) >> TheISA::PageShift + int stride = last ? (roundDown(vaddr, X86ISA::PageBytes) - - roundDown(last, X86ISA::PageBytes)) >> X86ISA::PageShift : 0; DPRINTF(GPUPrefetch, "Stride is %d\n", stride); @@ -1467,13 +1467,13 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt) // Prefetch Next few pages atomically for (int pf = 1; pf <= computeUnit->prefetchDepth; ++pf) { DPRINTF(GPUPrefetch, "%d * %d: %#x\n", pf, stride, - vaddr+stride*pf*TheISA::PageBytes); + vaddr + stride * pf * X86ISA::PageBytes); if (!stride) break; RequestPtr prefetch_req = std::make_shared<Request>( - vaddr + stride * pf * TheISA::PageBytes, + vaddr + stride * pf * X86ISA::PageBytes, sizeof(uint8_t), 0, computeUnit->requestorId(), 0, 0, nullptr); @@ -1484,15 +1484,15 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt) // Because it's atomic operation, only need TLB translation state prefetch_pkt->senderState = - new TheISA::GpuTLB::TranslationState(TLB_mode, + new X86ISA::GpuTLB::TranslationState(TLB_mode, computeUnit->shader->gpuTc, true); // Currently prefetches are zero-latency, hence the sendFunctional sendFunctional(prefetch_pkt); /* safe_cast the senderState */ - TheISA::GpuTLB::TranslationState *tlb_state = - safe_cast<TheISA::GpuTLB::TranslationState*>( + X86ISA::GpuTLB::TranslationState *tlb_state = + safe_cast<X86ISA::GpuTLB::TranslationState*>( prefetch_pkt->senderState); @@ -1639,8 +1639,8 @@ ComputeUnit::ScalarDTLBPort::recvTimingResp(PacketPtr pkt) { assert(pkt->senderState); - 
TheISA::GpuTLB::TranslationState *translation_state = - safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState); + X86ISA::GpuTLB::TranslationState *translation_state = + safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState); // Page faults are not allowed fatal_if(!translation_state->tlbEntry, @@ -1704,8 +1704,8 @@ ComputeUnit::ITLBPort::recvTimingResp(PacketPtr pkt) assert(pkt->senderState); // pop off the TLB translation state - TheISA::GpuTLB::TranslationState *translation_state - = safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState); + X86ISA::GpuTLB::TranslationState *translation_state + = safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState); bool success = translation_state->tlbEntry != nullptr; delete translation_state->tlbEntry; @@ -2429,7 +2429,7 @@ ComputeUnit::updateInstStats(GPUDynInstPtr gpuDynInst) void ComputeUnit::updatePageDivergenceDist(Addr addr) { - Addr virt_page_addr = roundDown(addr, TheISA::PageBytes); + Addr virt_page_addr = roundDown(addr, X86ISA::PageBytes); if (!pagesTouched.count(virt_page_addr)) pagesTouched[virt_page_addr] = 1; diff --git a/src/gpu-compute/gpu_tlb.cc b/src/gpu-compute/gpu_tlb.cc index c4450fabaa..7a939e6178 100644 --- a/src/gpu-compute/gpu_tlb.cc +++ b/src/gpu-compute/gpu_tlb.cc @@ -94,7 +94,7 @@ namespace X86ISA * @warning: the set-associative version assumes you have a * fixed page size of 4KB. 
* If the page size is greather than 4KB (as defined in the - * TheISA::PageBytes), then there are various issues w/ the current + * X86ISA::PageBytes), then there are various issues w/ the current * implementation (you'd have the same 8KB page being replicated in * different sets etc) */ @@ -754,7 +754,7 @@ namespace X86ISA assert(pkt->senderState); Addr virt_page_addr = roundDown(pkt->req->getVaddr(), - TheISA::PageBytes); + X86ISA::PageBytes); TranslationState *sender_state = safe_cast<TranslationState*>(pkt->senderState); @@ -1159,7 +1159,7 @@ namespace X86ISA local_entry = new_entry; if (allocationPolicy) { - Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes); + Addr virt_page_addr = roundDown(vaddr, X86ISA::PageBytes); DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n", virt_page_addr); @@ -1210,7 +1210,7 @@ namespace X86ISA bool update_stats = !sender_state->prefetch; Addr virt_page_addr = roundDown(pkt->req->getVaddr(), - TheISA::PageBytes); + X86ISA::PageBytes); if (update_stats) tlb->updatePageFootprint(virt_page_addr); @@ -1339,7 +1339,7 @@ namespace X86ISA GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt) { Addr virt_page_addr = roundDown(pkt->req->getVaddr(), - TheISA::PageBytes); + X86ISA::PageBytes); DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n", virt_page_addr); diff --git a/src/gpu-compute/shader.cc b/src/gpu-compute/shader.cc index 0b41193367..9db0347a86 100644 --- a/src/gpu-compute/shader.cc +++ b/src/gpu-compute/shader.cc @@ -105,7 +105,7 @@ Shader::mmap(int length) Addr start; // round up length to the next page - length = roundUp(length, TheISA::PageBytes); + length = roundUp(length, X86ISA::PageBytes); Process *proc = gpuTc->getProcessPtr(); auto mem_state = proc->memState; diff --git a/src/gpu-compute/tlb_coalescer.cc b/src/gpu-compute/tlb_coalescer.cc index 55be11e063..a22b2c8861 100644 --- a/src/gpu-compute/tlb_coalescer.cc +++ b/src/gpu-compute/tlb_coalescer.cc @@ -106,10 +106,10 @@ TLBCoalescer::canCoalesce(PacketPtr incoming_pkt, 
PacketPtr coalesced_pkt) // Rule 1: Coalesce requests only if they // fall within the same virtual page Addr incoming_virt_page_addr = roundDown(incoming_pkt->req->getVaddr(), - TheISA::PageBytes); + X86ISA::PageBytes); Addr coalesced_virt_page_addr = roundDown(coalesced_pkt->req->getVaddr(), - TheISA::PageBytes); + X86ISA::PageBytes); if (incoming_virt_page_addr != coalesced_virt_page_addr) return false; @@ -139,7 +139,7 @@ TLBCoalescer::canCoalesce(PacketPtr incoming_pkt, PacketPtr coalesced_pkt) void TLBCoalescer::updatePhysAddresses(PacketPtr pkt) { - Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes); + Addr virt_page_addr = roundDown(pkt->req->getVaddr(), X86ISA::PageBytes); DPRINTF(GPUTLB, "Update phys. addr. for %d coalesced reqs for page %#x\n", issuedTranslationsTable[virt_page_addr].size(), virt_page_addr); @@ -345,7 +345,7 @@ TLBCoalescer::CpuSidePort::recvFunctional(PacketPtr pkt) // print a warning message. This is a temporary caveat of // the current simulator where atomic and timing requests can // coexist. FIXME remove this check/warning in the future. - Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes); + Addr virt_page_addr = roundDown(pkt->req->getVaddr(), X86ISA::PageBytes); int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr); if (map_count) { @@ -430,7 +430,7 @@ TLBCoalescer::processProbeTLBEvent() // compute virtual page address for this request Addr virt_page_addr = roundDown(first_packet->req->getVaddr(), - TheISA::PageBytes); + X86ISA::PageBytes); // is there another outstanding request for the same page addr? int pending_reqs =