arch-gcn3,gpu-compute: Move GCN3 specific TLB to arch
Move GpuTLB and TLBCoalescer to GCN3 as the TLB format is specific to GCN3 and SE mode / APU simulation. Vega will have its own TLB, coalescer, and walker suitable for a dGPU.

This also adds a using alias for the TLB translation state to reduce the number of references to TheISA and X86ISA. X86-specific includes are also removed.

Change-Id: I34448bb4e5ddb9980b34a55bc717bbcea0e03db5
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/49847
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
@@ -39,10 +39,15 @@ if not env['BUILD_GPU']:
|
||||
Return()
|
||||
|
||||
if env['TARGET_GPU_ISA'] == 'gcn3':
|
||||
SimObject('X86GPUTLB.py')
|
||||
|
||||
Source('decoder.cc')
|
||||
Source('insts/gpu_static_inst.cc')
|
||||
Source('insts/instructions.cc')
|
||||
Source('insts/op_encodings.cc')
|
||||
Source('isa.cc')
|
||||
Source('registers.cc')
|
||||
Source('tlb.cc')
|
||||
Source('tlb_coalescer.cc')
|
||||
|
||||
DebugFlag('GCN3', 'Debug flag for GCN3 GPU ISA')
|
||||
|
||||
@@ -39,7 +39,7 @@ from m5.SimObject import SimObject
|
||||
class X86GPUTLB(ClockedObject):
|
||||
type = 'X86GPUTLB'
|
||||
cxx_class = 'gem5::X86ISA::GpuTLB'
|
||||
cxx_header = 'gpu-compute/gpu_tlb.hh'
|
||||
cxx_header = 'arch/amdgpu/gcn3/tlb.hh'
|
||||
size = Param.Int(64, "TLB size (number of entries)")
|
||||
assoc = Param.Int(64, "TLB associativity")
|
||||
|
||||
@@ -63,7 +63,8 @@ class X86GPUTLB(ClockedObject):
|
||||
class TLBCoalescer(ClockedObject):
|
||||
type = 'TLBCoalescer'
|
||||
cxx_class = 'gem5::TLBCoalescer'
|
||||
cxx_header = 'gpu-compute/tlb_coalescer.hh'
|
||||
cxx_header = 'arch/amdgpu/gcn3/tlb_coalescer.hh'
|
||||
|
||||
probesPerCycle = Param.Int(2, "Number of TLB probes per cycle")
|
||||
coalescingWindow = Param.Int(1, "Permit coalescing across that many ticks")
|
||||
cpu_side_ports = VectorResponsePort("Port on side closer to CPU/CU")
|
||||
@@ -38,6 +38,7 @@
|
||||
#include <type_traits>
|
||||
|
||||
#include "arch/amdgpu/gcn3/gpu_registers.hh"
|
||||
#include "arch/amdgpu/gcn3/tlb.hh"
|
||||
#include "gpu-compute/dispatcher.hh"
|
||||
#include "gpu-compute/hsa_queue_entry.hh"
|
||||
#include "gpu-compute/misc.hh"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2011-2021 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
@@ -14,9 +14,9 @@
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
@@ -30,10 +30,9 @@
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Lisa Hsu
|
||||
*/
|
||||
|
||||
#include "gpu-compute/gpu_tlb.hh"
|
||||
#include "arch/amdgpu/gcn3/tlb.hh"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
@@ -438,6 +438,8 @@ namespace X86ISA
|
||||
};
|
||||
}
|
||||
|
||||
using GpuTranslationState = X86ISA::GpuTLB::TranslationState;
|
||||
|
||||
} // namespace gem5
|
||||
|
||||
#endif // __GPU_TLB_HH__
|
||||
@@ -31,7 +31,7 @@
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "gpu-compute/tlb_coalescer.hh"
|
||||
#include "arch/amdgpu/gcn3/tlb_coalescer.hh"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
@@ -101,11 +101,11 @@ TLBCoalescer::canCoalesce(PacketPtr incoming_pkt, PacketPtr coalesced_pkt)
|
||||
if (disableCoalescing)
|
||||
return false;
|
||||
|
||||
TheISA::GpuTLB::TranslationState *incoming_state =
|
||||
safe_cast<TheISA::GpuTLB::TranslationState*>(incoming_pkt->senderState);
|
||||
GpuTranslationState *incoming_state =
|
||||
safe_cast<GpuTranslationState*>(incoming_pkt->senderState);
|
||||
|
||||
TheISA::GpuTLB::TranslationState *coalesced_state =
|
||||
safe_cast<TheISA::GpuTLB::TranslationState*>(coalesced_pkt->senderState);
|
||||
GpuTranslationState *coalesced_state =
|
||||
safe_cast<GpuTranslationState*>(coalesced_pkt->senderState);
|
||||
|
||||
// Rule 1: Coalesce requests only if they
|
||||
// fall within the same virtual page
|
||||
@@ -148,8 +148,8 @@ TLBCoalescer::updatePhysAddresses(PacketPtr pkt)
|
||||
DPRINTF(GPUTLB, "Update phys. addr. for %d coalesced reqs for page %#x\n",
|
||||
issuedTranslationsTable[virt_page_addr].size(), virt_page_addr);
|
||||
|
||||
TheISA::GpuTLB::TranslationState *sender_state =
|
||||
safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
|
||||
GpuTranslationState *sender_state =
|
||||
safe_cast<GpuTranslationState*>(pkt->senderState);
|
||||
|
||||
TheISA::TlbEntry *tlb_entry = sender_state->tlbEntry;
|
||||
assert(tlb_entry);
|
||||
@@ -167,8 +167,8 @@ TLBCoalescer::updatePhysAddresses(PacketPtr pkt)
|
||||
|
||||
for (int i = 0; i < issuedTranslationsTable[virt_page_addr].size(); ++i) {
|
||||
PacketPtr local_pkt = issuedTranslationsTable[virt_page_addr][i];
|
||||
TheISA::GpuTLB::TranslationState *sender_state =
|
||||
safe_cast<TheISA::GpuTLB::TranslationState*>(
|
||||
GpuTranslationState *sender_state =
|
||||
safe_cast<GpuTranslationState*>(
|
||||
local_pkt->senderState);
|
||||
|
||||
// we are sending the packet back, so pop the reqCnt associated
|
||||
@@ -238,8 +238,8 @@ TLBCoalescer::CpuSidePort::recvTimingReq(PacketPtr pkt)
|
||||
// number of coalesced reqs for a given window
|
||||
int coalescedReq_cnt = 0;
|
||||
|
||||
TheISA::GpuTLB::TranslationState *sender_state =
|
||||
safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
|
||||
GpuTranslationState *sender_state =
|
||||
safe_cast<GpuTranslationState*>(pkt->senderState);
|
||||
|
||||
// push back the port to remember the path back
|
||||
sender_state->ports.push_back(this);
|
||||
@@ -337,8 +337,8 @@ void
|
||||
TLBCoalescer::CpuSidePort::recvFunctional(PacketPtr pkt)
|
||||
{
|
||||
|
||||
TheISA::GpuTLB::TranslationState *sender_state =
|
||||
safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
|
||||
GpuTranslationState *sender_state =
|
||||
safe_cast<GpuTranslationState*>(pkt->senderState);
|
||||
|
||||
bool update_stats = !sender_state->isPrefetch;
|
||||
|
||||
@@ -460,8 +460,8 @@ TLBCoalescer::processProbeTLBEvent()
|
||||
rejected = true;
|
||||
++vector_index;
|
||||
} else {
|
||||
TheISA::GpuTLB::TranslationState *tmp_sender_state =
|
||||
safe_cast<TheISA::GpuTLB::TranslationState*>
|
||||
GpuTranslationState *tmp_sender_state =
|
||||
safe_cast<GpuTranslationState*>
|
||||
(first_packet->senderState);
|
||||
|
||||
bool update_stats = !tmp_sender_state->isPrefetch;
|
||||
@@ -39,13 +39,13 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "arch/amdgpu/gcn3/tlb.hh"
|
||||
#include "arch/generic/tlb.hh"
|
||||
#include "arch/x86/isa.hh"
|
||||
#include "arch/x86/pagetable.hh"
|
||||
#include "arch/x86/regs/segment.hh"
|
||||
#include "base/logging.hh"
|
||||
#include "base/statistics.hh"
|
||||
#include "gpu-compute/gpu_tlb.hh"
|
||||
#include "mem/port.hh"
|
||||
#include "mem/request.hh"
|
||||
#include "params/TLBCoalescer.hh"
|
||||
@@ -39,7 +39,6 @@ if not env['BUILD_GPU']:
|
||||
SimObject('GPU.py')
|
||||
SimObject('GPUStaticInstFlags.py')
|
||||
SimObject('LdsState.py')
|
||||
SimObject('X86GPUTLB.py')
|
||||
|
||||
Source('comm.cc')
|
||||
Source('compute_unit.cc')
|
||||
@@ -54,7 +53,6 @@ Source('gpu_dyn_inst.cc')
|
||||
Source('gpu_exec_context.cc')
|
||||
Source('gpu_render_driver.cc')
|
||||
Source('gpu_static_inst.cc')
|
||||
Source('gpu_tlb.cc')
|
||||
Source('lds_state.cc')
|
||||
Source('local_memory_pipeline.cc')
|
||||
Source('pool_manager.cc')
|
||||
@@ -69,7 +67,6 @@ Source('shader.cc')
|
||||
Source('dyn_pool_manager.cc')
|
||||
Source('simple_pool_manager.cc')
|
||||
Source('static_register_manager_policy.cc')
|
||||
Source('tlb_coalescer.cc')
|
||||
Source('vector_register_file.cc')
|
||||
Source('wavefront.cc')
|
||||
|
||||
|
||||
@@ -35,7 +35,6 @@
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include "arch/x86/page_size.hh"
|
||||
#include "base/output.hh"
|
||||
#include "debug/GPUDisp.hh"
|
||||
#include "debug/GPUExec.hh"
|
||||
@@ -1076,8 +1075,8 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt)
|
||||
pkt->senderState = new DTLBPort::SenderState(gpuDynInst, index);
|
||||
|
||||
// This is the senderState needed by the TLB hierarchy to function
|
||||
X86ISA::GpuTLB::TranslationState *translation_state =
|
||||
new X86ISA::GpuTLB::TranslationState(TLB_mode, shader->gpuTc, false,
|
||||
GpuTranslationState *translation_state =
|
||||
new GpuTranslationState(TLB_mode, shader->gpuTc, false,
|
||||
pkt->senderState);
|
||||
|
||||
pkt->senderState = translation_state;
|
||||
@@ -1091,8 +1090,8 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt)
|
||||
stats.hitsPerTLBLevel[hit_level]++;
|
||||
|
||||
// New SenderState for the memory access
|
||||
X86ISA::GpuTLB::TranslationState *sender_state =
|
||||
safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState);
|
||||
GpuTranslationState *sender_state =
|
||||
safe_cast<GpuTranslationState*>(pkt->senderState);
|
||||
|
||||
delete sender_state->tlbEntry;
|
||||
delete sender_state->saved;
|
||||
@@ -1169,7 +1168,7 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt)
|
||||
delete pkt->senderState;
|
||||
|
||||
// Because it's atomic operation, only need TLB translation state
|
||||
pkt->senderState = new X86ISA::GpuTLB::TranslationState(TLB_mode,
|
||||
pkt->senderState = new GpuTranslationState(TLB_mode,
|
||||
shader->gpuTc);
|
||||
|
||||
tlbPort[tlbPort_index].sendFunctional(pkt);
|
||||
@@ -1190,8 +1189,8 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt)
|
||||
new_pkt->req->getPaddr());
|
||||
|
||||
// safe_cast the senderState
|
||||
X86ISA::GpuTLB::TranslationState *sender_state =
|
||||
safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState);
|
||||
GpuTranslationState *sender_state =
|
||||
safe_cast<GpuTranslationState*>(pkt->senderState);
|
||||
|
||||
delete sender_state->tlbEntry;
|
||||
delete new_pkt;
|
||||
@@ -1211,7 +1210,7 @@ ComputeUnit::sendScalarRequest(GPUDynInstPtr gpuDynInst, PacketPtr pkt)
|
||||
new ComputeUnit::ScalarDTLBPort::SenderState(gpuDynInst);
|
||||
|
||||
pkt->senderState =
|
||||
new X86ISA::GpuTLB::TranslationState(tlb_mode, shader->gpuTc, false,
|
||||
new GpuTranslationState(tlb_mode, shader->gpuTc, false,
|
||||
pkt->senderState);
|
||||
|
||||
if (scalarDTLBPort.isStalled()) {
|
||||
@@ -1397,8 +1396,8 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt)
|
||||
computeUnit->stats.tlbCycles += curTick();
|
||||
|
||||
// pop off the TLB translation state
|
||||
X86ISA::GpuTLB::TranslationState *translation_state =
|
||||
safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState);
|
||||
GpuTranslationState *translation_state =
|
||||
safe_cast<GpuTranslationState*>(pkt->senderState);
|
||||
|
||||
// no PageFaults are permitted for data accesses
|
||||
if (!translation_state->tlbEntry) {
|
||||
@@ -1508,15 +1507,15 @@ ComputeUnit::DTLBPort::recvTimingResp(PacketPtr pkt)
|
||||
|
||||
// Because it's atomic operation, only need TLB translation state
|
||||
prefetch_pkt->senderState =
|
||||
new X86ISA::GpuTLB::TranslationState(TLB_mode,
|
||||
new GpuTranslationState(TLB_mode,
|
||||
computeUnit->shader->gpuTc, true);
|
||||
|
||||
// Currently prefetches are zero-latency, hence the sendFunctional
|
||||
sendFunctional(prefetch_pkt);
|
||||
|
||||
/* safe_cast the senderState */
|
||||
X86ISA::GpuTLB::TranslationState *tlb_state =
|
||||
safe_cast<X86ISA::GpuTLB::TranslationState*>(
|
||||
GpuTranslationState *tlb_state =
|
||||
safe_cast<GpuTranslationState*>(
|
||||
prefetch_pkt->senderState);
|
||||
|
||||
|
||||
@@ -1663,8 +1662,8 @@ ComputeUnit::ScalarDTLBPort::recvTimingResp(PacketPtr pkt)
|
||||
{
|
||||
assert(pkt->senderState);
|
||||
|
||||
X86ISA::GpuTLB::TranslationState *translation_state =
|
||||
safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState);
|
||||
GpuTranslationState *translation_state =
|
||||
safe_cast<GpuTranslationState*>(pkt->senderState);
|
||||
|
||||
// Page faults are not allowed
|
||||
fatal_if(!translation_state->tlbEntry,
|
||||
@@ -1728,8 +1727,8 @@ ComputeUnit::ITLBPort::recvTimingResp(PacketPtr pkt)
|
||||
assert(pkt->senderState);
|
||||
|
||||
// pop off the TLB translation state
|
||||
X86ISA::GpuTLB::TranslationState *translation_state
|
||||
= safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState);
|
||||
GpuTranslationState *translation_state
|
||||
= safe_cast<GpuTranslationState*>(pkt->senderState);
|
||||
|
||||
bool success = translation_state->tlbEntry != nullptr;
|
||||
delete translation_state->tlbEntry;
|
||||
|
||||
@@ -174,7 +174,7 @@ FetchUnit::initiateFetch(Wavefront *wavefront)
|
||||
|
||||
// Sender State needed by TLB hierarchy
|
||||
pkt->senderState =
|
||||
new TheISA::GpuTLB::TranslationState(BaseMMU::Execute,
|
||||
new GpuTranslationState(BaseMMU::Execute,
|
||||
computeUnit.shader->gpuTc,
|
||||
false, pkt->senderState);
|
||||
|
||||
@@ -201,13 +201,13 @@ FetchUnit::initiateFetch(Wavefront *wavefront)
|
||||
}
|
||||
} else {
|
||||
pkt->senderState =
|
||||
new TheISA::GpuTLB::TranslationState(BaseMMU::Execute,
|
||||
new GpuTranslationState(BaseMMU::Execute,
|
||||
computeUnit.shader->gpuTc);
|
||||
|
||||
computeUnit.sqcTLBPort.sendFunctional(pkt);
|
||||
|
||||
TheISA::GpuTLB::TranslationState *sender_state =
|
||||
safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
|
||||
GpuTranslationState *sender_state =
|
||||
safe_cast<GpuTranslationState*>(pkt->senderState);
|
||||
|
||||
delete sender_state->tlbEntry;
|
||||
delete sender_state;
|
||||
|
||||
@@ -35,8 +35,6 @@
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include "arch/x86/linux/linux.hh"
|
||||
#include "arch/x86/page_size.hh"
|
||||
#include "base/chunk_generator.hh"
|
||||
#include "debug/GPUAgentDisp.hh"
|
||||
#include "debug/GPUDisp.hh"
|
||||
@@ -430,7 +428,7 @@ Shader::functionalTLBAccess(PacketPtr pkt, int cu_id, BaseMMU::Mode mode)
|
||||
{
|
||||
// update senderState. Need to know the gpuTc and the TLB mode
|
||||
pkt->senderState =
|
||||
new TheISA::GpuTLB::TranslationState(mode, gpuTc, false);
|
||||
new GpuTranslationState(mode, gpuTc, false);
|
||||
|
||||
// even when the perLaneTLB flag is turned on
|
||||
// it's ok to send all accesses through lane 0
|
||||
@@ -439,8 +437,8 @@ Shader::functionalTLBAccess(PacketPtr pkt, int cu_id, BaseMMU::Mode mode)
|
||||
cuList[cu_id]->tlbPort[0].sendFunctional(pkt);
|
||||
|
||||
/* safe_cast the senderState */
|
||||
TheISA::GpuTLB::TranslationState *sender_state =
|
||||
safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
|
||||
GpuTranslationState *sender_state =
|
||||
safe_cast<GpuTranslationState*>(pkt->senderState);
|
||||
|
||||
delete sender_state->tlbEntry;
|
||||
delete pkt->senderState;
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
#include <functional>
|
||||
#include <string>
|
||||
|
||||
#include "arch/gpu_isa.hh"
|
||||
#include "base/statistics.hh"
|
||||
#include "base/stats/group.hh"
|
||||
#include "base/types.hh"
|
||||
@@ -47,7 +48,6 @@
|
||||
#include "cpu/thread_state.hh"
|
||||
#include "gpu-compute/compute_unit.hh"
|
||||
#include "gpu-compute/gpu_dyn_inst.hh"
|
||||
#include "gpu-compute/gpu_tlb.hh"
|
||||
#include "gpu-compute/hsa_queue_entry.hh"
|
||||
#include "gpu-compute/lds_state.hh"
|
||||
#include "mem/page_table.hh"
|
||||
|
||||
Reference in New Issue
Block a user