mem-ruby: Add DMA support to MOESI_AMD_Base-dir.sm
This change adds DMA support to MOESI_AMD_Base-dir.sm, which is needed to support ROCm apps and the GCN3 ISA in the VIPER protocol. The DMA controller is copied from MOESI_hammer-dma.sm with a few modifications. Change-Id: I56141436eee1c8f62c2a0915fa3b63b83bbcbc9a Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29914 Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com> Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
committed by
Anthony Gutierrez
parent
18ebe62598
commit
b811d3a342
@@ -2,6 +2,7 @@ protocol "GPU_VIPER";
|
||||
include "RubySlicc_interfaces.slicc";
|
||||
include "MOESI_AMD_Base-msg.sm";
|
||||
include "MOESI_AMD_Base-dir.sm";
|
||||
include "MOESI_AMD_Base-dma.sm";
|
||||
include "MOESI_AMD_Base-CorePair.sm";
|
||||
include "GPU_VIPER-msg.sm";
|
||||
include "GPU_VIPER-TCP.sm";
|
||||
|
||||
@@ -42,6 +42,10 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
bool useL3OnWT := "False";
|
||||
Cycles to_memory_controller_latency := 1;
|
||||
|
||||
// DMA
|
||||
MessageBuffer * requestFromDMA, network="From", virtual_network="1", vnet_type="request";
|
||||
MessageBuffer * responseToDMA, network="To", virtual_network="3", vnet_type="request";
|
||||
|
||||
// From the Cores
|
||||
MessageBuffer * requestFromCores, network="From", virtual_network="0", vnet_type="request";
|
||||
MessageBuffer * responseFromCores, network="From", virtual_network="2", vnet_type="response";
|
||||
@@ -63,13 +67,17 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
// BL is Busy because it's possible for the data only to be in the network
|
||||
// in the WB, L3 has sent it and gone on with its business in possibly I
|
||||
// state.
|
||||
BDR_M, AccessPermission:Backing_Store, desc="DMA read, blocked waiting for memory";
|
||||
BS_M, AccessPermission:Backing_Store, desc="blocked waiting for memory";
|
||||
BM_M, AccessPermission:Backing_Store, desc="blocked waiting for memory";
|
||||
B_M, AccessPermission:Backing_Store, desc="blocked waiting for memory";
|
||||
BP, AccessPermission:Backing_Store, desc="blocked waiting for probes, no need for memory";
|
||||
BDR_PM, AccessPermission:Backing_Store, desc="DMA read, blocked waiting for probes and memory";
|
||||
BS_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory";
|
||||
BM_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory";
|
||||
B_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory";
|
||||
BDW_P, AccessPermission:Backing_Store, desc="DMA write, blocked waiting for probes, no need for memory";
|
||||
BDR_Pm, AccessPermission:Backing_Store, desc="DMA read, blocked waiting for probes, already got memory";
|
||||
BS_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
|
||||
BM_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
|
||||
B_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
|
||||
@@ -107,6 +115,10 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
UnblockWriteThrough, desc="Unblock because of writethrough request finishing";
|
||||
|
||||
StaleVicDirty, desc="Core invalidated before VicDirty processed";
|
||||
|
||||
// DMA
|
||||
DmaRead, desc="DMA read";
|
||||
DmaWrite, desc="DMA write";
|
||||
}
|
||||
|
||||
enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
|
||||
@@ -148,6 +160,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
bool L3Hit, default="false", desc="Was this an L3 hit?";
|
||||
uint64_t probe_id, desc="probe id for lifetime profiling";
|
||||
WriteMask writeMask, desc="outstanding write through mask";
|
||||
int Len, desc="Length of memory request for DMA";
|
||||
}
|
||||
|
||||
structure(TBETable, external="yes") {
|
||||
@@ -266,6 +279,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
}
|
||||
|
||||
// ** OUT_PORTS **
|
||||
out_port(dmaResponseQueue_out, DMAResponseMsg, responseToDMA);
|
||||
|
||||
out_port(probeNetwork_out, NBProbeRequestMsg, probeToCore);
|
||||
out_port(responseNetwork_out, ResponseMsg, responseToCore);
|
||||
|
||||
@@ -276,6 +291,23 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
|
||||
// ** IN_PORTS **
|
||||
|
||||
// DMA Ports
|
||||
in_port(dmaRequestQueue_in, DMARequestMsg, requestFromDMA, rank=6) {
|
||||
if (dmaRequestQueue_in.isReady(clockEdge())) {
|
||||
peek(dmaRequestQueue_in, DMARequestMsg) {
|
||||
TBE tbe := TBEs.lookup(in_msg.LineAddress);
|
||||
CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.LineAddress));
|
||||
if (in_msg.Type == DMARequestType:READ) {
|
||||
trigger(Event:DmaRead, in_msg.LineAddress, entry, tbe);
|
||||
} else if (in_msg.Type == DMARequestType:WRITE) {
|
||||
trigger(Event:DmaWrite, in_msg.LineAddress, entry, tbe);
|
||||
} else {
|
||||
error("Unknown DMA msg");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Trigger Queue
|
||||
in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=5) {
|
||||
if (triggerQueue_in.isReady(clockEdge())) {
|
||||
@@ -395,6 +427,25 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
}
|
||||
|
||||
// Actions
|
||||
action(dd_sendResponseDmaData, "dd", desc="send DMA data response") {
|
||||
enqueue(dmaResponseQueue_out, DMAResponseMsg, response_latency) {
|
||||
out_msg.LineAddress := address;
|
||||
out_msg.Type := DMAResponseType:DATA;
|
||||
out_msg.Destination.add(tbe.OriginalRequestor);
|
||||
out_msg.DataBlk := tbe.DataBlk;
|
||||
out_msg.MessageSize := MessageSizeType:Response_Data;
|
||||
}
|
||||
}
|
||||
|
||||
action(da_sendResponseDmaAck, "da", desc="send DMA data response") {
|
||||
enqueue(dmaResponseQueue_out, DMAResponseMsg, response_latency) {
|
||||
out_msg.LineAddress := address;
|
||||
out_msg.Type := DMAResponseType:ACK;
|
||||
out_msg.Destination.add(tbe.OriginalRequestor);
|
||||
out_msg.MessageSize := MessageSizeType:Response_Control;
|
||||
}
|
||||
}
|
||||
|
||||
action(s_sendResponseS, "s", desc="send Shared response") {
|
||||
enqueue(responseNetwork_out, ResponseMsg, response_latency) {
|
||||
out_msg.addr := address;
|
||||
@@ -531,6 +582,29 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
}
|
||||
}
|
||||
|
||||
action(qdr_queueDmaRdReq, "qdr", desc="Read data from memory for DMA") {
|
||||
peek(dmaRequestQueue_in, DMARequestMsg) {
|
||||
if (L3CacheMemory.isTagPresent(address)) {
|
||||
enqueue(L3TriggerQueue_out, TriggerMsg, l3_hit_latency) {
|
||||
out_msg.addr := address;
|
||||
out_msg.Type := TriggerType:L3Hit;
|
||||
}
|
||||
CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address));
|
||||
tbe.DataBlk := entry.DataBlk;
|
||||
tbe.L3Hit := true;
|
||||
tbe.MemData := true;
|
||||
L3CacheMemory.deallocate(address);
|
||||
} else {
|
||||
enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {
|
||||
out_msg.addr := address;
|
||||
out_msg.Type := MemoryRequestType:MEMORY_READ;
|
||||
out_msg.Sender := machineID;
|
||||
out_msg.MessageSize := MessageSizeType:Request_Control;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
action(l_queueMemRdReq, "lr", desc="Read data from memory") {
|
||||
peek(requestNetwork_in, CPURequestMsg) {
|
||||
if (L3CacheMemory.isTagPresent(address)) {
|
||||
@@ -558,6 +632,40 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
}
|
||||
}
|
||||
|
||||
action(icd_probeInvCoreDataForDMA, "icd", desc="Probe inv cores, return data for DMA") {
|
||||
peek(dmaRequestQueue_in, DMARequestMsg) {
|
||||
enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
|
||||
out_msg.addr := address;
|
||||
out_msg.Type := ProbeRequestType:PrbInv;
|
||||
out_msg.ReturnData := true;
|
||||
out_msg.MessageSize := MessageSizeType:Control;
|
||||
out_msg.Destination.broadcast(MachineType:CorePair);
|
||||
|
||||
// add relevant TCC node to list. This replaces all TCPs and SQCs
|
||||
if (noTCCdir) {
|
||||
out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
|
||||
TCC_select_low_bit, TCC_select_num_bits));
|
||||
} else {
|
||||
out_msg.Destination.add(mapAddressToRange(address,
|
||||
MachineType:TCCdir,
|
||||
TCC_select_low_bit, TCC_select_num_bits));
|
||||
}
|
||||
out_msg.Destination.remove(in_msg.Requestor);
|
||||
tbe.NumPendingAcks := out_msg.Destination.count();
|
||||
if (tbe.NumPendingAcks == 0) {
|
||||
enqueue(triggerQueue_out, TriggerMsg, 1) {
|
||||
out_msg.addr := address;
|
||||
out_msg.Type := TriggerType:AcksComplete;
|
||||
}
|
||||
}
|
||||
DPRINTF(RubySlicc, "%s\n", out_msg);
|
||||
APPEND_TRANSITION_COMMENT(" dc: Acks remaining: ");
|
||||
APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
|
||||
tbe.ProbeRequestStartTime := curCycle();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
action(dc_probeInvCoreData, "dc", desc="probe inv cores, return data") {
|
||||
peek(requestNetwork_in, CPURequestMsg) {
|
||||
enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
|
||||
@@ -596,6 +704,42 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
}
|
||||
}
|
||||
|
||||
action(scd_probeShrCoreDataForDma, "dsc", desc="probe shared cores, return data for DMA") {
|
||||
peek(dmaRequestQueue_in, DMARequestMsg) {
|
||||
enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
|
||||
out_msg.addr := address;
|
||||
out_msg.Type := ProbeRequestType:PrbDowngrade;
|
||||
out_msg.ReturnData := true;
|
||||
out_msg.MessageSize := MessageSizeType:Control;
|
||||
out_msg.Destination.broadcast(MachineType:CorePair);
|
||||
// add relevant TCC node to the list. This replaces all TCPs and SQCs
|
||||
if (noTCCdir || CPUonly) {
|
||||
//Don't need to notify TCC about reads
|
||||
} else {
|
||||
out_msg.Destination.add(mapAddressToRange(address,
|
||||
MachineType:TCCdir,
|
||||
TCC_select_low_bit, TCC_select_num_bits));
|
||||
}
|
||||
if (noTCCdir && !CPUonly) {
|
||||
out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
|
||||
TCC_select_low_bit, TCC_select_num_bits));
|
||||
}
|
||||
out_msg.Destination.remove(in_msg.Requestor);
|
||||
tbe.NumPendingAcks := out_msg.Destination.count();
|
||||
if (tbe.NumPendingAcks == 0) {
|
||||
enqueue(triggerQueue_out, TriggerMsg, 1) {
|
||||
out_msg.addr := address;
|
||||
out_msg.Type := TriggerType:AcksComplete;
|
||||
}
|
||||
}
|
||||
DPRINTF(RubySlicc, "%s\n", (out_msg));
|
||||
APPEND_TRANSITION_COMMENT(" sc: Acks remaining: ");
|
||||
APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
|
||||
tbe.ProbeRequestStartTime := curCycle();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
action(sc_probeShrCoreData, "sc", desc="probe shared cores, return data") {
|
||||
peek(requestNetwork_in, CPURequestMsg) { // not the right network?
|
||||
enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) {
|
||||
@@ -681,6 +825,24 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
}
|
||||
}
|
||||
|
||||
action(atd_allocateTBEforDMA, "atd", desc="allocate TBE Entry for DMA") {
|
||||
check_allocate(TBEs);
|
||||
peek(dmaRequestQueue_in, DMARequestMsg) {
|
||||
TBEs.allocate(address);
|
||||
set_tbe(TBEs.lookup(address));
|
||||
tbe.OriginalRequestor := in_msg.Requestor;
|
||||
tbe.NumPendingAcks := 0;
|
||||
tbe.Dirty := false;
|
||||
tbe.Len := in_msg.Len;
|
||||
if (in_msg.Type == DMARequestType:WRITE) {
|
||||
tbe.wtData := true;
|
||||
tbe.Dirty := true;
|
||||
tbe.DataBlk := in_msg.DataBlk;
|
||||
tbe.writeMask.fillMask();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
action(t_allocateTBE, "t", desc="allocate TBE Entry") {
|
||||
check_allocate(TBEs);
|
||||
peek(requestNetwork_in, CPURequestMsg) {
|
||||
@@ -867,6 +1029,10 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
L3CacheMemory.deallocate(address);
|
||||
}
|
||||
|
||||
action(pd_popDmaRequestQueue, "pd", desc="Pop DMA request queue") {
|
||||
dmaRequestQueue_in.dequeue(clockEdge());
|
||||
}
|
||||
|
||||
action(p_popRequestQueue, "p", desc="pop request queue") {
|
||||
requestNetwork_in.dequeue(clockEdge());
|
||||
}
|
||||
@@ -915,7 +1081,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
}
|
||||
|
||||
// TRANSITIONS
|
||||
transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, {RdBlkS, RdBlkM, RdBlk, CtoD}) {
|
||||
transition({BL, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B}, {RdBlkS, RdBlkM, RdBlk, CtoD}) {
|
||||
st_stallAndWaitRequest;
|
||||
}
|
||||
|
||||
@@ -926,6 +1092,13 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
|
||||
|
||||
// transitions from U
|
||||
transition(U, DmaRead, BDR_PM) {L3TagArrayRead} {
|
||||
atd_allocateTBEforDMA;
|
||||
qdr_queueDmaRdReq;
|
||||
scd_probeShrCoreDataForDma;
|
||||
pd_popDmaRequestQueue;
|
||||
}
|
||||
|
||||
transition(U, {RdBlkS}, BS_PM) {L3TagArrayRead} {
|
||||
t_allocateTBE;
|
||||
l_queueMemRdReq;
|
||||
@@ -933,6 +1106,13 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
p_popRequestQueue;
|
||||
}
|
||||
|
||||
transition(U, DmaWrite, BDW_P) {L3TagArrayRead} {
|
||||
atd_allocateTBEforDMA;
|
||||
da_sendResponseDmaAck;
|
||||
icd_probeInvCoreDataForDMA;
|
||||
pd_popDmaRequestQueue;
|
||||
}
|
||||
|
||||
transition(U, WriteThrough, BM_PM) {L3TagArrayRead, L3TagArrayWrite} {
|
||||
t_allocateTBE;
|
||||
w_sendResponseWBAck;
|
||||
@@ -998,15 +1178,15 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
pr_popResponseQueue;
|
||||
}
|
||||
|
||||
transition({B, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm}, {VicDirty, VicClean}) {
|
||||
transition({B, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm}, {VicDirty, VicClean}) {
|
||||
z_stall;
|
||||
}
|
||||
|
||||
transition({U, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, WBAck) {
|
||||
transition({U, BL, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B}, WBAck) {
|
||||
pm_popMemQueue;
|
||||
}
|
||||
|
||||
transition({U, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, StaleVicDirty) {
|
||||
transition({U, BL, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B}, StaleVicDirty) {
|
||||
rv_removeVicDirtyIgnore;
|
||||
w_sendResponseWBAck;
|
||||
p_popRequestQueue;
|
||||
@@ -1022,6 +1202,11 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
pt_popTriggerQueue;
|
||||
}
|
||||
|
||||
transition(BDR_PM, MemData, BDR_Pm) {
|
||||
mt_writeMemDataToTBE;
|
||||
pm_popMemQueue;
|
||||
}
|
||||
|
||||
transition(BS_PM, MemData, BS_Pm) {} {
|
||||
mt_writeMemDataToTBE;
|
||||
pm_popMemQueue;
|
||||
@@ -1037,6 +1222,10 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
pm_popMemQueue;
|
||||
}
|
||||
|
||||
transition(BDR_PM, L3Hit, BDR_Pm) {
|
||||
ptl_popTriggerQueue;
|
||||
}
|
||||
|
||||
transition(BS_PM, L3Hit, BS_Pm) {} {
|
||||
ptl_popTriggerQueue;
|
||||
}
|
||||
@@ -1049,6 +1238,13 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
ptl_popTriggerQueue;
|
||||
}
|
||||
|
||||
transition(BDR_M, MemData, U) {
|
||||
mt_writeMemDataToTBE;
|
||||
dd_sendResponseDmaData;
|
||||
dt_deallocateTBE;
|
||||
pm_popMemQueue;
|
||||
}
|
||||
|
||||
transition(BS_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
|
||||
mt_writeMemDataToTBE;
|
||||
s_sendResponseS;
|
||||
@@ -1100,13 +1296,17 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
ptl_popTriggerQueue;
|
||||
}
|
||||
|
||||
transition({BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, BP}, CPUPrbResp) {
|
||||
transition({BDR_PM, BS_PM, BDW_P, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, BP}, CPUPrbResp) {
|
||||
y_writeProbeDataToTBE;
|
||||
x_decrementAcks;
|
||||
o_checkForCompletion;
|
||||
pr_popResponseQueue;
|
||||
}
|
||||
|
||||
transition(BDR_PM, ProbeAcksComplete, BDR_M) {
|
||||
pt_popTriggerQueue;
|
||||
}
|
||||
|
||||
transition(BS_PM, ProbeAcksComplete, BS_M) {} {
|
||||
sf_setForwardReqTime;
|
||||
pt_popTriggerQueue;
|
||||
@@ -1122,6 +1322,17 @@ machine(MachineType:Directory, "AMD Baseline protocol")
|
||||
pt_popTriggerQueue;
|
||||
}
|
||||
|
||||
transition(BDW_P, ProbeAcksComplete, U) {
|
||||
dt_deallocateTBE;
|
||||
pt_popTriggerQueue;
|
||||
}
|
||||
|
||||
transition(BDR_Pm, ProbeAcksComplete, U) {
|
||||
dd_sendResponseDmaData;
|
||||
dt_deallocateTBE;
|
||||
pt_popTriggerQueue;
|
||||
}
|
||||
|
||||
transition(BS_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
|
||||
sf_setForwardReqTime;
|
||||
s_sendResponseS;
|
||||
|
||||
233 lines added in new file: src/mem/ruby/protocol/MOESI_AMD_Base-dma.sm
@@ -0,0 +1,233 @@
|
||||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
// DMA controller for the MOESI_AMD_Base protocol family.  It accepts LD/ST
// requests from its attached DMASequencer (via the mandatoryQueue), forwards
// each one to the Directory as a DMARequestMsg, and completes the request
// when the Directory's DATA (read) or ACK (write) response arrives.
machine(MachineType:DMA, "DMA Controller")
    : DMASequencer * dma_sequencer;
      Cycles request_latency := 6;

      // Virtual networks 1 (requests) and 3 (responses) pair with the
      // Directory-side requestFromDMA / responseToDMA buffers.
      MessageBuffer * responseFromDir, network="From", virtual_network="3",
            vnet_type="response";
      MessageBuffer * requestToDir, network="To", virtual_network="1",
            vnet_type="request";
      MessageBuffer * mandatoryQueue;
{
  // One outstanding request per line: READY accepts a new request,
  // BUSY_RD / BUSY_WR wait for the Directory's response.
  state_declaration(State, desc="DMA states", default="DMA_State_READY") {
    READY, AccessPermission:Invalid, desc="Ready to accept a new request";
    BUSY_RD, AccessPermission:Busy, desc="Busy: currently processing a request";
    BUSY_WR, AccessPermission:Busy, desc="Busy: currently processing a request";
  }

  enumeration(Event, desc="DMA events") {
    ReadRequest, desc="A new read request";
    WriteRequest, desc="A new write request";
    Data, desc="Data from a DMA memory read";
    Ack, desc="DMA write to memory completed";
  }

  // Per-line transient entry; DataBlk buffers read data until the
  // sequencer's dataCallback consumes it.
  structure(TBE, desc="...") {
    State TBEState, desc="Transient state";
    DataBlock DataBlk, desc="Data";
  }

  structure(TBETable, external = "yes") {
    TBE lookup(Addr);
    void allocate(Addr);
    void deallocate(Addr);
    bool isPresent(Addr);
  }

  void set_tbe(TBE b);
  void unset_tbe();
  void wakeUpAllBuffers();

  TBETable TBEs, template="<DMA_TBE>", constructor="m_number_of_TBEs";

  Tick clockEdge();
  MachineID mapAddressToMachine(Addr addr, MachineType mtype);

  // A line with no TBE is idle (READY); otherwise the TBE carries its state.
  State getState(TBE tbe, Addr addr) {
    if (is_valid(tbe)) {
      return tbe.TBEState;
    } else {
      return State:READY;
    }
  }

  void setState(TBE tbe, Addr addr, State state) {
    if (is_valid(tbe)) {
      tbe.TBEState := state;
    }
  }

  // The DMA controller holds no coherent copies, so it always reports
  // NotPresent and opts out of functional accesses below.
  AccessPermission getAccessPermission(Addr addr) {
    return AccessPermission:NotPresent;
  }

  void setAccessPermission(Addr addr, State state) {
  }

  void functionalRead(Addr addr, Packet *pkt) {
    error("DMA does not support functional read.");
  }

  int functionalWrite(Addr addr, Packet *pkt) {
    error("DMA does not support functional write.");
  }

  out_port(requestToDir_out, DMARequestMsg, requestToDir, desc="...");

  // Requests from the DMASequencer: LD -> ReadRequest, ST -> WriteRequest.
  in_port(dmaRequestQueue_in, SequencerMsg, mandatoryQueue, desc="...") {
    if (dmaRequestQueue_in.isReady(clockEdge())) {
      peek(dmaRequestQueue_in, SequencerMsg) {
        if (in_msg.Type == SequencerRequestType:LD ) {
          trigger(Event:ReadRequest, in_msg.LineAddress, TBEs[in_msg.LineAddress]);
        } else if (in_msg.Type == SequencerRequestType:ST) {
          trigger(Event:WriteRequest, in_msg.LineAddress, TBEs[in_msg.LineAddress]);
        } else {
          error("Invalid request type");
        }
      }
    }
  }

  // Responses from the Directory: DATA completes a read, ACK a write.
  in_port(dmaResponseQueue_in, DMAResponseMsg, responseFromDir, desc="...") {
    if (dmaResponseQueue_in.isReady(clockEdge())) {
      peek( dmaResponseQueue_in, DMAResponseMsg) {
        if (in_msg.Type == DMAResponseType:ACK) {
          trigger(Event:Ack, in_msg.LineAddress, TBEs[in_msg.LineAddress]);
        } else if (in_msg.Type == DMAResponseType:DATA) {
          trigger(Event:Data, in_msg.LineAddress, TBEs[in_msg.LineAddress]);
        } else {
          error("Invalid response type");
        }
      }
    }
  }

  // Forward a read to the Directory; both the full physical address and the
  // line-aligned address travel in the message (Len allows sub-line reads).
  action(s_sendReadRequest, "s", desc="Send a DMA read request to memory") {
    peek(dmaRequestQueue_in, SequencerMsg) {
      enqueue(requestToDir_out, DMARequestMsg, request_latency) {
        out_msg.PhysicalAddress := in_msg.PhysicalAddress;
        out_msg.LineAddress := in_msg.LineAddress;
        out_msg.Type := DMARequestType:READ;
        out_msg.Requestor := machineID;
        out_msg.Len := in_msg.Len;
        out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
        out_msg.MessageSize := MessageSizeType:Writeback_Control;
      }
    }
  }

  // Same as the read request, but carries the data block to be written.
  action(s_sendWriteRequest, "\s", desc="Send a DMA write request to memory") {
    peek(dmaRequestQueue_in, SequencerMsg) {
      enqueue(requestToDir_out, DMARequestMsg, request_latency) {
        out_msg.PhysicalAddress := in_msg.PhysicalAddress;
        out_msg.LineAddress := in_msg.LineAddress;
        out_msg.Type := DMARequestType:WRITE;
        out_msg.Requestor := machineID;
        out_msg.DataBlk := in_msg.DataBlk;
        out_msg.Len := in_msg.Len;
        out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
        out_msg.MessageSize := MessageSizeType:Writeback_Control;
      }
    }
  }

  action(a_ackCallback, "a", desc="Notify dma controller that write request completed") {
    dma_sequencer.ackCallback(address);
  }

  // Deliver the data buffered in the TBE back to the sequencer.
  action(d_dataCallback, "d", desc="Write data to dma sequencer") {
    dma_sequencer.dataCallback(tbe.DataBlk, address);
  }

  action(t_updateTBEData, "t", desc="Update TBE Data") {
    assert(is_valid(tbe));
    peek( dmaResponseQueue_in, DMAResponseMsg) {
      tbe.DataBlk := in_msg.DataBlk;
    }
  }

  action(v_allocateTBE, "v", desc="Allocate TBE entry") {
    TBEs.allocate(address);
    set_tbe(TBEs[address]);
  }

  action(w_deallocateTBE, "w", desc="Deallocate TBE entry") {
    TBEs.deallocate(address);
    unset_tbe();
  }

  action(p_popRequestQueue, "p", desc="Pop request queue") {
    dmaRequestQueue_in.dequeue(clockEdge());
  }

  action(p_popResponseQueue, "\p", desc="Pop request queue") {
    dmaResponseQueue_in.dequeue(clockEdge());
  }

  // A request to a line that is already busy parks here until the
  // completing transition wakes all stalled buffers.
  action(zz_stallAndWaitRequestQueue, "zz", desc="...") {
    stall_and_wait(dmaRequestQueue_in, address);
  }

  action(wkad_wakeUpAllDependents, "wkad", desc="wake-up all dependents") {
    wakeUpAllBuffers();
  }

  transition(READY, ReadRequest, BUSY_RD) {
    v_allocateTBE;
    s_sendReadRequest;
    p_popRequestQueue;
  }

  transition(READY, WriteRequest, BUSY_WR) {
    v_allocateTBE;
    s_sendWriteRequest;
    p_popRequestQueue;
  }

  // Read completes: stash the data, hand it to the sequencer, free the TBE,
  // then wake any request that stalled on this line.
  transition(BUSY_RD, Data, READY) {
    t_updateTBEData;
    d_dataCallback;
    w_deallocateTBE;
    p_popResponseQueue;
    wkad_wakeUpAllDependents;
  }

  transition(BUSY_WR, Ack, READY) {
    a_ackCallback;
    w_deallocateTBE;
    p_popResponseQueue;
    wkad_wakeUpAllDependents;
  }

  // Back-to-back requests to the same line are serialized.
  transition({BUSY_RD,BUSY_WR}, {ReadRequest,WriteRequest}) {
    zz_stallAndWaitRequestQueue;
  }

}
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2010-2015, 2018 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
@@ -361,3 +361,51 @@ structure(FifoMsg, desc="...", interface="Message") {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Request kinds a DMA controller may send to the Directory.
enumeration(DMARequestType, desc="...", default="DMARequestType_NULL") {
  READ, desc="Memory Read";
  WRITE, desc="Memory Write";
  NULL, desc="Invalid";
}
|
||||
|
||||
// Response kinds the Directory returns to a DMA controller:
// DATA completes a read, ACK completes a write.
enumeration(DMAResponseType, desc="...", default="DMAResponseType_NULL") {
  DATA, desc="DATA read";
  ACK, desc="ACK write";
  NULL, desc="Invalid";
}
|
||||
|
||||
// Request message sent by the DMA controller to the Directory.  Carries both
// the full physical address (Len allows sub-line transfers) and the
// line-aligned address used for coherence bookkeeping and functional access.
structure(DMARequestMsg, desc="...", interface="Message") {
  DMARequestType Type, desc="Request type (read/write)";
  Addr PhysicalAddress, desc="Physical address for this request";
  Addr LineAddress, desc="Line address for this request";
  MachineID Requestor, desc="Node who initiated the request";
  NetDest Destination, desc="Destination";
  DataBlock DataBlk, desc="DataBlk attached to this request";
  int Len, desc="The length of the request";
  MessageSizeType MessageSize, desc="size category of the message";

  // Functional accesses match against the line-aligned address.
  bool functionalRead(Packet *pkt) {
    return testAndRead(LineAddress, DataBlk, pkt);
  }

  bool functionalWrite(Packet *pkt) {
    return testAndWrite(LineAddress, DataBlk, pkt);
  }
}
|
||||
|
||||
// Response message sent by the Directory back to the DMA controller;
// Type distinguishes read data (DATA) from write completion (ACK).
structure(DMAResponseMsg, desc="...", interface="Message") {
  DMAResponseType Type, desc="Response type (DATA/ACK)";
  Addr PhysicalAddress, desc="Physical address for this request";
  Addr LineAddress, desc="Line address for this request";
  NetDest Destination, desc="Destination";
  DataBlock DataBlk, desc="DataBlk attached to this request";
  MessageSizeType MessageSize, desc="size category of the message";

  // Functional accesses match against the line-aligned address.
  bool functionalRead(Packet *pkt) {
    return testAndRead(LineAddress, DataBlk, pkt);
  }

  bool functionalWrite(Packet *pkt) {
    return testAndWrite(LineAddress, DataBlk, pkt);
  }
}
|
||||
|
||||
Reference in New Issue
Block a user