diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
index 20a0979af1..be1243aaa5 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
@@ -65,7 +65,8 @@ machine(MachineType:TCC, "TCC Cache")
     AtomicPassOn, desc="Atomic Op Passed on to Directory";
     AtomicDone, desc="AtomicOps Complete";
     AtomicNotDone, desc="AtomicOps not Complete";
-    Data, desc="data messgae";
+    Data, desc="Data message";
+    Flush, desc="Flush cache entry";
     // Coming from this TCC
     L2_Repl, desc="L2 Replacement";
     // Probes
@@ -376,6 +377,8 @@ machine(MachineType:TCC, "TCC Cache")
           } else {
             trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe);
           }
+        } else if (in_msg.Type == CoherenceRequestType:WriteFlush) {
+          trigger(Event:Flush, in_msg.addr, cache_entry, tbe);
         } else {
           DPRINTF(RubySlicc, "%s\n", in_msg);
           error("Unexpected Response Message to Core");
@@ -509,6 +512,20 @@ machine(MachineType:TCC, "TCC Cache")
     }
   }
 
+  action(fw_sendFlushResponse, "fw", desc="send Flush Response") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:TDSysWBAck;
+        out_msg.Destination.clear();
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.Sender := machineID;
+        out_msg.MessageSize := MessageSizeType:Writeback_Control;
+        out_msg.instSeqNum := in_msg.instSeqNum;
+      }
+    }
+  }
+
   action(ar_sendAtomicResponse, "ar", desc="send Atomic Ack") {
     peek(coreRequestNetwork_in, CPURequestMsg) {
       enqueue(responseToCore_out, ResponseMsg, l2_response_latency + glc_atomic_latency, true) {
@@ -628,6 +645,22 @@ machine(MachineType:TCC, "TCC Cache")
     }
   }
 
+  action(f_flush, "f", desc="write back data") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+        out_msg.addr := address;
+        out_msg.Requestor := machineID;
+        out_msg.WTRequestor := in_msg.Requestor;
+        out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
+        out_msg.MessageSize := MessageSizeType:Data;
+        out_msg.Type := CoherenceRequestType:WriteFlush;
+        out_msg.Dirty := true;
+        out_msg.DataBlk := cache_entry.DataBlk;
+        out_msg.writeMask.orMask(cache_entry.writeMask);
+      }
+    }
+  }
+
   action(at_atomicThrough, "at", desc="write back data") {
     peek(coreRequestNetwork_in, CPURequestMsg) {
       enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
@@ -1075,4 +1108,21 @@ machine(MachineType:TCC, "TCC Cache")
   transition(WIB, WBAck,I) {
     pr_popResponseQueue;
   }
+
+  transition({A, IV, WI, WIB}, Flush) {
+    st_stallAndWaitRequest;
+  }
+
+  transition(I, Flush) {
+    fw_sendFlushResponse;
+    p_popRequestQueue;
+  }
+
+  transition({V, W}, Flush, I) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    ut_updateTag;
+    f_flush;
+    i_invL2;
+    p_popRequestQueue;
+  }
 }
diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
index 7e0ad4ed96..8244879c55 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
@@ -55,6 +55,8 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
     I, AccessPermission:Invalid, desc="Invalid";
     V, AccessPermission:Read_Only, desc="Valid";
     A, AccessPermission:Invalid, desc="Waiting on Atomic";
+
+    F, AccessPermission:Invalid, desc="Flushing; Waiting for Ack";
   }
 
   enumeration(Event, desc="TCP Events") {
@@ -256,6 +258,8 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
       peek(responseToTCP_in, ResponseMsg, block_on="addr") {
         Entry cache_entry := getCacheEntry(in_msg.addr);
         TBE tbe := TBEs.lookup(in_msg.addr);
+        DPRINTF(RubySlicc, "In responseToTCP_in with %s\n", in_msg);
+
         if (in_msg.Type == CoherenceResponseType:TDSysResp) {
           if (disableL1 || in_msg.isGLCSet || in_msg.isSLCSet) {
             // If L1 is disabled or requests have GLC or SLC flag set,
@@ -273,6 +277,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
         } else if (in_msg.Type == CoherenceResponseType:TDSysWBAck
                    || in_msg.Type == CoherenceResponseType:NBSysWBAck) {
+          DPRINTF(RubySlicc, "Issuing TCC_AckWB\n"); // log before firing the event
           trigger(Event:TCC_AckWB, in_msg.addr, cache_entry, tbe);
         } else {
           error("Unexpected Response Message to Core");
         }
@@ -469,6 +474,24 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
   action(sf_setFlush, "sf", desc="set flush") {
     inFlush := true;
     APPEND_TRANSITION_COMMENT(" inFlush is true");
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Requestor := machineID;
+      assert(is_valid(cache_entry));
+      out_msg.DataBlk := cache_entry.DataBlk;
+      out_msg.writeMask.clear();
+      out_msg.writeMask.orMask(cache_entry.writeMask);
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.MessageSize := MessageSizeType:Data;
+      out_msg.Type := CoherenceRequestType:WriteFlush;
+      out_msg.InitialRequestTime := curCycle();
+      out_msg.Shared := false;
+      out_msg.isSLCSet := false;
+      peek(mandatoryQueue_in, RubyRequest) {
+        out_msg.instSeqNum := in_msg.instSeqNum;
+      }
+    }
   }
 
   action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
@@ -524,6 +547,16 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
     cache_entry.Dirty := true;
   }
 
+  action(f_flushDone, "f", desc="flush done") {
+    assert(is_valid(cache_entry));
+
+    if (use_seq_not_coal) {
+      sequencer.writeCallback(address, cache_entry.DataBlk, false, MachineType:L1Cache);
+    } else {
+      coalescer.writeCallback(address, MachineType:L1Cache, cache_entry.DataBlk);
+    }
+  }
+
   action(inv_invDone, "inv", desc="local inv done") {
     if (use_seq_not_coal) {
       DPRINTF(RubySlicc, "Sequencer does not define invCallback!\n");
@@ -695,11 +728,16 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
     ic_invCache;
   }
 
-  transition({V, I, A},Flush) {TagArrayFlash} {
+  transition({V,I}, Flush, F) {TagArrayFlash} {
+    a_allocate;
     sf_setFlush;
     p_popMandatoryQueue;
   }
 
+  transition(A, Flush) {
+    z_stall;
+  }
+
   transition({I, V}, Evict, I) {TagArrayFlash} {
     inv_invDone;
     p_popMandatoryQueue;
@@ -716,4 +754,10 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
     wd_wtDone;
     pr_popResponseQueue;
   }
+
+  transition(F, TCC_AckWB, I) {
+    f_flushDone;
+    pr_popResponseQueue;
+    ic_invCache;
+  }
 }
diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
index 774b54a432..eed750832f 100644
--- a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
+++ b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
@@ -83,6 +83,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
     BM_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
     B_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
     B, AccessPermission:Backing_Store, desc="sent response, Blocked til ack";
+
+    F, AccessPermission:Busy, desc="sent Flush, blocked till ack";
   }
 
   // Events
@@ -120,6 +122,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
     // DMA
     DmaRead, desc="DMA read";
     DmaWrite, desc="DMA write";
+
+    // Flush
+    Flush, desc="Flush entry";
   }
 
   enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
@@ -411,6 +416,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
             DPRINTF(RubySlicc, "Got VicClean from %s on %s\n", in_msg.Requestor, in_msg.addr);
             trigger(Event:VicClean, in_msg.addr, entry, tbe);
           }
+        } else if (in_msg.Type == CoherenceRequestType:WriteFlush) {
+          DPRINTF(RubySlicc, "Got Flush from %s on %s\n", in_msg.Requestor, in_msg.addr);
+          trigger(Event:Flush, in_msg.addr, entry, tbe);
         } else {
           error("Bad request message type");
         }
@@ -562,6 +570,23 @@ machine(MachineType:Directory, "AMD Baseline protocol")
     }
   }
 
+  action(rf_sendResponseFlush, "rf", desc="send Flush Ack") { // NOTE(review): no transition added by this patch invokes this action
+    peek(memQueue_in, MemoryMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:NBSysWBAck;
+        out_msg.Destination.add(tbe.OriginalRequestor);
+        out_msg.WTRequestor := tbe.WTRequestor;
+        out_msg.Sender := machineID;
+        out_msg.MessageSize := MessageSizeType:Writeback_Control;
+        out_msg.InitialRequestTime := tbe.InitialRequestTime;
+        out_msg.ForwardRequestTime := curCycle();
+        out_msg.ProbeRequestStartTime := curCycle();
+        //out_msg.instSeqNum := in_msg.instSeqNum;
+      }
+    }
+  }
+
   action(l_queueMemWBReq, "lq", desc="Write WB data to memory") {
     peek(responseNetwork_in, ResponseMsg) {
       enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {
@@ -933,6 +958,23 @@ machine(MachineType:Directory, "AMD Baseline protocol")
     }
   }
 
+  action(f_writeFlushDataToMemory, "f", desc="Write flush data to memory") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {
+        out_msg.addr := address;
+        out_msg.Type := MemoryRequestType:MEMORY_WB;
+        out_msg.Sender := machineID;
+        out_msg.MessageSize := MessageSizeType:Writeback_Data;
+        out_msg.DataBlk := in_msg.DataBlk;
+      }
+      if (tbe.Dirty == false) {
+        // have to update the TBE, too, because of how this
+        // directory deals with functional writes
+        tbe.DataBlk := in_msg.DataBlk;
+      }
+    }
+  }
+
   action(atd_allocateTBEforDMA, "atd", desc="allocate TBE Entry for DMA") {
     check_allocate(TBEs);
     peek(dmaRequestQueue_in, DMARequestMsg) {
@@ -1553,4 +1595,17 @@ machine(MachineType:Directory, "AMD Baseline protocol")
     dt_deallocateTBE;
     pt_popTriggerQueue;
   }
+
+  transition(U, Flush, F) {L3TagArrayRead, L3TagArrayWrite} {
+    t_allocateTBE;
+    f_writeFlushDataToMemory;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(F, WBAck, U) {
+    pm_popMemQueue;
+    dt_deallocateTBE;
+  }
+
 }