mem-ruby: Remove VIPER StoreThrough temp cache storage (#1156)

StoreThrough in VIPER when the TCP is disabled, GLC bit is set, or SLC
bit is set will bypass the TCP, but will temporarily allocate a cache
entry seemingly to handle write coalescing with valid blocks. It does
not attempt to evict a block if the set is full and the address is
invalid. This causes a panic if the set is full as there is no spare
cache entry to use temporarily for DataBlk manipulation. However,
a cache block is not required for this.

This commit removes using a cache block for StoreThrough with invalid
blocks as there is no existing data to coalesce with. It creates no
allocate variants of the actions needed in StoreThrough and pulls the
DataBlk information from the in_msg instead. Non-invalid blocks do not
have this panic as they have a cache entry already.

Fixes issues with StoreThroughs on more aggressive architectures like
MI300.

Change-Id: Id8687eccb991e967bb5292068cbe7686e0930d7d
This commit is contained in:
Matthew Poremba
2024-05-28 11:02:00 -07:00
committed by GitHub
parent 5ec1acaf5f
commit e82cf20150

View File

@@ -449,6 +449,28 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
}
}
// Issue a WriteThrough request to the TCC without allocating a TCP cache
// entry. No-allocate variant of the write-through action: the store data and
// write mask are taken directly from the mandatory-queue request (in_msg)
// rather than from a cache_entry, so no temporary block is needed when the
// address is not resident in the TCP.
action(wtna_writeThroughNoAlloc, "wtna", desc="Write through without allocation") {
enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
peek(mandatoryQueue_in, RubyRequest) {
out_msg.addr := address;
out_msg.Requestor := machineID;
// Copy only the bytes covered by the request's write mask into the
// outgoing data block.
out_msg.DataBlk.copyPartial(in_msg.WTData, in_msg.writeMask);
out_msg.writeMask := in_msg.writeMask;
// Route to the TCC bank responsible for this address range.
out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
TCC_select_low_bit, TCC_select_num_bits));
out_msg.MessageSize := MessageSizeType:Data;
out_msg.Type := CoherenceRequestType:WriteThrough;
out_msg.InitialRequestTime := curCycle();
out_msg.Shared := false;
// forward inst sequence number to lower TCC
out_msg.instSeqNum := in_msg.instSeqNum;
// Propagate the GLC/SLC cache-bypass bits from the original request.
out_msg.isGLCSet := in_msg.isGLCSet;
out_msg.isSLCSet := in_msg.isSLCSet;
}
}
}
action(at_atomicThrough, "at", desc="send Atomic") {
peek(mandatoryQueue_in, RubyRequest) {
enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
@@ -597,6 +619,19 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
cache_entry.Dirty := true;
}
// Complete a store that missed in the TCP without allocating a cache entry.
// No-allocate variant of the store-done-miss action: the callback data comes
// from the in_msg (the original request) instead of a cache_entry.
action(sna_storeDoneMissNoAlloc, "sna", desc="local store done (misses in TCP)") {
peek(mandatoryQueue_in, RubyRequest) {
// writeCallback requires pass-by-reference and in_msg.WTData is a const value.
DataBlock tmp := in_msg.WTData;
// use_seq_not_coal selects whether completion goes through the
// sequencer or the coalescer.
if (use_seq_not_coal) {
sequencer.writeCallback(address, tmp, false, MachineType:L1Cache);
} else {
coalescer.writeCallback(address, MachineType:L1Cache, tmp);
}
}
}
action(f_flushDone, "f", desc="flush done") {
assert(is_valid(cache_entry));
@@ -755,12 +790,9 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
}
// StoreThrough from the Invalid state: complete the store locally and write
// it through to the TCC without allocating a TCP cache entry.
// NOTE(review): this span is a diff rendering with the +/- markers stripped,
// so removed and added lines appear interleaved. Per the commit message,
// a_allocate, dw_dirtyWrite, s_storeDoneMiss, wt_writeThrough and
// ic_invCache are the REMOVED lines, and sna_storeDoneMissNoAlloc /
// wtna_writeThroughNoAlloc are their ADDED no-alloc replacements — the
// final transition should contain only the no-alloc variants. Confirm
// against the upstream gem5 source.
transition(I, StoreThrough) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
a_allocate;
dw_dirtyWrite;
s_storeDoneMiss;
sna_storeDoneMissNoAlloc;
uu_profileDataMiss;
wt_writeThrough;
ic_invCache;
wtna_writeThroughNoAlloc;
p_popMandatoryQueue;
}