mem-ruby: Add new far-atomics feature in CHI (#177)
Added a new feature to the CHI protocol (in collaboration with @tiagormk). Here is the Jira ticket: [https://gem5.atlassian.net/browse/GEM5-1326](https://gem5.atlassian.net/browse/GEM5-1326). As described in the CHI specification, far atomic transactions enable remote execution of Atomic Memory Operations (AMOs). This pull request incorporates several changes: * Fix the Arm ISA definition of the Swap instructions. These instructions should return an operand, so their ISA definition should be a Return Operation. * Enable AMOs in the Ruby Mem Test to verify that AMOs work. * Enable near and far AMOs in the CHI Cache Controller. Three configuration parameters have been added to tune this behavior: * policy_type: sets the atomic policy to one of those described in [our paper](https://dl.acm.org/doi/10.1145/3579371.3589065) * atomic_op_latency: simulates the AMO ALU operation latency * comp_anr: configures the Atomic No Return transaction to split CompDBIDResp into two separate messages, DBIDResp and Comp
This commit is contained in:
@@ -62,6 +62,12 @@ parser.add_argument(
|
||||
default=0,
|
||||
help="percentage of accesses that should be functional",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--atomic",
|
||||
type=int,
|
||||
default=30,
|
||||
help="percentage of accesses that should be atomic",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--suppress-func-errors",
|
||||
action="store_true",
|
||||
@@ -105,6 +111,7 @@ cpus = [
|
||||
max_loads=args.maxloads,
|
||||
percent_functional=args.functional,
|
||||
percent_uncacheable=0,
|
||||
percent_atomic=args.atomic,
|
||||
progress_interval=args.progress,
|
||||
suppress_func_errors=args.suppress_func_errors,
|
||||
)
|
||||
@@ -133,7 +140,7 @@ else:
|
||||
dmas = []
|
||||
|
||||
dma_ports = []
|
||||
for (i, dma) in enumerate(dmas):
|
||||
for i, dma in enumerate(dmas):
|
||||
dma_ports.append(dma.test)
|
||||
Ruby.create_system(args, False, system, dma_ports=dma_ports)
|
||||
|
||||
@@ -155,7 +162,7 @@ system.ruby.randomization = True
|
||||
|
||||
assert len(cpus) == len(system.ruby._cpu_ports)
|
||||
|
||||
for (i, cpu) in enumerate(cpus):
|
||||
for i, cpu in enumerate(cpus):
|
||||
#
|
||||
# Tie the cpu memtester ports to the correct system ports
|
||||
#
|
||||
|
||||
@@ -244,6 +244,7 @@ class CHI_L1Controller(CHI_Cache_Controller):
|
||||
self.alloc_on_readunique = True
|
||||
self.alloc_on_readonce = True
|
||||
self.alloc_on_writeback = True
|
||||
self.alloc_on_atomic = False
|
||||
self.dealloc_on_unique = False
|
||||
self.dealloc_on_shared = False
|
||||
self.dealloc_backinv_unique = True
|
||||
@@ -280,6 +281,7 @@ class CHI_L2Controller(CHI_Cache_Controller):
|
||||
self.alloc_on_readunique = True
|
||||
self.alloc_on_readonce = True
|
||||
self.alloc_on_writeback = True
|
||||
self.alloc_on_atomic = False
|
||||
self.dealloc_on_unique = False
|
||||
self.dealloc_on_shared = False
|
||||
self.dealloc_backinv_unique = True
|
||||
@@ -316,6 +318,7 @@ class CHI_HNFController(CHI_Cache_Controller):
|
||||
self.alloc_on_readunique = False
|
||||
self.alloc_on_readonce = True
|
||||
self.alloc_on_writeback = True
|
||||
self.alloc_on_atomic = True
|
||||
self.dealloc_on_unique = True
|
||||
self.dealloc_on_shared = False
|
||||
self.dealloc_backinv_unique = False
|
||||
@@ -392,6 +395,7 @@ class CHI_DMAController(CHI_Cache_Controller):
|
||||
self.alloc_on_readunique = False
|
||||
self.alloc_on_readonce = False
|
||||
self.alloc_on_writeback = False
|
||||
self.alloc_on_atomic = False
|
||||
self.dealloc_on_unique = False
|
||||
self.dealloc_on_shared = False
|
||||
self.dealloc_backinv_unique = False
|
||||
|
||||
@@ -827,35 +827,35 @@ let {{
|
||||
ret_op=False, flavor="release").emit(OP_DICT['MIN'])
|
||||
|
||||
AtomicArithmeticSingleOp("swpb", "SWPB", 1, unsign=True,
|
||||
ret_op=False, flavor="normal").emit(OP_DICT['SWP'])
|
||||
flavor="normal").emit(OP_DICT['SWP'])
|
||||
AtomicArithmeticSingleOp("swplb", "SWPLB", 1, unsign=True,
|
||||
ret_op=False, flavor="release").emit(OP_DICT['SWP'])
|
||||
flavor="release").emit(OP_DICT['SWP'])
|
||||
AtomicArithmeticSingleOp("swpab", "SWPAB", 1, unsign=True,
|
||||
ret_op=False, flavor="acquire").emit(OP_DICT['SWP'])
|
||||
flavor="acquire").emit(OP_DICT['SWP'])
|
||||
AtomicArithmeticSingleOp("swplab", "SWPLAB", 1, unsign=True,
|
||||
ret_op=False, flavor="acquire_release").emit(OP_DICT['SWP'])
|
||||
flavor="acquire_release").emit(OP_DICT['SWP'])
|
||||
AtomicArithmeticSingleOp("swph", "SWPH", 2, unsign=True,
|
||||
ret_op=False, flavor="normal").emit(OP_DICT['SWP'])
|
||||
flavor="normal").emit(OP_DICT['SWP'])
|
||||
AtomicArithmeticSingleOp("swplh", "SWPLH", 2, unsign=True,
|
||||
ret_op=False, flavor="release").emit(OP_DICT['SWP'])
|
||||
flavor="release").emit(OP_DICT['SWP'])
|
||||
AtomicArithmeticSingleOp("swpah", "SWPAH", 2, unsign=True,
|
||||
ret_op=False, flavor="acquire").emit(OP_DICT['SWP'])
|
||||
flavor="acquire").emit(OP_DICT['SWP'])
|
||||
AtomicArithmeticSingleOp("swplah", "SWPLAH", 2, unsign=True,
|
||||
ret_op=False, flavor="acquire_release").emit(OP_DICT['SWP'])
|
||||
flavor="acquire_release").emit(OP_DICT['SWP'])
|
||||
AtomicArithmeticSingleOp("swp", "SWP", 4, unsign=True,
|
||||
ret_op=False, flavor="normal").emit(OP_DICT['SWP'])
|
||||
flavor="normal").emit(OP_DICT['SWP'])
|
||||
AtomicArithmeticSingleOp("swpl", "SWPL", 4, unsign=True,
|
||||
ret_op=False, flavor="release").emit(OP_DICT['SWP'])
|
||||
flavor="release").emit(OP_DICT['SWP'])
|
||||
AtomicArithmeticSingleOp("swpa", "SWPA", 4, unsign=True,
|
||||
ret_op=False, flavor="acquire").emit(OP_DICT['SWP'])
|
||||
flavor="acquire").emit(OP_DICT['SWP'])
|
||||
AtomicArithmeticSingleOp("swpla", "SWPLA", 4, unsign=True,
|
||||
ret_op=False, flavor="acquire_release").emit(OP_DICT['SWP'])
|
||||
flavor="acquire_release").emit(OP_DICT['SWP'])
|
||||
AtomicArithmeticSingleOp("swp64", "SWP64", 8, unsign=True,
|
||||
ret_op=False, flavor="normal").emit(OP_DICT['SWP'])
|
||||
flavor="normal").emit(OP_DICT['SWP'])
|
||||
AtomicArithmeticSingleOp("swpl64", "SWPL64", 8, unsign=True,
|
||||
ret_op=False, flavor="release").emit(OP_DICT['SWP'])
|
||||
flavor="release").emit(OP_DICT['SWP'])
|
||||
AtomicArithmeticSingleOp("swpa64", "SWPA64", 8, unsign=True,
|
||||
ret_op=False, flavor="acquire").emit(OP_DICT['SWP'])
|
||||
flavor="acquire").emit(OP_DICT['SWP'])
|
||||
AtomicArithmeticSingleOp("swpla64", "SWPLA64", 8, unsign=True,
|
||||
ret_op=False, flavor="acquire_release").emit(OP_DICT['SWP'])
|
||||
flavor="acquire_release").emit(OP_DICT['SWP'])
|
||||
}};
|
||||
|
||||
@@ -63,6 +63,7 @@ class MemTest(ClockedObject):
|
||||
percent_reads = Param.Percent(65, "Percentage reads")
|
||||
percent_functional = Param.Percent(50, "Percentage functional accesses")
|
||||
percent_uncacheable = Param.Percent(10, "Percentage uncacheable")
|
||||
percent_atomic = Param.Percent(50, "Percentage atomics")
|
||||
|
||||
# Determine how often to print progress messages and what timeout
|
||||
# to use for checking progress of both requests and responses
|
||||
|
||||
@@ -94,6 +94,7 @@ MemTest::MemTest(const Params &p)
|
||||
percentReads(p.percent_reads),
|
||||
percentFunctional(p.percent_functional),
|
||||
percentUncacheable(p.percent_uncacheable),
|
||||
percentAtomic(p.percent_atomic),
|
||||
requestorId(p.system->getRequestorId(this)),
|
||||
blockSize(p.system->cacheLineSize()),
|
||||
blockAddrMask(blockSize - 1),
|
||||
@@ -115,6 +116,7 @@ MemTest::MemTest(const Params &p)
|
||||
// set up counters
|
||||
numReads = 0;
|
||||
numWrites = 0;
|
||||
numAtomics = 0;
|
||||
|
||||
// kick things into action
|
||||
schedule(tickEvent, curTick());
|
||||
@@ -142,7 +144,7 @@ MemTest::completeRequest(PacketPtr pkt, bool functional)
|
||||
outstandingAddrs.erase(remove_addr);
|
||||
|
||||
DPRINTF(MemTest, "Completing %s at address %x (blk %x) %s\n",
|
||||
pkt->isWrite() ? "write" : "read",
|
||||
pkt->isWrite() ? pkt->isAtomicOp() ? "atomic" : "write" : "read",
|
||||
req->getPaddr(), blockAlign(req->getPaddr()),
|
||||
pkt->isError() ? "error" : "success");
|
||||
|
||||
@@ -153,7 +155,25 @@ MemTest::completeRequest(PacketPtr pkt, bool functional)
|
||||
panic( "%s access failed at %#x\n",
|
||||
pkt->isWrite() ? "Write" : "Read", req->getPaddr());
|
||||
} else {
|
||||
if (pkt->isRead()) {
|
||||
if (pkt->isAtomicOp()) {
|
||||
uint8_t ref_data = referenceData[req->getPaddr()];
|
||||
if (pkt_data[0] != ref_data) {
|
||||
panic("%s: read of %x (blk %x) @ cycle %d "
|
||||
"returns %x, expected %x\n", name(),
|
||||
req->getPaddr(), blockAlign(req->getPaddr()), curTick(),
|
||||
pkt_data[0], ref_data);
|
||||
}
|
||||
DPRINTF(MemTest,
|
||||
"Completing atomic at address %x (blk %x) value %x\n",
|
||||
req->getPaddr(), blockAlign(req->getPaddr()),
|
||||
pkt_data[0]);
|
||||
|
||||
referenceData[req->getPaddr()] =
|
||||
atomicPendingData[req->getPaddr()];
|
||||
|
||||
numAtomics++;
|
||||
stats.numAtomics++;
|
||||
} else if (pkt->isRead()) {
|
||||
uint8_t ref_data = referenceData[req->getPaddr()];
|
||||
if (pkt_data[0] != ref_data) {
|
||||
panic("%s: read of %x (blk %x) @ cycle %d "
|
||||
@@ -167,9 +187,10 @@ MemTest::completeRequest(PacketPtr pkt, bool functional)
|
||||
|
||||
if (numReads == (uint64_t)nextProgressMessage) {
|
||||
ccprintf(std::cerr,
|
||||
"%s: completed %d read, %d write accesses @%d\n",
|
||||
name(), numReads, numWrites, curTick());
|
||||
nextProgressMessage += progressInterval;
|
||||
"%s: completed %d read, %d write, "
|
||||
"%d atomic accesses @%d\n",
|
||||
name(), numReads, numWrites, numAtomics, curTick());
|
||||
nextProgressMessage += progressInterval;
|
||||
}
|
||||
|
||||
if (maxLoads != 0 && numReads >= maxLoads)
|
||||
@@ -205,7 +226,9 @@ MemTest::MemTestStats::MemTestStats(statistics::Group *parent)
|
||||
ADD_STAT(numReads, statistics::units::Count::get(),
|
||||
"number of read accesses completed"),
|
||||
ADD_STAT(numWrites, statistics::units::Count::get(),
|
||||
"number of write accesses completed")
|
||||
"number of write accesses completed"),
|
||||
ADD_STAT(numAtomics, statistics::units::Count::get(),
|
||||
"number of atomic accesses completed")
|
||||
{
|
||||
|
||||
}
|
||||
@@ -221,6 +244,8 @@ MemTest::tick()
|
||||
unsigned cmd = random_mt.random(0, 100);
|
||||
uint8_t data = random_mt.random<uint8_t>();
|
||||
bool uncacheable = random_mt.random(0, 100) < percentUncacheable;
|
||||
bool do_atomic = (random_mt.random(0, 100) < percentAtomic) &&
|
||||
!uncacheable;
|
||||
unsigned base = random_mt.random(0, 1);
|
||||
Request::Flags flags;
|
||||
Addr paddr;
|
||||
@@ -281,13 +306,36 @@ MemTest::tick()
|
||||
pkt = new Packet(req, MemCmd::ReadReq);
|
||||
pkt->dataDynamic(pkt_data);
|
||||
} else {
|
||||
DPRINTF(MemTest, "Initiating %swrite at addr %x (blk %x) value %x\n",
|
||||
do_functional ? "functional " : "", req->getPaddr(),
|
||||
blockAlign(req->getPaddr()), data);
|
||||
if (do_atomic) {
|
||||
DPRINTF(MemTest,
|
||||
"Initiating atomic at addr %x (blk %x) value %x\n",
|
||||
req->getPaddr(), blockAlign(req->getPaddr()), data);
|
||||
|
||||
pkt = new Packet(req, MemCmd::WriteReq);
|
||||
pkt->dataDynamic(pkt_data);
|
||||
pkt_data[0] = data;
|
||||
TypedAtomicOpFunctor<uint8_t> *_amo_op =
|
||||
new AtomicGeneric3Op<uint8_t>(
|
||||
data, data,
|
||||
[](uint8_t* b, uint8_t a, uint8_t c){
|
||||
*b = c;
|
||||
});
|
||||
assert(_amo_op);
|
||||
AtomicOpFunctorPtr amo_op = AtomicOpFunctorPtr(_amo_op);
|
||||
req->setAtomicOpFunctor(std::move(amo_op));
|
||||
req->setFlags(Request::ATOMIC_RETURN_OP);
|
||||
|
||||
pkt = new Packet(req, MemCmd::WriteReq);
|
||||
pkt->dataDynamic(pkt_data);
|
||||
pkt_data[0] = data;
|
||||
atomicPendingData[req->getPaddr()] = data;
|
||||
} else {
|
||||
DPRINTF(MemTest,
|
||||
"Initiating %swrite at addr %x (blk %x) value %x\n",
|
||||
do_functional ? "functional " : "", req->getPaddr(),
|
||||
blockAlign(req->getPaddr()), data);
|
||||
|
||||
pkt = new Packet(req, MemCmd::WriteReq);
|
||||
pkt->dataDynamic(pkt_data);
|
||||
pkt_data[0] = data;
|
||||
}
|
||||
}
|
||||
|
||||
// there is no point in ticking if we are waiting for a retry
|
||||
|
||||
@@ -131,6 +131,7 @@ class MemTest : public ClockedObject
|
||||
const unsigned percentReads;
|
||||
const unsigned percentFunctional;
|
||||
const unsigned percentUncacheable;
|
||||
const unsigned percentAtomic;
|
||||
|
||||
/** Request id for all generated traffic */
|
||||
RequestorID requestorId;
|
||||
@@ -138,6 +139,7 @@ class MemTest : public ClockedObject
|
||||
unsigned int id;
|
||||
|
||||
std::unordered_set<Addr> outstandingAddrs;
|
||||
std::unordered_map<Addr, uint8_t> atomicPendingData;
|
||||
|
||||
// store the expected value for the addresses we have touched
|
||||
std::unordered_map<Addr, uint8_t> referenceData;
|
||||
@@ -169,6 +171,7 @@ class MemTest : public ClockedObject
|
||||
|
||||
uint64_t numReads;
|
||||
uint64_t numWrites;
|
||||
uint64_t numAtomics;
|
||||
const uint64_t maxLoads;
|
||||
|
||||
const bool atomic;
|
||||
@@ -180,6 +183,7 @@ class MemTest : public ClockedObject
|
||||
MemTestStats(statistics::Group *parent);
|
||||
statistics::Scalar numReads;
|
||||
statistics::Scalar numWrites;
|
||||
statistics::Scalar numAtomics;
|
||||
} stats;
|
||||
|
||||
/**
|
||||
|
||||
@@ -757,6 +757,13 @@ class Request : public Extensible<Request>
|
||||
return atomicOpFunctor.get();
|
||||
}
|
||||
|
||||
void
|
||||
setAtomicOpFunctor(AtomicOpFunctorPtr amo_op)
|
||||
{
|
||||
atomicOpFunctor = std::move(amo_op);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Accessor for hardware transactional memory abort cause.
|
||||
*/
|
||||
|
||||
@@ -139,6 +139,13 @@ structure (Sequencer, external = "yes") {
|
||||
Cycles, Cycles, Cycles);
|
||||
void writeUniqueCallback(Addr, DataBlock);
|
||||
|
||||
void atomicCallback(Addr, DataBlock);
|
||||
void atomicCallback(Addr, DataBlock, bool);
|
||||
void atomicCallback(Addr, DataBlock, bool, MachineType);
|
||||
void atomicCallback(Addr, DataBlock, bool, MachineType,
|
||||
Cycles, Cycles, Cycles);
|
||||
|
||||
|
||||
void unaddressedCallback(Addr, RubyRequestType);
|
||||
void unaddressedCallback(Addr, RubyRequestType, MachineType);
|
||||
void unaddressedCallback(Addr, RubyRequestType, MachineType,
|
||||
|
||||
@@ -148,15 +148,22 @@ action(AllocateTBE_SeqRequest, desc="") {
|
||||
out_msg.is_remote_pf := false;
|
||||
out_msg.txnId := max_outstanding_transactions;
|
||||
|
||||
out_msg.atomic_op.clear();
|
||||
out_msg.atomic_op.orMask(in_msg.writeMask);
|
||||
|
||||
if ((in_msg.Type == RubyRequestType:LD) ||
|
||||
(in_msg.Type == RubyRequestType:IFETCH)) {
|
||||
out_msg.type := CHIRequestType:Load;
|
||||
} else if (in_msg.Type == RubyRequestType:ST) {
|
||||
} else if (in_msg.Type == RubyRequestType:ST) {
|
||||
if (in_msg.Size == blockSize) {
|
||||
out_msg.type := CHIRequestType:StoreLine;
|
||||
} else {
|
||||
out_msg.type := CHIRequestType:Store;
|
||||
}
|
||||
} else if (in_msg.Type == RubyRequestType:ATOMIC_RETURN) {
|
||||
out_msg.type := CHIRequestType:AtomicLoad;
|
||||
} else if (in_msg.Type == RubyRequestType:ATOMIC_NO_RETURN){
|
||||
out_msg.type := CHIRequestType:AtomicStore;
|
||||
} else {
|
||||
error("Invalid RubyRequestType");
|
||||
}
|
||||
@@ -769,6 +776,148 @@ action(Initiate_StoreMiss, desc="") {
|
||||
}
|
||||
}
|
||||
|
||||
action(Initiate_Atomic_UC, desc="") {
|
||||
if ((policy_type == 0) || // ALL NEAR
|
||||
(policy_type == 1) || // UNIQUE NEAR
|
||||
(policy_type == 2) // PRESENT NEAR
|
||||
){
|
||||
tbe.actions.push(Event:DataArrayRead);
|
||||
tbe.actions.push(Event:DelayAtomic);
|
||||
tbe.actions.push(Event:AtomicHit);
|
||||
tbe.actions.pushNB(Event:DataArrayWrite);
|
||||
tbe.actions.pushNB(Event:TagArrayWrite);
|
||||
} else {
|
||||
error("Invalid policy type");
|
||||
}
|
||||
}
|
||||
|
||||
action(Initiate_Atomic_UD, desc="") {
|
||||
if ((policy_type == 0) || // ALL NEAR
|
||||
(policy_type == 1) || // UNIQUE NEAR
|
||||
(policy_type == 2) // PRESENT NEAR
|
||||
){
|
||||
tbe.actions.push(Event:DataArrayRead);
|
||||
tbe.actions.push(Event:DelayAtomic);
|
||||
tbe.actions.push(Event:AtomicHit);
|
||||
tbe.actions.pushNB(Event:DataArrayWrite);
|
||||
tbe.actions.pushNB(Event:TagArrayWrite);
|
||||
} else {
|
||||
error("Invalid policy type");
|
||||
}
|
||||
}
|
||||
|
||||
action(Initiate_AtomicReturn_I, desc="") {
|
||||
if (policy_type == 0){ // ALL NEAR
|
||||
tbe.actions.push(Event:SendReadUnique);
|
||||
tbe.actions.push(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:CheckCacheFill);
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
tbe.actions.push(Event:TagArrayWrite);
|
||||
tbe.atomic_to_be_done := true;
|
||||
} else if ((policy_type == 1) || // UNIQUE NEAR
|
||||
(policy_type == 2)) { // PRESENT NEAR
|
||||
tbe.actions.push(Event:SendAtomicReturn_NoWait);
|
||||
tbe.dataToBeInvalid := true;
|
||||
tbe.doCacheFill := false;
|
||||
tbe.atomic_to_be_done := false;
|
||||
} else {
|
||||
error("Invalid policy type");
|
||||
}
|
||||
}
|
||||
|
||||
action(Initiate_AtomicNoReturn_I, desc="") {
|
||||
if (policy_type == 0){ // ALL NEAR
|
||||
tbe.actions.push(Event:SendReadUnique);
|
||||
tbe.actions.push(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:CheckCacheFill);
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
tbe.actions.push(Event:TagArrayWrite);
|
||||
tbe.atomic_to_be_done := true;
|
||||
} else if (policy_type == 1) { // UNIQUE NEAR
|
||||
tbe.actions.push(Event:SendAtomicNoReturn);
|
||||
tbe.actions.push(Event:SendANRData);
|
||||
tbe.dataToBeInvalid := true;
|
||||
tbe.doCacheFill := false;
|
||||
tbe.atomic_to_be_done := false;
|
||||
} else {
|
||||
error("Invalid policy type");
|
||||
}
|
||||
}
|
||||
|
||||
action(Initiate_AtomicReturn_SD, desc="") {
|
||||
if (policy_type == 0){ // ALL NEAR
|
||||
tbe.actions.push(Event:SendReadUnique);
|
||||
tbe.actions.push(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:CheckCacheFill);
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
tbe.actions.push(Event:TagArrayWrite);
|
||||
tbe.atomic_to_be_done := true;
|
||||
} else if (policy_type == 1) { // UNIQUE NEAR
|
||||
tbe.actions.push(Event:SendAtomicReturn_NoWait);
|
||||
tbe.dataToBeInvalid := true;
|
||||
tbe.doCacheFill := false;
|
||||
tbe.atomic_to_be_done := false;
|
||||
} else {
|
||||
error("Invalid policy type");
|
||||
}
|
||||
}
|
||||
|
||||
action(Initiate_AtomicNoReturn_SD, desc="") {
|
||||
if (policy_type == 0){ // ALL NEAR
|
||||
tbe.actions.push(Event:SendReadUnique);
|
||||
tbe.actions.push(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:CheckCacheFill);
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
tbe.actions.push(Event:TagArrayWrite);
|
||||
tbe.atomic_to_be_done := true;
|
||||
} else if (policy_type == 1) { // UNIQUE NEAR
|
||||
tbe.actions.push(Event:SendAtomicNoReturn);
|
||||
tbe.actions.push(Event:SendANRData);
|
||||
tbe.dataToBeInvalid := true;
|
||||
tbe.doCacheFill := false;
|
||||
tbe.atomic_to_be_done := false;
|
||||
} else {
|
||||
error("Invalid policy type");
|
||||
}
|
||||
}
|
||||
|
||||
action(Initiate_AtomicReturn_SC, desc="") {
|
||||
if (policy_type == 0){ // ALL NEAR
|
||||
tbe.actions.push(Event:SendReadUnique);
|
||||
tbe.actions.push(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:CheckCacheFill);
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
tbe.actions.push(Event:TagArrayWrite);
|
||||
tbe.atomic_to_be_done := true;
|
||||
} else if (policy_type == 1) { // UNIQUE NEAR
|
||||
tbe.actions.push(Event:SendAtomicReturn_NoWait);
|
||||
tbe.dataToBeInvalid := true;
|
||||
tbe.doCacheFill := false;
|
||||
tbe.atomic_to_be_done := false;
|
||||
} else {
|
||||
error("Invalid policy type");
|
||||
}
|
||||
}
|
||||
|
||||
action(Initiate_AtomicNoReturn_SC, desc="") {
|
||||
if (policy_type == 0){ // ALL NEAR
|
||||
tbe.actions.push(Event:SendReadUnique);
|
||||
tbe.actions.push(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:CheckCacheFill);
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
tbe.actions.push(Event:TagArrayWrite);
|
||||
tbe.atomic_to_be_done := true;
|
||||
} else if (policy_type == 1) { // UNIQUE NEAR
|
||||
tbe.actions.push(Event:SendAtomicNoReturn);
|
||||
tbe.actions.push(Event:SendANRData);
|
||||
tbe.dataToBeInvalid := true;
|
||||
tbe.doCacheFill := false;
|
||||
tbe.atomic_to_be_done := false;
|
||||
} else {
|
||||
error("Invalid policy type");
|
||||
}
|
||||
}
|
||||
|
||||
action(Initiate_StoreUpgrade, desc="") {
|
||||
assert(tbe.dataValid);
|
||||
assert(is_valid(cache_entry));
|
||||
@@ -865,8 +1014,111 @@ action(Initiate_WriteUnique_Forward, desc="") {
|
||||
tbe.actions.pushNB(Event:TagArrayWrite);
|
||||
}
|
||||
|
||||
action(Initiate_AtomicReturn_LocalWrite, desc="") {
|
||||
if ((tbe.dir_sharers.count() > 0) && tbe.dataMaybeDirtyUpstream) {
|
||||
tbe.actions.push(Event:SendSnpUnique);
|
||||
} else if (tbe.dir_sharers.count() > 0){
|
||||
// no one will send us data unless we explicitly ask
|
||||
tbe.actions.push(Event:SendSnpUniqueRetToSrc);
|
||||
}
|
||||
tbe.actions.push(Event:SendDBIDResp_AR);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
tbe.actions.pushNB(Event:SendCompData_AR);
|
||||
tbe.actions.push(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:CheckCacheFill);
|
||||
tbe.actions.push(Event:DelayAtomic);
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
tbe.actions.push(Event:TagArrayWrite);
|
||||
}
|
||||
|
||||
|
||||
action(Initiate_AtomicNoReturn_LocalWrite, desc="") {
|
||||
if ((tbe.dir_sharers.count() > 0) && tbe.dataMaybeDirtyUpstream) {
|
||||
tbe.actions.push(Event:SendSnpUnique);
|
||||
} else if (tbe.dir_sharers.count() > 0){
|
||||
// no one will send us data unless we explicitly ask
|
||||
tbe.actions.push(Event:SendSnpUniqueRetToSrc);
|
||||
}
|
||||
if (comp_wu) {
|
||||
tbe.actions.push(Event:SendDBIDResp_ANR);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
tbe.actions.pushNB(Event:SendComp_ANR);
|
||||
} else {
|
||||
tbe.actions.push(Event:SendCompDBIDResp_ANR);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
}
|
||||
tbe.actions.push(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:CheckCacheFill);
|
||||
tbe.actions.push(Event:DelayAtomic);
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
tbe.actions.push(Event:TagArrayWrite);
|
||||
}
|
||||
|
||||
|
||||
action(Initiate_AtomicReturn_Forward, desc="") {
|
||||
if ((tbe.dir_sharers.count() > 0) &&
|
||||
(tbe.dir_sharers.isElement(tbe.requestor))){
|
||||
tbe.dir_sharers.remove(tbe.requestor);
|
||||
}
|
||||
tbe.actions.push(Event:SendAtomicReturn);
|
||||
tbe.actions.push(Event:SendCompData_AR);
|
||||
tbe.actions.pushNB(Event:TagArrayWrite);
|
||||
|
||||
tbe.dataToBeInvalid := true;
|
||||
}
|
||||
|
||||
action(Initiate_AtomicNoReturn_Forward, desc="") {
|
||||
if ((tbe.dir_sharers.count() > 0) &&
|
||||
(tbe.dir_sharers.isElement(tbe.requestor))){
|
||||
tbe.dir_sharers.remove(tbe.requestor);
|
||||
}
|
||||
if (comp_wu) {
|
||||
tbe.actions.push(Event:SendAtomicNoReturn);
|
||||
tbe.actions.push(Event:SendDBIDResp_ANR);
|
||||
tbe.actions.pushNB(Event:SendComp_ANR);
|
||||
} else {
|
||||
tbe.actions.push(Event:SendAtomicNoReturn);
|
||||
tbe.actions.push(Event:SendCompDBIDResp_ANR);
|
||||
}
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
tbe.actions.push(Event:SendANRData);
|
||||
tbe.actions.pushNB(Event:TagArrayWrite);
|
||||
|
||||
tbe.dataToBeInvalid := true;
|
||||
}
|
||||
|
||||
action(Initiate_AtomicReturn_Miss, desc="") {
|
||||
tbe.actions.push(Event:SendReadNoSnp);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:SendDBIDResp_AR);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
tbe.actions.pushNB(Event:SendCompData_AR);
|
||||
tbe.actions.push(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:CheckCacheFill);
|
||||
tbe.actions.push(Event:DelayAtomic);
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
tbe.actions.push(Event:TagArrayWrite);
|
||||
}
|
||||
|
||||
action(Initiate_AtomicNoReturn_Miss, desc="") {
|
||||
assert(is_HN);
|
||||
tbe.actions.push(Event:SendReadNoSnp);
|
||||
if (comp_wu) {
|
||||
tbe.actions.push(Event:SendDBIDResp_ANR);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
tbe.actions.pushNB(Event:SendComp_ANR);
|
||||
} else {
|
||||
tbe.actions.push(Event:SendCompDBIDResp_ANR);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
}
|
||||
|
||||
tbe.actions.push(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:CheckCacheFill);
|
||||
tbe.actions.push(Event:DelayAtomic);
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
tbe.actions.push(Event:TagArrayWrite);
|
||||
}
|
||||
|
||||
action(Initiate_CopyBack, desc="") {
|
||||
// expect to receive this data after Send_CompDBIDResp
|
||||
if (tbe.reqType == CHIRequestType:WriteBackFull) {
|
||||
@@ -1157,7 +1409,9 @@ action(Send_ReadShared, desc="") {
|
||||
|
||||
action(Send_ReadNoSnp, desc="") {
|
||||
assert(is_HN);
|
||||
assert(tbe.use_DMT == false);
|
||||
assert((tbe.use_DMT == false) ||
|
||||
((tbe.reqType == CHIRequestType:AtomicReturn) ||
|
||||
(tbe.reqType == CHIRequestType:AtomicNoReturn)));
|
||||
|
||||
clearExpectedReqResp(tbe);
|
||||
tbe.expected_req_resp.addExpectedDataType(CHIDataType:CompData_UC);
|
||||
@@ -1368,6 +1622,45 @@ action(Send_WriteUnique, desc="") {
|
||||
tbe.expected_req_resp.addExpectedCount(1);
|
||||
}
|
||||
|
||||
action(Send_AtomicReturn, desc="") {
|
||||
assert(is_valid(tbe));
|
||||
|
||||
enqueue(reqOutPort, CHIRequestMsg, request_latency) {
|
||||
prepareRequestAtomic(tbe, CHIRequestType:AtomicReturn, out_msg);
|
||||
out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr));
|
||||
allowRequestRetry(tbe, out_msg);
|
||||
}
|
||||
clearExpectedReqResp(tbe);
|
||||
tbe.expected_req_resp.addExpectedRespType(CHIResponseType:DBIDResp);
|
||||
tbe.expected_req_resp.addExpectedCount(1);
|
||||
}
|
||||
|
||||
action(Send_AtomicReturn_NoWait, desc="") {
|
||||
assert(is_valid(tbe));
|
||||
|
||||
enqueue(reqOutPort, CHIRequestMsg, request_latency) {
|
||||
prepareRequestAtomic(tbe, CHIRequestType:AtomicReturn, out_msg);
|
||||
out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr));
|
||||
allowRequestRetry(tbe, out_msg);
|
||||
}
|
||||
|
||||
tbe.dataAMOValid := false;
|
||||
}
|
||||
|
||||
action(Send_AtomicNoReturn, desc="") {
|
||||
assert(is_valid(tbe));
|
||||
|
||||
enqueue(reqOutPort, CHIRequestMsg, request_latency) {
|
||||
prepareRequestAtomic(tbe, CHIRequestType:AtomicNoReturn, out_msg);
|
||||
out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr));
|
||||
allowRequestRetry(tbe, out_msg);
|
||||
}
|
||||
tbe.expected_req_resp.addExpectedRespType(CHIResponseType:CompDBIDResp);
|
||||
tbe.expected_req_resp.addExpectedRespType(CHIResponseType:DBIDResp);
|
||||
tbe.expected_req_resp.addExpectedCount(1);
|
||||
}
|
||||
|
||||
|
||||
action(Send_SnpCleanInvalid, desc="") {
|
||||
assert(is_valid(tbe));
|
||||
assert(tbe.expected_snp_resp.hasExpected() == false);
|
||||
@@ -1636,6 +1929,20 @@ action(ExpectNCBWrData, desc="") {
|
||||
tbe.dataBlkValid.setMask(addressOffset(tbe.accAddr, tbe.addr), tbe.accSize, false);
|
||||
}
|
||||
|
||||
action(ExpectNCBWrData_A, desc="") {
|
||||
// Expected data
|
||||
int num_msgs := tbe.accSize / data_channel_size;
|
||||
if ((tbe.accSize % data_channel_size) != 0) {
|
||||
num_msgs := num_msgs + 1;
|
||||
}
|
||||
tbe.expected_req_resp.clear(num_msgs);
|
||||
tbe.expected_req_resp.addExpectedDataType(CHIDataType:NCBWrData);
|
||||
tbe.expected_req_resp.setExpectedCount(1);
|
||||
|
||||
// In atomic operations we do not expect real data for the current block
|
||||
// Thus the mask bits do not care
|
||||
}
|
||||
|
||||
action(ExpectCompAck, desc="") {
|
||||
assert(is_valid(tbe));
|
||||
tbe.expected_req_resp.addExpectedRespType(CHIResponseType:CompAck);
|
||||
@@ -1658,7 +1965,22 @@ action(Receive_ReqDataResp, desc="") {
|
||||
}
|
||||
// Copy data to tbe only if we didn't have valid data or the received
|
||||
// data is dirty
|
||||
if ((tbe.dataBlkValid.isFull() == false) ||
|
||||
if ((in_msg.type == CHIDataType:NCBWrData) &&
|
||||
((tbe.reqType == CHIRequestType:AtomicReturn) ||
|
||||
(tbe.reqType == CHIRequestType:AtomicNoReturn))){
|
||||
// DO NOTHING
|
||||
} else if ((in_msg.type == CHIDataType:CompData_I) &&
|
||||
((tbe.reqType == CHIRequestType:AtomicReturn) ||
|
||||
(tbe.reqType == CHIRequestType:AtomicLoad))) {
|
||||
if(tbe.dataBlkValid.isFull()){
|
||||
tbe.dataBlkValid.clear();
|
||||
}
|
||||
tbe.oldDataBlk.copyPartial(in_msg.dataBlk, in_msg.bitMask);
|
||||
assert(tbe.dataBlkValid.isOverlap(in_msg.bitMask) == false);
|
||||
tbe.dataBlkValid.orMask(in_msg.bitMask);
|
||||
DPRINTF(RubySlicc, "Received %s\n", tbe.oldDataBlk);
|
||||
DPRINTF(RubySlicc, "dataBlkValid = %s\n", tbe.dataBlkValid);
|
||||
} else if ((tbe.dataBlkValid.isFull() == false) ||
|
||||
(in_msg.type == CHIDataType:CompData_UD_PD) ||
|
||||
(in_msg.type == CHIDataType:CompData_SD_PD) ||
|
||||
(in_msg.type == CHIDataType:CBWrData_UD_PD) ||
|
||||
@@ -1683,7 +2005,8 @@ action(Receive_RespSepDataFromCompData, desc="") {
|
||||
if (tbe.expected_req_resp.receiveResp(CHIResponseType:RespSepData) == false) {
|
||||
error("Received unexpected message");
|
||||
}
|
||||
if (is_HN == false) {
|
||||
if ((is_HN == false) && (tbe.reqType != CHIRequestType:AtomicReturn) &&
|
||||
((tbe.reqType != CHIRequestType:AtomicLoad) || (tbe.atomic_to_be_done == true))){
|
||||
// must now ack the responder
|
||||
tbe.actions.pushFrontNB(Event:SendCompAck);
|
||||
}
|
||||
@@ -1905,6 +2228,7 @@ action(UpdateDataState_FromReqDataResp, desc="") {
|
||||
|
||||
} else if (in_msg.type == CHIDataType:CompData_I) {
|
||||
tbe.dataValid := true;
|
||||
tbe.dataAMOValid := true;
|
||||
tbe.dataToBeInvalid := true;
|
||||
assert(tbe.dataMaybeDirtyUpstream == false);
|
||||
|
||||
@@ -1946,7 +2270,9 @@ action(UpdateDataState_FromReqDataResp, desc="") {
|
||||
|
||||
action(UpdateDataState_FromWUDataResp, desc="") {
|
||||
assert(is_valid(tbe));
|
||||
if (tbe.expected_req_resp.hasReceivedData()) {
|
||||
if (tbe.expected_req_resp.hasReceivedData() &&
|
||||
(tbe.reqType != CHIRequestType:AtomicReturn) &&
|
||||
(tbe.reqType != CHIRequestType:AtomicNoReturn)) {
|
||||
assert(tbe.dataBlkValid.test(addressOffset(tbe.accAddr, tbe.addr)));
|
||||
assert(tbe.dataBlkValid.test(addressOffset(tbe.accAddr, tbe.addr)
|
||||
+ tbe.accSize - 1));
|
||||
@@ -1964,6 +2290,22 @@ action(UpdateDataState_FromWUDataResp, desc="") {
|
||||
printTBEState(tbe);
|
||||
}
|
||||
|
||||
action(UpdateDataState_FromADataResp, desc="") {
|
||||
assert(is_valid(tbe));
|
||||
if (is_HN && (tbe.expected_req_resp.hasReceivedData()) &&
|
||||
((tbe.reqType == CHIRequestType:AtomicReturn) ||
|
||||
(tbe.reqType == CHIRequestType:AtomicNoReturn))) {
|
||||
DPRINTF(RubySlicc, "Atomic before %s\n", tbe.dataBlk);
|
||||
|
||||
tbe.oldDataBlk := tbe.dataBlk;
|
||||
tbe.dataBlk.atomicPartial(tbe.dataBlk, tbe.atomic_op);
|
||||
tbe.dataDirty := true;
|
||||
|
||||
DPRINTF(RubySlicc, "Atomic after %s\n", tbe.dataBlk);
|
||||
}
|
||||
printTBEState(tbe);
|
||||
}
|
||||
|
||||
action(UpdateDataState_FromCUResp, desc="") {
|
||||
assert(is_valid(tbe));
|
||||
peek(rspInPort, CHIResponseMsg) {
|
||||
@@ -2127,6 +2469,10 @@ action(Receive_ReqResp_WUNeedComp, desc="") {
|
||||
tbe.defer_expected_comp := true;
|
||||
}
|
||||
|
||||
action(Receive_ReqResp_AR, desc="") {
|
||||
tbe.actions.pushFrontNB(Event:SendARData);
|
||||
}
|
||||
|
||||
action(Receive_ReqResp_WUComp, desc="") {
|
||||
if (tbe.defer_expected_comp) {
|
||||
tbe.defer_expected_comp := false;
|
||||
@@ -2320,6 +2666,36 @@ action(CheckWUComp, desc="") {
|
||||
}
|
||||
}
|
||||
|
||||
action(Send_ARData, desc="") {
|
||||
assert(is_valid(tbe));
|
||||
tbe.snd_msgType := CHIDataType:NCBWrData;
|
||||
tbe.snd_destination := mapAddressToDownstreamMachine(tbe.addr);
|
||||
setupPendingAtomicSend(tbe);
|
||||
}
|
||||
|
||||
action(Send_ANRData, desc="") {
|
||||
assert(is_valid(tbe));
|
||||
tbe.snd_msgType := CHIDataType:NCBWrData;
|
||||
tbe.snd_destination := mapAddressToDownstreamMachine(tbe.addr);
|
||||
setupPendingAtomicSend(tbe);
|
||||
}
|
||||
|
||||
action(CheckARComp, desc="") {
|
||||
assert(is_valid(tbe));
|
||||
tbe.expected_req_resp.addExpectedDataType(CHIDataType:CompData_I);
|
||||
tbe.expected_req_resp.addExpectedRespType(CHIResponseType:RespSepData);
|
||||
tbe.expected_req_resp.addExpectedCount(2);
|
||||
}
|
||||
|
||||
action(CheckANRComp, desc="") {
|
||||
assert(is_valid(tbe));
|
||||
if (tbe.defer_expected_comp) {
|
||||
tbe.defer_expected_comp := false;
|
||||
tbe.expected_req_resp.addExpectedCount(1);
|
||||
tbe.expected_req_resp.addExpectedRespType(CHIResponseType:Comp);
|
||||
}
|
||||
}
|
||||
|
||||
action(Send_SnpRespData, desc="") {
|
||||
assert(is_HN == false);
|
||||
assert(is_valid(tbe));
|
||||
@@ -2531,7 +2907,12 @@ action(Send_Data, desc="") {
|
||||
}
|
||||
tbe.snd_pendBytes.setMask(offset, range, false);
|
||||
|
||||
out_msg.dataBlk := tbe.dataBlk;
|
||||
if (tbe.reqType == CHIRequestType:AtomicReturn){
|
||||
out_msg.dataBlk := tbe.oldDataBlk;
|
||||
} else {
|
||||
out_msg.dataBlk := tbe.dataBlk;
|
||||
}
|
||||
|
||||
out_msg.bitMask.setMask(offset, range);
|
||||
|
||||
out_msg.responder := machineID;
|
||||
@@ -2673,6 +3054,36 @@ action(Send_Comp_WU, desc="") {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
action(Send_CompData_AR, desc="") {
|
||||
assert(is_valid(tbe));
|
||||
assert(tbe.dataValid);
|
||||
|
||||
if (is_HN) {
|
||||
tbe.oldDataBlk := tbe.dataBlk;
|
||||
}
|
||||
|
||||
tbe.snd_msgType := CHIDataType:CompData_I;
|
||||
tbe.dataMaybeDirtyUpstream := false;
|
||||
tbe.requestorToBeExclusiveOwner := false;
|
||||
tbe.requestorToBeOwner := false;
|
||||
tbe.snd_destination := tbe.requestor;
|
||||
setupPendingSend(tbe);
|
||||
printTBEState(tbe);
|
||||
|
||||
}
|
||||
|
||||
action(Send_Comp_ANR, desc="") {
|
||||
assert(is_valid(tbe));
|
||||
enqueue(rspOutPort, CHIResponseMsg, comp_anr_latency + response_latency) {
|
||||
out_msg.addr := address;
|
||||
out_msg.type := CHIResponseType:Comp;
|
||||
out_msg.responder := machineID;
|
||||
out_msg.Destination.add(tbe.requestor);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
action(Send_SnpRespI, desc="") {
|
||||
enqueue(rspOutPort, CHIResponseMsg, response_latency) {
|
||||
out_msg.addr := address;
|
||||
@@ -3003,6 +3414,22 @@ action(Callback_StoreHit, desc="") {
|
||||
}
|
||||
}
|
||||
|
||||
action(Callback_AtomicHit, desc="") {
|
||||
assert(is_valid(tbe));
|
||||
assert(tbe.dataValid);
|
||||
assert((tbe.reqType == CHIRequestType:AtomicLoad) ||
|
||||
(tbe.reqType == CHIRequestType:AtomicStore));
|
||||
DPRINTF(RubySlicc, "Atomic before %s\n", tbe.dataBlk);
|
||||
|
||||
DataBlock oldDataBlk;
|
||||
oldDataBlk := tbe.dataBlk;
|
||||
tbe.dataBlk.atomicPartial(tbe.dataBlk, tbe.atomic_op);
|
||||
|
||||
sequencer.atomicCallback(tbe.addr, oldDataBlk, false);
|
||||
DPRINTF(RubySlicc, "Atomic after %s\n", tbe.dataBlk);
|
||||
tbe.dataDirty := true;
|
||||
}
|
||||
|
||||
action(Callback_ExpressPrefetchHit, desc="") {
|
||||
// have not allocated TBE, but must clear the reservation
|
||||
assert(is_invalid(tbe));
|
||||
@@ -3051,6 +3478,25 @@ action(Callback_Miss, desc="") {
|
||||
// also decay the timeout
|
||||
scLockDecayLatency();
|
||||
}
|
||||
} else if (tbe.dataValid && tbe.atomic_to_be_done &&
|
||||
((tbe.reqType == CHIRequestType:AtomicLoad) ||
|
||||
(tbe.reqType == CHIRequestType:AtomicStore))){
|
||||
assert(is_valid(tbe));
|
||||
assert(tbe.dataValid);
|
||||
assert((tbe.reqType == CHIRequestType:AtomicLoad) ||
|
||||
(tbe.reqType == CHIRequestType:AtomicStore));
|
||||
DPRINTF(RubySlicc, "Atomic before %s\n", tbe.dataBlk);
|
||||
|
||||
DataBlock oldDataBlk;
|
||||
oldDataBlk := tbe.dataBlk;
|
||||
tbe.dataBlk.atomicPartial(tbe.dataBlk, tbe.atomic_op);
|
||||
|
||||
sequencer.atomicCallback(tbe.addr, oldDataBlk, false);
|
||||
DPRINTF(RubySlicc, "Atomic after %s\n", tbe.dataBlk);
|
||||
tbe.dataDirty := true;
|
||||
} else if (tbe.dataValid && tbe.dataAMOValid && (tbe.reqType == CHIRequestType:AtomicLoad)) {
|
||||
DPRINTF(RubySlicc, "Atomic before %s\n", tbe.oldDataBlk);
|
||||
sequencer.atomicCallback(tbe.addr, tbe.oldDataBlk, false);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3070,6 +3516,18 @@ action(Unset_Timeout_Cache, desc="") {
|
||||
wakeup_port(snpRdyPort, address);
|
||||
}
|
||||
|
||||
action(Callback_AtomicNoReturn, desc="") {
|
||||
assert(is_valid(tbe));
|
||||
assert((tbe.is_local_pf || tbe.is_remote_pf) == false);
|
||||
assert((tbe.reqType == CHIRequestType:AtomicNoReturn) ||
|
||||
(tbe.reqType == CHIRequestType:AtomicStore));
|
||||
|
||||
if(tbe.reqType == CHIRequestType:AtomicStore){
|
||||
sequencer.atomicCallback(tbe.addr, tbe.dataBlk);
|
||||
DPRINTF(RubySlicc, "AtomicNoReturn %s\n", tbe.dataBlk);
|
||||
}
|
||||
}
|
||||
|
||||
action(Callback_WriteUnique, desc="") {
|
||||
assert(is_valid(tbe));
|
||||
assert((tbe.is_local_pf || tbe.is_remote_pf) == false);
|
||||
@@ -3183,7 +3641,7 @@ action(Profile_OutgoingEnd_DatalessResp, desc="") {
|
||||
action(TagArrayRead, desc="") {
|
||||
assert(is_valid(tbe));
|
||||
tbe.delayNextAction := curTick() + cyclesToTicks(
|
||||
tagLatency(fromSequencer(tbe.reqType)));
|
||||
tagLatency(fromSequencer(tbe.reqType)));
|
||||
}
|
||||
|
||||
action(TagArrayWrite, desc="") {
|
||||
@@ -3235,6 +3693,11 @@ action(FillPipe, desc="") {
|
||||
tbe.delayNextAction := curTick() + cyclesToTicks(fill_latency);
|
||||
}
|
||||
|
||||
action(DelayAtomic, desc="") {
|
||||
assert(is_valid(tbe));
|
||||
tbe.delayNextAction := curTick() + cyclesToTicks(atomic_op_latency);
|
||||
}
|
||||
|
||||
action(SnpSharedPipe, desc="") {
|
||||
assert(is_valid(tbe));
|
||||
tbe.delayNextAction := curTick() + cyclesToTicks(snp_latency);
|
||||
|
||||
@@ -302,7 +302,9 @@ Cycles dataLatency() {
|
||||
bool fromSequencer(CHIRequestType reqType) {
|
||||
return reqType == CHIRequestType:Load ||
|
||||
reqType == CHIRequestType:Store ||
|
||||
reqType == CHIRequestType:StoreLine;
|
||||
reqType == CHIRequestType:StoreLine ||
|
||||
reqType == CHIRequestType:AtomicLoad ||
|
||||
tbe.reqType == CHIRequestType:AtomicStore;
|
||||
}
|
||||
|
||||
bool inCache(Addr addr) {
|
||||
@@ -434,6 +436,9 @@ TBE allocateRequestTBE(Addr addr, CHIRequestMsg in_msg), return_by_pointer="yes"
|
||||
tbe.is_local_pf := in_msg.is_local_pf;
|
||||
tbe.is_remote_pf := in_msg.is_remote_pf;
|
||||
|
||||
tbe.atomic_op.clear();
|
||||
tbe.atomic_op.orMask(in_msg.atomic_op);
|
||||
|
||||
tbe.use_DMT := false;
|
||||
tbe.use_DCT := false;
|
||||
|
||||
@@ -622,6 +627,13 @@ void setupPendingPartialSend(TBE tbe) {
|
||||
scheduleSendData(tbe, 0);
|
||||
}
|
||||
|
||||
void setupPendingAtomicSend(TBE tbe) {
|
||||
assert(blockSize >= data_channel_size);
|
||||
assert((blockSize % data_channel_size) == 0);
|
||||
tbe.snd_pendBytes.setMask(0,tbe.accSize,true);
|
||||
scheduleSendData(tbe, 0);
|
||||
}
|
||||
|
||||
// common code for downstream requests
|
||||
void prepareRequest(TBE tbe, CHIRequestType type, CHIRequestMsg & out_msg) {
|
||||
out_msg.addr := tbe.addr;
|
||||
@@ -644,6 +656,17 @@ void prepareRequest(TBE tbe, CHIRequestType type, CHIRequestMsg & out_msg) {
|
||||
assert(tbe.txnId != static_cast(Addr, "value", -1));
|
||||
}
|
||||
|
||||
void prepareRequestAtomic(TBE tbe, CHIRequestType type,
|
||||
CHIRequestMsg & out_msg) {
|
||||
assert((type == CHIRequestType:AtomicReturn) ||
|
||||
(type == CHIRequestType:AtomicNoReturn));
|
||||
prepareRequest(tbe, type, out_msg);
|
||||
out_msg.accAddr := tbe.accAddr;
|
||||
out_msg.accSize := tbe.accSize;
|
||||
out_msg.atomic_op.clear();
|
||||
out_msg.atomic_op.orMask(tbe.atomic_op);
|
||||
}
|
||||
|
||||
void allowRequestRetry(TBE tbe, CHIRequestMsg & out_msg) {
|
||||
out_msg.allowRetry := true;
|
||||
tbe.pendReqAllowRetry := true;
|
||||
@@ -672,6 +695,8 @@ void prepareRequestRetry(TBE tbe, CHIRequestMsg & out_msg) {
|
||||
out_msg.seqReq := tbe.seqReq;
|
||||
out_msg.is_local_pf := false;
|
||||
out_msg.is_remote_pf := tbe.is_local_pf || tbe.is_remote_pf;
|
||||
out_msg.atomic_op.clear();
|
||||
out_msg.atomic_op.orMask(tbe.atomic_op);
|
||||
}
|
||||
|
||||
void prepareRequestRetryDVM(TBE tbe, CHIRequestMsg & out_msg) {
|
||||
@@ -773,8 +798,12 @@ bool needCacheEntry(CHIRequestType req_type,
|
||||
(req_type == CHIRequestType:WriteEvictFull) ||
|
||||
(is_HN && (req_type == CHIRequestType:WriteUniqueFull)))) ||
|
||||
(alloc_on_seq_acc && ((req_type == CHIRequestType:Load) ||
|
||||
(req_type == CHIRequestType:Store))) ||
|
||||
(alloc_on_seq_line_write && (req_type == CHIRequestType:StoreLine));
|
||||
(req_type == CHIRequestType:Store) ||
|
||||
(req_type == CHIRequestType:AtomicLoad) ||
|
||||
(req_type == CHIRequestType:AtomicStore))) ||
|
||||
(alloc_on_seq_line_write && (req_type == CHIRequestType:StoreLine)) ||
|
||||
(alloc_on_atomic && ((req_type == CHIRequestType:AtomicReturn) ||
|
||||
(req_type == CHIRequestType:AtomicNoReturn)));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1174,6 +1203,10 @@ Event reqToEvent(CHIRequestType type, bool is_prefetch) {
|
||||
return Event:Store;
|
||||
} else if (type == CHIRequestType:StoreLine) {
|
||||
return Event:Store;
|
||||
} else if (type == CHIRequestType:AtomicLoad) {
|
||||
return Event:AtomicLoad;
|
||||
} else if (type == CHIRequestType:AtomicStore){
|
||||
return Event:AtomicStore;
|
||||
} else if (type == CHIRequestType:ReadShared) {
|
||||
return Event:ReadShared;
|
||||
} else if (type == CHIRequestType:ReadNotSharedDirty) {
|
||||
@@ -1214,6 +1247,18 @@ Event reqToEvent(CHIRequestType type, bool is_prefetch) {
|
||||
return Event:DvmTlbi_Initiate;
|
||||
} else if (type == CHIRequestType:DvmSync_Initiate) {
|
||||
return Event:DvmSync_Initiate;
|
||||
} else if (type == CHIRequestType:AtomicReturn){
|
||||
if (is_HN) {
|
||||
return Event:AtomicReturn_PoC;
|
||||
} else {
|
||||
return Event:AtomicReturn;
|
||||
}
|
||||
} else if (type == CHIRequestType:AtomicNoReturn){
|
||||
if (is_HN) {
|
||||
return Event:AtomicNoReturn_PoC;
|
||||
} else {
|
||||
return Event:AtomicNoReturn;
|
||||
}
|
||||
} else {
|
||||
error("Invalid CHIRequestType");
|
||||
}
|
||||
|
||||
@@ -155,6 +155,12 @@ transition({BUSY_INTR,BUSY_BLKD}, FillPipe) {
|
||||
ProcessNextState_ClearPending;
|
||||
}
|
||||
|
||||
transition({BUSY_INTR,BUSY_BLKD}, DelayAtomic) {
|
||||
Pop_TriggerQueue;
|
||||
DelayAtomic;
|
||||
ProcessNextState_ClearPending;
|
||||
}
|
||||
|
||||
transition({BUSY_INTR,BUSY_BLKD}, SnpSharedPipe) {
|
||||
Pop_TriggerQueue;
|
||||
SnpSharedPipe;
|
||||
@@ -418,8 +424,82 @@ transition({RSC,RSD,RUSD,RUSC,RU,I}, WriteUnique, BUSY_BLKD) {
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
// AtomicReturn and AtomicNoReturn
|
||||
|
||||
// Load / Store from sequencer & Prefetch from prefetcher
|
||||
transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,
|
||||
UD,UD_RSC,UD_RSD,UD_RU,UC,UC_RSC,UC_RU,RSC,RU}, AtomicReturn, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicReturn_Forward;
|
||||
Profile_Miss;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,
|
||||
UD,UD_RSC,UD_RSD,UD_RU,UC,UC_RSC,UC_RU,RSC,RU}, AtomicNoReturn, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicNoReturn_Forward;
|
||||
Profile_Miss;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC},
|
||||
AtomicReturn_PoC, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicReturn_LocalWrite;
|
||||
Profile_Hit;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC},
|
||||
AtomicNoReturn_PoC, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicNoReturn_LocalWrite;
|
||||
Profile_Hit;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU},
|
||||
AtomicReturn_PoC, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicReturn_LocalWrite;
|
||||
Profile_Miss;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU},
|
||||
AtomicNoReturn_PoC, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicNoReturn_LocalWrite;
|
||||
Profile_Miss;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
transition(I, AtomicReturn_PoC, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicReturn_Miss;
|
||||
Allocate_DirEntry;
|
||||
Profile_Miss;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
transition(I, AtomicNoReturn_PoC, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicNoReturn_Miss;
|
||||
Allocate_DirEntry;
|
||||
Profile_Miss;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
|
||||
// Load / Store / Atomic from sequencer & Prefetch from prefetcher
|
||||
|
||||
transition({UD,UD_T,SD,UC,SC}, Load, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
@@ -460,6 +540,28 @@ transition(BUSY_BLKD, StoreHit) {
|
||||
ProcessNextState_ClearPending;
|
||||
}
|
||||
|
||||
transition(UC, {AtomicLoad,AtomicStore}, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_Atomic_UC;
|
||||
Profile_Hit;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
transition({UD,UD_T}, {AtomicLoad,AtomicStore}, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_Atomic_UD;
|
||||
Profile_Hit;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
transition(BUSY_BLKD, AtomicHit) {
|
||||
Pop_TriggerQueue;
|
||||
Callback_AtomicHit;
|
||||
ProcessNextState_ClearPending;
|
||||
}
|
||||
|
||||
transition(I, {Load,Prefetch}, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_LoadMiss;
|
||||
@@ -494,6 +596,55 @@ transition({BUSY_BLKD,BUSY_INTR}, UseTimeout) {
|
||||
Unset_Timeout_TBE;
|
||||
}
|
||||
|
||||
transition(I, AtomicLoad, BUSY_BLKD){
|
||||
Initiate_Request;
|
||||
Initiate_AtomicReturn_I;
|
||||
Profile_Miss;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
transition(I, AtomicStore, BUSY_BLKD){
|
||||
Initiate_Request;
|
||||
Initiate_AtomicNoReturn_I;
|
||||
Profile_Miss;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
transition(SD, AtomicLoad, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicReturn_SD;
|
||||
Profile_Miss;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
transition(SC, AtomicLoad, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicReturn_SC;
|
||||
Profile_Miss;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
transition(SD, AtomicStore, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicNoReturn_SD;
|
||||
Profile_Miss;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
transition(SC, AtomicStore, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicNoReturn_SC;
|
||||
Profile_Miss;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
|
||||
// Evict from Upstream
|
||||
|
||||
transition({UD_RSC,SD_RSC,UC_RSC,SC_RSC,RSC,RSD,RUSD,RUSC,UD_RSD,SD_RSD}, Evict, BUSY_BLKD) {
|
||||
@@ -691,13 +842,15 @@ transition(BUSY_INTR, {SnpOnce,SnpOnceFwd}, BUSY_BLKD) {
|
||||
transition({BUSY_BLKD,BUSY_INTR},
|
||||
{ReadShared, ReadNotSharedDirty, ReadUnique, ReadUnique_PoC,
|
||||
ReadOnce, CleanUnique, CleanUnique_Stale,
|
||||
Load, Store, Prefetch,
|
||||
Load, Store, AtomicLoad, AtomicStore, Prefetch,
|
||||
WriteBackFull, WriteBackFull_Stale,
|
||||
WriteEvictFull, WriteEvictFull_Stale,
|
||||
WriteCleanFull, WriteCleanFull_Stale,
|
||||
Evict, Evict_Stale,
|
||||
WriteUnique,WriteUniquePtl_PoC,
|
||||
WriteUniqueFull_PoC,WriteUniqueFull_PoC_Alloc}) {
|
||||
WriteUniqueFull_PoC,WriteUniqueFull_PoC_Alloc
|
||||
AtomicReturn,AtomicReturn_PoC,
|
||||
AtomicNoReturn,AtomicNoReturn_PoC}) {
|
||||
StallRequest;
|
||||
}
|
||||
|
||||
@@ -754,6 +907,30 @@ transition(BUSY_BLKD, SendWriteUnique, BUSY_INTR) {DestinationAvailable} {
|
||||
ProcessNextState_ClearPending;
|
||||
}
|
||||
|
||||
transition(BUSY_BLKD, SendAtomicReturn, BUSY_INTR) {DestinationAvailable} {
|
||||
Pop_TriggerQueue;
|
||||
Send_AtomicReturn;
|
||||
CheckARComp;
|
||||
Profile_OutgoingStart;
|
||||
ProcessNextState_ClearPending;
|
||||
}
|
||||
|
||||
transition(BUSY_BLKD, SendAtomicReturn_NoWait, BUSY_INTR) {
|
||||
Pop_TriggerQueue;
|
||||
Send_AtomicReturn_NoWait;
|
||||
CheckARComp;
|
||||
Profile_OutgoingStart;
|
||||
ProcessNextState_ClearPending;
|
||||
}
|
||||
|
||||
transition(BUSY_BLKD, SendAtomicNoReturn, BUSY_INTR) {DestinationAvailable} {
|
||||
Pop_TriggerQueue;
|
||||
Send_AtomicNoReturn;
|
||||
Profile_OutgoingStart;
|
||||
ProcessNextState_ClearPending;
|
||||
}
|
||||
|
||||
|
||||
transition(BUSY_BLKD, SendWriteNoSnp, BUSY_INTR) {DestinationAvailable} {
|
||||
Pop_TriggerQueue;
|
||||
Send_WriteNoSnp;
|
||||
@@ -804,6 +981,20 @@ transition(BUSY_BLKD, SendWUDataCB) {
|
||||
ProcessNextState_ClearPending;
|
||||
}
|
||||
|
||||
transition({BUSY_BLKD,BUSY_INTR}, SendARData) {
|
||||
Pop_TriggerQueue;
|
||||
Send_ARData;
|
||||
ProcessNextState_ClearPending;
|
||||
}
|
||||
|
||||
transition({BUSY_BLKD,BUSY_INTR}, SendANRData) {
|
||||
Pop_TriggerQueue;
|
||||
Callback_AtomicNoReturn;
|
||||
Send_ANRData;
|
||||
CheckANRComp;
|
||||
ProcessNextState_ClearPending;
|
||||
}
|
||||
|
||||
transition(BUSY_BLKD, SendInvSnpResp) {
|
||||
Pop_TriggerQueue;
|
||||
Send_InvSnpResp;
|
||||
@@ -1025,6 +1216,26 @@ transition({BUSY_BLKD,BUSY_INTR}, SendComp_WU) {
|
||||
ProcessNextState_ClearPending;
|
||||
}
|
||||
|
||||
transition(BUSY_BLKD, SendCompDBIDResp_ANR) {
|
||||
Pop_TriggerQueue;
|
||||
ExpectNCBWrData_A;
|
||||
Send_CompDBIDResp;
|
||||
ProcessNextState_ClearPending;
|
||||
}
|
||||
|
||||
transition(BUSY_BLKD, SendDBIDResp_AR) {
|
||||
Pop_TriggerQueue;
|
||||
ExpectNCBWrData_A;
|
||||
Send_DBIDResp;
|
||||
ProcessNextState_ClearPending;
|
||||
}
|
||||
|
||||
transition({BUSY_BLKD,BUSY_INTR}, SendCompData_AR) {
|
||||
Pop_TriggerQueue;
|
||||
Send_CompData_AR;
|
||||
ProcessNextState_ClearPending;
|
||||
}
|
||||
|
||||
transition(BUSY_BLKD, SendCompDBIDRespStale) {
|
||||
Pop_TriggerQueue;
|
||||
Send_CompDBIDResp_Stale;
|
||||
@@ -1085,6 +1296,7 @@ transition(BUSY_BLKD,
|
||||
transition({BUSY_BLKD,BUSY_INTR}, NCBWrData) {
|
||||
Receive_ReqDataResp;
|
||||
UpdateDataState_FromWUDataResp;
|
||||
UpdateDataState_FromADataResp;
|
||||
Pop_DataInQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
@@ -1238,10 +1450,11 @@ transition(BUSY_INTR, CompDBIDResp, BUSY_BLKD) {
|
||||
}
|
||||
|
||||
// alternative flow for WU with separate Comp
|
||||
transition(BUSY_INTR, DBIDResp, BUSY_BLKD) {
|
||||
transition({BUSY_INTR,BUSY_BLKD}, DBIDResp, BUSY_BLKD) {
|
||||
Receive_ReqResp;
|
||||
Receive_ReqResp_CopyDBID;
|
||||
Receive_ReqResp_WUNeedComp;
|
||||
Receive_ReqResp_AR;
|
||||
Pop_RespInQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
@@ -51,6 +51,7 @@ machine(MachineType:Cache, "Cache coherency protocol") :
|
||||
// sending necessary snoops.
|
||||
Cycles read_hit_latency := 0;
|
||||
Cycles read_miss_latency := 0;
|
||||
Cycles atomic_op_latency := 0;
|
||||
Cycles write_fe_latency := 0; // Front-end: Rcv req -> Snd req
|
||||
Cycles write_be_latency := 0; // Back-end: Rcv ack -> Snd data
|
||||
Cycles fill_latency := 0; // Fill latency
|
||||
@@ -126,11 +127,24 @@ machine(MachineType:Cache, "Cache coherency protocol") :
|
||||
// possible.
|
||||
bool enable_DCT;
|
||||
|
||||
// Atomic Operation Policy
|
||||
// All Near executes all Atomics at L1 (variable set to 0; default)
|
||||
// Unique Near executes Atomics at HNF for states I, SC, SD (set to 1)
|
||||
// Present Near execites all Atomics at L1 except when state is I (set to 2)
|
||||
int policy_type := 1;
|
||||
|
||||
|
||||
// Use separate Comp/DBIDResp responses for WriteUnique
|
||||
bool comp_wu := "False";
|
||||
// additional latency for the WU Comp response
|
||||
Cycles comp_wu_latency := 0;
|
||||
|
||||
|
||||
// Use separate Comp/DBIDResp responses for AtomicNoResponse
|
||||
bool comp_anr := "False";
|
||||
// additional latency for the ANR Comp response
|
||||
Cycles comp_anr_latency := 0;
|
||||
|
||||
// Controls cache clusivity for different request types.
|
||||
// set all alloc_on* to false to completelly disable caching
|
||||
bool alloc_on_readshared;
|
||||
@@ -139,6 +153,7 @@ machine(MachineType:Cache, "Cache coherency protocol") :
|
||||
bool alloc_on_writeback;
|
||||
bool alloc_on_seq_acc;
|
||||
bool alloc_on_seq_line_write;
|
||||
bool alloc_on_atomic;
|
||||
// Controls if the clusivity is strict.
|
||||
bool dealloc_on_unique;
|
||||
bool dealloc_on_shared;
|
||||
@@ -285,6 +300,8 @@ machine(MachineType:Cache, "Cache coherency protocol") :
|
||||
// See CHIRequestType in CHi-msg.sm for descriptions
|
||||
Load, desc="", in_trans="yes";
|
||||
Store, desc="", in_trans="yes";
|
||||
AtomicLoad, desc="", in_trans="yes";
|
||||
AtomicStore, desc="", in_trans="yes";
|
||||
Prefetch, desc="", in_trans="yes";
|
||||
ReadShared, desc="", in_trans="yes";
|
||||
ReadNotSharedDirty, desc="", in_trans="yes";
|
||||
@@ -300,6 +317,10 @@ machine(MachineType:Cache, "Cache coherency protocol") :
|
||||
WriteUniquePtl_PoC, desc="", in_trans="yes";
|
||||
WriteUniqueFull_PoC, desc="", in_trans="yes";
|
||||
WriteUniqueFull_PoC_Alloc, desc="", in_trans="yes";
|
||||
AtomicReturn, desc="", in_trans="yes";
|
||||
AtomicNoReturn, desc="", in_trans="yes";
|
||||
AtomicReturn_PoC, desc="", in_trans="yes";
|
||||
AtomicNoReturn_PoC, desc="", in_trans="yes";
|
||||
SnpCleanInvalid, desc="", in_trans="yes";
|
||||
SnpShared, desc="", in_trans="yes";
|
||||
SnpSharedFwd, desc="", in_trans="yes";
|
||||
@@ -418,11 +439,12 @@ machine(MachineType:Cache, "Cache coherency protocol") :
|
||||
DataArrayWriteOnFill, desc="Write the cache data array (cache fill)";
|
||||
|
||||
// Events for modeling the pipeline latency
|
||||
ReadHitPipe, desc="Latency of reads served from local cache";
|
||||
ReadMissPipe, desc="Latency of reads not served from local cache";
|
||||
WriteFEPipe, desc="Front-end latency of write requests";
|
||||
WriteBEPipe, desc="Back-end latency of write requests";
|
||||
FillPipe, desc="Cache fill latency";
|
||||
ReadHitPipe, desc="Latency of reads served from local cache";
|
||||
ReadMissPipe, desc="Latency of reads not served from local cache";
|
||||
WriteFEPipe, desc="Front-end latency of write requests";
|
||||
WriteBEPipe, desc="Back-end latency of write requests";
|
||||
FillPipe, desc="Cache fill latency";
|
||||
DelayAtomic, desc="Atomic operation latency";
|
||||
SnpSharedPipe, desc="Latency for SnpShared requests";
|
||||
SnpInvPipe, desc="Latency for SnpUnique and SnpCleanInv requests";
|
||||
SnpOncePipe, desc="Latency for SnpOnce requests";
|
||||
@@ -435,9 +457,9 @@ machine(MachineType:Cache, "Cache coherency protocol") :
|
||||
SendReadUnique, out_trans="yes", desc="Send a ReadUnique";
|
||||
SendCompAck, desc="Send CompAck";
|
||||
// Read handling at the completer
|
||||
SendCompData, desc="Send CompData";
|
||||
WaitCompAck, desc="Expect to receive CompAck";
|
||||
SendRespSepData, desc="Send RespSepData for a DMT request";
|
||||
SendCompData, desc="Send CompData";
|
||||
WaitCompAck, desc="Expect to receive CompAck";
|
||||
SendRespSepData, desc="Send RespSepData for a DMT request";
|
||||
|
||||
// Send a write request downstream.
|
||||
SendWriteBackOrWriteEvict, out_trans="yes", desc="Send a WriteBackFull (if line is UD or SD) or WriteEvictFull (if UC)";
|
||||
@@ -449,11 +471,25 @@ machine(MachineType:Cache, "Cache coherency protocol") :
|
||||
SendWUData, desc="Send write unique data";
|
||||
SendWUDataCB, desc="Send write unique data from a sequencer callback";
|
||||
// Write handling at the completer
|
||||
SendCompDBIDResp, desc="Ack WB with CompDBIDResp";
|
||||
SendCompDBIDRespStale, desc="Ack stale WB with CompDBIDResp";
|
||||
SendCompDBIDResp_WU, desc="Ack WU with CompDBIDResp and set expected data";
|
||||
SendDBIDResp_WU, desc="Ack WU with DBIDResp and set expected data";
|
||||
SendComp_WU, desc="Ack WU completion";
|
||||
SendCompDBIDResp, desc="Ack WB with CompDBIDResp";
|
||||
SendCompDBIDRespStale, desc="Ack stale WB with CompDBIDResp";
|
||||
SendCompDBIDResp_WU, desc="Ack WU with CompDBIDResp and set expected data";
|
||||
SendDBIDResp_WU, desc="Ack WU with DBIDResp and set expected data";
|
||||
SendComp_WU, desc="Ack WU completion";
|
||||
|
||||
// Send an atomic request downstream.
|
||||
SendAtomicReturn, out_trans="yes", desc="Send atomic request with return";
|
||||
SendAtomicReturn_NoWait, out_trans="yes", desc="Send atomic request with return, but no DBID";
|
||||
SendAtomicNoReturn, out_trans="yes", desc="Send atomic request without return";
|
||||
SendARData, desc="Send atomic return request data";
|
||||
SendANRData, desc="Send atomic no return request data";
|
||||
// Atomic handling at the completer
|
||||
SendDBIDResp_AR, desc="Ack AR with DBIDResp and set expected data";
|
||||
SendCompData_AR, desc="Ack AR completion";
|
||||
SendCompDBIDResp_ANR, desc="Ack ANR with CompDBIDResp and set expected data";
|
||||
SendDBIDResp_ANR, desc="Ack ANR with DBIDResp and set expected data";
|
||||
SendComp_ANR, desc="Ack ANR completion";
|
||||
|
||||
|
||||
// Dataless requests
|
||||
SendEvict, out_trans="yes", desc="Send a Evict";
|
||||
@@ -502,6 +538,7 @@ machine(MachineType:Cache, "Cache coherency protocol") :
|
||||
// Misc triggers
|
||||
LoadHit, desc="Complete a load hit";
|
||||
StoreHit, desc="Complete a store hit";
|
||||
AtomicHit, desc="Complete an atomic hit";
|
||||
UseTimeout, desc="Transition from UD_T -> UD";
|
||||
RestoreFromHazard, desc="Restore from a snoop hazard";
|
||||
TX_Data, desc="Transmit pending data messages";
|
||||
@@ -613,6 +650,10 @@ machine(MachineType:Cache, "Cache coherency protocol") :
|
||||
bool is_local_pf, desc="Request generated by a local prefetcher";
|
||||
bool is_remote_pf, desc="Request generated a prefetcher in another cache";
|
||||
|
||||
// Atomic info associated with the transaction
|
||||
WriteMask atomic_op, desc="Atomic Operation Wrapper";
|
||||
bool atomic_to_be_done, desc="We have yet to perform the atomic";
|
||||
|
||||
// NOTE: seqReq is a smart pointer pointing to original CPU request object
|
||||
// that triggers transactions associated with this TBE. seqReq carries some
|
||||
// information (e.g., PC of requesting instruction, virtual address of this
|
||||
@@ -630,8 +671,10 @@ machine(MachineType:Cache, "Cache coherency protocol") :
|
||||
// stable state.
|
||||
bool hasUseTimeout, desc="Line is locked under store/use timeout";
|
||||
DataBlock dataBlk, desc="Local copy of the line";
|
||||
DataBlock oldDataBlk, desc="Local copy of the line before executing atomic";
|
||||
WriteMask dataBlkValid, desc="Marks which bytes in the DataBlock are valid";
|
||||
bool dataValid, desc="Local copy is valid";
|
||||
bool dataAMOValid, desc="Local copy is valid for AMO";
|
||||
bool dataDirty, desc="Local copy is dirtry";
|
||||
bool dataMaybeDirtyUpstream, desc="Line maybe dirty upstream";
|
||||
bool dataUnique, desc="Line is unique either locally or upsatream";
|
||||
|
||||
@@ -46,6 +46,8 @@ enumeration(CHIRequestType, desc="") {
|
||||
Load;
|
||||
Store;
|
||||
StoreLine;
|
||||
AtomicLoad;
|
||||
AtomicStore;
|
||||
// Incoming DVM-related requests generated by the sequencer
|
||||
DvmTlbi_Initiate;
|
||||
DvmSync_Initiate;
|
||||
@@ -66,6 +68,9 @@ enumeration(CHIRequestType, desc="") {
|
||||
WriteUniquePtl;
|
||||
WriteUniqueFull;
|
||||
|
||||
AtomicReturn;
|
||||
AtomicNoReturn;
|
||||
|
||||
SnpSharedFwd;
|
||||
SnpNotSharedDirtyFwd;
|
||||
SnpUniqueFwd;
|
||||
@@ -108,6 +113,8 @@ structure(CHIRequestMsg, desc="", interface="Message") {
|
||||
bool is_local_pf, desc="Request generated by a local prefetcher";
|
||||
bool is_remote_pf, desc="Request generated a prefetcher in another cache";
|
||||
|
||||
WriteMask atomic_op, desc="Atomic Operation Wrapper";
|
||||
|
||||
bool usesTxnId, desc="True if using a Transaction ID", default="false";
|
||||
Addr txnId, desc="Transaction ID", default="0";
|
||||
|
||||
|
||||
@@ -123,5 +123,14 @@ RubyRequest::functionalWrite(Packet *pkt)
|
||||
return cBase < cTail;
|
||||
}
|
||||
|
||||
void
|
||||
RubyRequest::setWriteMask(uint32_t offset, uint32_t len,
|
||||
std::vector< std::pair<int,AtomicOpFunctor*>> atomicOps)
|
||||
{
|
||||
m_writeMask.setMask(offset, len);
|
||||
m_writeMask.setAtomicOps(atomicOps);
|
||||
}
|
||||
|
||||
|
||||
} // namespace ruby
|
||||
} // namespace gem5
|
||||
|
||||
@@ -226,6 +226,8 @@ class RubyRequest : public Message
|
||||
const PrefetchBit& getPrefetch() const { return m_Prefetch; }
|
||||
RequestPtr getRequestPtr() const { return m_pkt->req; }
|
||||
|
||||
void setWriteMask(uint32_t offset, uint32_t len,
|
||||
std::vector< std::pair<int,AtomicOpFunctor*>> atomicOps);
|
||||
void print(std::ostream& out) const;
|
||||
bool functionalRead(Packet *pkt);
|
||||
bool functionalRead(Packet *pkt, WriteMask &mask);
|
||||
|
||||
@@ -466,8 +466,12 @@ Sequencer::writeCallback(Addr address, DataBlock& data,
|
||||
bool ruby_request = true;
|
||||
while (!seq_req_list.empty()) {
|
||||
SequencerRequest &seq_req = seq_req_list.front();
|
||||
// Atomic Request may be executed remotly in the cache hierarchy
|
||||
bool atomic_req =
|
||||
((seq_req.m_type == RubyRequestType_ATOMIC_RETURN) ||
|
||||
(seq_req.m_type == RubyRequestType_ATOMIC_NO_RETURN));
|
||||
|
||||
if (noCoales && !ruby_request) {
|
||||
if ((noCoales || atomic_req) && !ruby_request) {
|
||||
// Do not process follow-up requests
|
||||
// (e.g. if full line no present)
|
||||
// Reissue to the cache hierarchy
|
||||
@@ -479,6 +483,8 @@ Sequencer::writeCallback(Addr address, DataBlock& data,
|
||||
assert(seq_req.m_type != RubyRequestType_LD);
|
||||
assert(seq_req.m_type != RubyRequestType_Load_Linked);
|
||||
assert(seq_req.m_type != RubyRequestType_IFETCH);
|
||||
assert(seq_req.m_type != RubyRequestType_ATOMIC_RETURN);
|
||||
assert(seq_req.m_type != RubyRequestType_ATOMIC_NO_RETURN);
|
||||
}
|
||||
|
||||
// handle write request
|
||||
@@ -594,6 +600,62 @@ Sequencer::readCallback(Addr address, DataBlock& data,
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Sequencer::atomicCallback(Addr address, DataBlock& data,
|
||||
const bool externalHit, const MachineType mach,
|
||||
const Cycles initialRequestTime,
|
||||
const Cycles forwardRequestTime,
|
||||
const Cycles firstResponseTime)
|
||||
{
|
||||
//
|
||||
// Free the first request (an atomic operation) from the list.
|
||||
// Then issue the next request to ruby system as we cannot
|
||||
// assume the cache line is present in the cache
|
||||
// (the opperation could be performed remotly)
|
||||
//
|
||||
assert(address == makeLineAddress(address));
|
||||
assert(m_RequestTable.find(address) != m_RequestTable.end());
|
||||
auto &seq_req_list = m_RequestTable[address];
|
||||
|
||||
// Perform hitCallback only on the first cpu request that
|
||||
// issued the ruby request
|
||||
bool ruby_request = true;
|
||||
while (!seq_req_list.empty()) {
|
||||
SequencerRequest &seq_req = seq_req_list.front();
|
||||
|
||||
if (ruby_request) {
|
||||
// Check that the request was an atomic memory operation
|
||||
// and record the latency
|
||||
assert((seq_req.m_type == RubyRequestType_ATOMIC_RETURN) ||
|
||||
(seq_req.m_type == RubyRequestType_ATOMIC_NO_RETURN));
|
||||
recordMissLatency(&seq_req, true, mach, externalHit,
|
||||
initialRequestTime, forwardRequestTime,
|
||||
firstResponseTime);
|
||||
} else {
|
||||
// Read, Write or Atomic request:
|
||||
// reissue request to the cache hierarchy
|
||||
// (we don't know if op was performed remotly)
|
||||
issueRequest(seq_req.pkt, seq_req.m_second_type);
|
||||
break;
|
||||
}
|
||||
|
||||
// Atomics clean the monitor entry
|
||||
llscClearMonitor(address);
|
||||
|
||||
markRemoved();
|
||||
ruby_request = false;
|
||||
hitCallback(&seq_req, data, true, mach, externalHit,
|
||||
initialRequestTime, forwardRequestTime,
|
||||
firstResponseTime, false);
|
||||
seq_req_list.pop_front();
|
||||
}
|
||||
|
||||
// free all outstanding requests corresponding to this address
|
||||
if (seq_req_list.empty()) {
|
||||
m_RequestTable.erase(address);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
|
||||
bool llscSuccess,
|
||||
@@ -637,10 +699,16 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
|
||||
(type == RubyRequestType_IFETCH) ||
|
||||
(type == RubyRequestType_RMW_Read) ||
|
||||
(type == RubyRequestType_Locked_RMW_Read) ||
|
||||
(type == RubyRequestType_Load_Linked)) {
|
||||
(type == RubyRequestType_Load_Linked) ||
|
||||
(type == RubyRequestType_ATOMIC_RETURN)) {
|
||||
pkt->setData(
|
||||
data.getData(getOffset(request_address), pkt->getSize()));
|
||||
DPRINTF(RubySequencer, "read data %s\n", data);
|
||||
|
||||
if (type == RubyRequestType_ATOMIC_RETURN) {
|
||||
DPRINTF(RubySequencer, "ATOMIC RETURN data %s\n", data);
|
||||
} else {
|
||||
DPRINTF(RubySequencer, "read data %s\n", data);
|
||||
}
|
||||
} else if (pkt->req->isSwap()) {
|
||||
assert(!pkt->isMaskedWrite());
|
||||
std::vector<uint8_t> overwrite_val(pkt->getSize());
|
||||
@@ -807,6 +875,19 @@ Sequencer::makeRequest(PacketPtr pkt)
|
||||
} else if (pkt->req->isTlbiCmd()) {
|
||||
primary_type = secondary_type = tlbiCmdToRubyRequestType(pkt);
|
||||
DPRINTF(RubySequencer, "Issuing TLBI\n");
|
||||
#if defined (PROTOCOL_CHI)
|
||||
} else if (pkt->isAtomicOp()) {
|
||||
if (pkt->req->isAtomicReturn()){
|
||||
DPRINTF(RubySequencer, "Issuing ATOMIC RETURN \n");
|
||||
primary_type = secondary_type =
|
||||
RubyRequestType_ATOMIC_RETURN;
|
||||
} else {
|
||||
DPRINTF(RubySequencer, "Issuing ATOMIC NO RETURN\n");
|
||||
primary_type = secondary_type =
|
||||
RubyRequestType_ATOMIC_NO_RETURN;
|
||||
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
//
|
||||
// To support SwapReq, we need to check isWrite() first: a SwapReq
|
||||
@@ -914,6 +995,18 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
|
||||
RubyAccessMode_Supervisor, pkt,
|
||||
PrefetchBit_No, proc_id, core_id);
|
||||
|
||||
if (pkt->isAtomicOp() &&
|
||||
((secondary_type == RubyRequestType_ATOMIC_RETURN) ||
|
||||
(secondary_type == RubyRequestType_ATOMIC_NO_RETURN))){
|
||||
// Create the blocksize, access mask and atomicops
|
||||
uint32_t offset = getOffset(pkt->getAddr());
|
||||
std::vector<std::pair<int,AtomicOpFunctor*>> atomicOps;
|
||||
atomicOps.push_back(std::make_pair<int,AtomicOpFunctor*>
|
||||
(offset, pkt->getAtomicOp()));
|
||||
|
||||
msg->setWriteMask(offset, pkt->getSize(), atomicOps);
|
||||
}
|
||||
|
||||
DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %#x %s\n",
|
||||
curTick(), m_version, "Seq", "Begin", "", "",
|
||||
printAddress(msg->getPhysicalAddress()),
|
||||
|
||||
@@ -126,6 +126,14 @@ class Sequencer : public RubyPort
|
||||
const Cycles forwardRequestTime = Cycles(0),
|
||||
const Cycles firstResponseTime = Cycles(0));
|
||||
|
||||
void atomicCallback(Addr address,
|
||||
DataBlock& data,
|
||||
const bool externalHit = false,
|
||||
const MachineType mach = MachineType_NUM,
|
||||
const Cycles initialRequestTime = Cycles(0),
|
||||
const Cycles forwardRequestTime = Cycles(0),
|
||||
const Cycles firstResponseTime = Cycles(0));
|
||||
|
||||
void unaddressedCallback(Addr unaddressedReqId,
|
||||
RubyRequestType requestType,
|
||||
const MachineType mach = MachineType_NUM,
|
||||
|
||||
Reference in New Issue
Block a user