mem-ruby: Add new feature far atomics in CHI (#177)

Added a new feature to CHI protocol (in collaboration with @tiagormk).
Here is the Jira Ticket
[GEM5-1326](https://gem5.atlassian.net/browse/GEM5-1326).
As described in the CHI specs, far atomic transactions enable remote
execution of Atomic Memory Operations. This pull request incorporates
several changes:

* Fix the Arm ISA definition of the Swap instructions. These instructions
return an operand, so their ISA definition should be a Return Operation.
* Enable AMOs in Ruby Mem Test to verify that AMOs work
* Enable near and far AMOs in the CHI Cache Controller

Three configuration parameters have been used to tune this behavior:
* policy_type: sets the atomic policy to one of those described in [our
paper](https://dl.acm.org/doi/10.1145/3579371.3589065)
* atomic_op_latency: simulates the AMO ALU operation latency
* comp_anr: configures the Atomic No Return (ANR) transaction to split the
CompDBIDResp response into two separate messages, DBIDResp and Comp
This commit is contained in:
Giacomo Travaglini
2023-10-06 10:09:58 +01:00
committed by GitHub
17 changed files with 1021 additions and 60 deletions

View File

@@ -62,6 +62,12 @@ parser.add_argument(
default=0,
help="percentage of accesses that should be functional",
)
parser.add_argument(
"--atomic",
type=int,
default=30,
help="percentage of accesses that should be atomic",
)
parser.add_argument(
"--suppress-func-errors",
action="store_true",
@@ -105,6 +111,7 @@ cpus = [
max_loads=args.maxloads,
percent_functional=args.functional,
percent_uncacheable=0,
percent_atomic=args.atomic,
progress_interval=args.progress,
suppress_func_errors=args.suppress_func_errors,
)
@@ -133,7 +140,7 @@ else:
dmas = []
dma_ports = []
for (i, dma) in enumerate(dmas):
for i, dma in enumerate(dmas):
dma_ports.append(dma.test)
Ruby.create_system(args, False, system, dma_ports=dma_ports)
@@ -155,7 +162,7 @@ system.ruby.randomization = True
assert len(cpus) == len(system.ruby._cpu_ports)
for (i, cpu) in enumerate(cpus):
for i, cpu in enumerate(cpus):
#
# Tie the cpu memtester ports to the correct system ports
#

View File

@@ -244,6 +244,7 @@ class CHI_L1Controller(CHI_Cache_Controller):
self.alloc_on_readunique = True
self.alloc_on_readonce = True
self.alloc_on_writeback = True
self.alloc_on_atomic = False
self.dealloc_on_unique = False
self.dealloc_on_shared = False
self.dealloc_backinv_unique = True
@@ -280,6 +281,7 @@ class CHI_L2Controller(CHI_Cache_Controller):
self.alloc_on_readunique = True
self.alloc_on_readonce = True
self.alloc_on_writeback = True
self.alloc_on_atomic = False
self.dealloc_on_unique = False
self.dealloc_on_shared = False
self.dealloc_backinv_unique = True
@@ -316,6 +318,7 @@ class CHI_HNFController(CHI_Cache_Controller):
self.alloc_on_readunique = False
self.alloc_on_readonce = True
self.alloc_on_writeback = True
self.alloc_on_atomic = True
self.dealloc_on_unique = True
self.dealloc_on_shared = False
self.dealloc_backinv_unique = False
@@ -392,6 +395,7 @@ class CHI_DMAController(CHI_Cache_Controller):
self.alloc_on_readunique = False
self.alloc_on_readonce = False
self.alloc_on_writeback = False
self.alloc_on_atomic = False
self.dealloc_on_unique = False
self.dealloc_on_shared = False
self.dealloc_backinv_unique = False

View File

@@ -827,35 +827,35 @@ let {{
ret_op=False, flavor="release").emit(OP_DICT['MIN'])
AtomicArithmeticSingleOp("swpb", "SWPB", 1, unsign=True,
ret_op=False, flavor="normal").emit(OP_DICT['SWP'])
flavor="normal").emit(OP_DICT['SWP'])
AtomicArithmeticSingleOp("swplb", "SWPLB", 1, unsign=True,
ret_op=False, flavor="release").emit(OP_DICT['SWP'])
flavor="release").emit(OP_DICT['SWP'])
AtomicArithmeticSingleOp("swpab", "SWPAB", 1, unsign=True,
ret_op=False, flavor="acquire").emit(OP_DICT['SWP'])
flavor="acquire").emit(OP_DICT['SWP'])
AtomicArithmeticSingleOp("swplab", "SWPLAB", 1, unsign=True,
ret_op=False, flavor="acquire_release").emit(OP_DICT['SWP'])
flavor="acquire_release").emit(OP_DICT['SWP'])
AtomicArithmeticSingleOp("swph", "SWPH", 2, unsign=True,
ret_op=False, flavor="normal").emit(OP_DICT['SWP'])
flavor="normal").emit(OP_DICT['SWP'])
AtomicArithmeticSingleOp("swplh", "SWPLH", 2, unsign=True,
ret_op=False, flavor="release").emit(OP_DICT['SWP'])
flavor="release").emit(OP_DICT['SWP'])
AtomicArithmeticSingleOp("swpah", "SWPAH", 2, unsign=True,
ret_op=False, flavor="acquire").emit(OP_DICT['SWP'])
flavor="acquire").emit(OP_DICT['SWP'])
AtomicArithmeticSingleOp("swplah", "SWPLAH", 2, unsign=True,
ret_op=False, flavor="acquire_release").emit(OP_DICT['SWP'])
flavor="acquire_release").emit(OP_DICT['SWP'])
AtomicArithmeticSingleOp("swp", "SWP", 4, unsign=True,
ret_op=False, flavor="normal").emit(OP_DICT['SWP'])
flavor="normal").emit(OP_DICT['SWP'])
AtomicArithmeticSingleOp("swpl", "SWPL", 4, unsign=True,
ret_op=False, flavor="release").emit(OP_DICT['SWP'])
flavor="release").emit(OP_DICT['SWP'])
AtomicArithmeticSingleOp("swpa", "SWPA", 4, unsign=True,
ret_op=False, flavor="acquire").emit(OP_DICT['SWP'])
flavor="acquire").emit(OP_DICT['SWP'])
AtomicArithmeticSingleOp("swpla", "SWPLA", 4, unsign=True,
ret_op=False, flavor="acquire_release").emit(OP_DICT['SWP'])
flavor="acquire_release").emit(OP_DICT['SWP'])
AtomicArithmeticSingleOp("swp64", "SWP64", 8, unsign=True,
ret_op=False, flavor="normal").emit(OP_DICT['SWP'])
flavor="normal").emit(OP_DICT['SWP'])
AtomicArithmeticSingleOp("swpl64", "SWPL64", 8, unsign=True,
ret_op=False, flavor="release").emit(OP_DICT['SWP'])
flavor="release").emit(OP_DICT['SWP'])
AtomicArithmeticSingleOp("swpa64", "SWPA64", 8, unsign=True,
ret_op=False, flavor="acquire").emit(OP_DICT['SWP'])
flavor="acquire").emit(OP_DICT['SWP'])
AtomicArithmeticSingleOp("swpla64", "SWPLA64", 8, unsign=True,
ret_op=False, flavor="acquire_release").emit(OP_DICT['SWP'])
flavor="acquire_release").emit(OP_DICT['SWP'])
}};

View File

@@ -63,6 +63,7 @@ class MemTest(ClockedObject):
percent_reads = Param.Percent(65, "Percentage reads")
percent_functional = Param.Percent(50, "Percentage functional accesses")
percent_uncacheable = Param.Percent(10, "Percentage uncacheable")
percent_atomic = Param.Percent(50, "Percentage atomics")
# Determine how often to print progress messages and what timeout
# to use for checking progress of both requests and responses

View File

@@ -94,6 +94,7 @@ MemTest::MemTest(const Params &p)
percentReads(p.percent_reads),
percentFunctional(p.percent_functional),
percentUncacheable(p.percent_uncacheable),
percentAtomic(p.percent_atomic),
requestorId(p.system->getRequestorId(this)),
blockSize(p.system->cacheLineSize()),
blockAddrMask(blockSize - 1),
@@ -115,6 +116,7 @@ MemTest::MemTest(const Params &p)
// set up counters
numReads = 0;
numWrites = 0;
numAtomics = 0;
// kick things into action
schedule(tickEvent, curTick());
@@ -142,7 +144,7 @@ MemTest::completeRequest(PacketPtr pkt, bool functional)
outstandingAddrs.erase(remove_addr);
DPRINTF(MemTest, "Completing %s at address %x (blk %x) %s\n",
pkt->isWrite() ? "write" : "read",
pkt->isWrite() ? pkt->isAtomicOp() ? "atomic" : "write" : "read",
req->getPaddr(), blockAlign(req->getPaddr()),
pkt->isError() ? "error" : "success");
@@ -153,7 +155,25 @@ MemTest::completeRequest(PacketPtr pkt, bool functional)
panic( "%s access failed at %#x\n",
pkt->isWrite() ? "Write" : "Read", req->getPaddr());
} else {
if (pkt->isRead()) {
if (pkt->isAtomicOp()) {
uint8_t ref_data = referenceData[req->getPaddr()];
if (pkt_data[0] != ref_data) {
panic("%s: read of %x (blk %x) @ cycle %d "
"returns %x, expected %x\n", name(),
req->getPaddr(), blockAlign(req->getPaddr()), curTick(),
pkt_data[0], ref_data);
}
DPRINTF(MemTest,
"Completing atomic at address %x (blk %x) value %x\n",
req->getPaddr(), blockAlign(req->getPaddr()),
pkt_data[0]);
referenceData[req->getPaddr()] =
atomicPendingData[req->getPaddr()];
numAtomics++;
stats.numAtomics++;
} else if (pkt->isRead()) {
uint8_t ref_data = referenceData[req->getPaddr()];
if (pkt_data[0] != ref_data) {
panic("%s: read of %x (blk %x) @ cycle %d "
@@ -167,9 +187,10 @@ MemTest::completeRequest(PacketPtr pkt, bool functional)
if (numReads == (uint64_t)nextProgressMessage) {
ccprintf(std::cerr,
"%s: completed %d read, %d write accesses @%d\n",
name(), numReads, numWrites, curTick());
nextProgressMessage += progressInterval;
"%s: completed %d read, %d write, "
"%d atomic accesses @%d\n",
name(), numReads, numWrites, numAtomics, curTick());
nextProgressMessage += progressInterval;
}
if (maxLoads != 0 && numReads >= maxLoads)
@@ -205,7 +226,9 @@ MemTest::MemTestStats::MemTestStats(statistics::Group *parent)
ADD_STAT(numReads, statistics::units::Count::get(),
"number of read accesses completed"),
ADD_STAT(numWrites, statistics::units::Count::get(),
"number of write accesses completed")
"number of write accesses completed"),
ADD_STAT(numAtomics, statistics::units::Count::get(),
"number of atomic accesses completed")
{
}
@@ -221,6 +244,8 @@ MemTest::tick()
unsigned cmd = random_mt.random(0, 100);
uint8_t data = random_mt.random<uint8_t>();
bool uncacheable = random_mt.random(0, 100) < percentUncacheable;
bool do_atomic = (random_mt.random(0, 100) < percentAtomic) &&
!uncacheable;
unsigned base = random_mt.random(0, 1);
Request::Flags flags;
Addr paddr;
@@ -281,13 +306,36 @@ MemTest::tick()
pkt = new Packet(req, MemCmd::ReadReq);
pkt->dataDynamic(pkt_data);
} else {
DPRINTF(MemTest, "Initiating %swrite at addr %x (blk %x) value %x\n",
do_functional ? "functional " : "", req->getPaddr(),
blockAlign(req->getPaddr()), data);
if (do_atomic) {
DPRINTF(MemTest,
"Initiating atomic at addr %x (blk %x) value %x\n",
req->getPaddr(), blockAlign(req->getPaddr()), data);
pkt = new Packet(req, MemCmd::WriteReq);
pkt->dataDynamic(pkt_data);
pkt_data[0] = data;
TypedAtomicOpFunctor<uint8_t> *_amo_op =
new AtomicGeneric3Op<uint8_t>(
data, data,
[](uint8_t* b, uint8_t a, uint8_t c){
*b = c;
});
assert(_amo_op);
AtomicOpFunctorPtr amo_op = AtomicOpFunctorPtr(_amo_op);
req->setAtomicOpFunctor(std::move(amo_op));
req->setFlags(Request::ATOMIC_RETURN_OP);
pkt = new Packet(req, MemCmd::WriteReq);
pkt->dataDynamic(pkt_data);
pkt_data[0] = data;
atomicPendingData[req->getPaddr()] = data;
} else {
DPRINTF(MemTest,
"Initiating %swrite at addr %x (blk %x) value %x\n",
do_functional ? "functional " : "", req->getPaddr(),
blockAlign(req->getPaddr()), data);
pkt = new Packet(req, MemCmd::WriteReq);
pkt->dataDynamic(pkt_data);
pkt_data[0] = data;
}
}
// there is no point in ticking if we are waiting for a retry

View File

@@ -131,6 +131,7 @@ class MemTest : public ClockedObject
const unsigned percentReads;
const unsigned percentFunctional;
const unsigned percentUncacheable;
const unsigned percentAtomic;
/** Request id for all generated traffic */
RequestorID requestorId;
@@ -138,6 +139,7 @@ class MemTest : public ClockedObject
unsigned int id;
std::unordered_set<Addr> outstandingAddrs;
std::unordered_map<Addr, uint8_t> atomicPendingData;
// store the expected value for the addresses we have touched
std::unordered_map<Addr, uint8_t> referenceData;
@@ -169,6 +171,7 @@ class MemTest : public ClockedObject
uint64_t numReads;
uint64_t numWrites;
uint64_t numAtomics;
const uint64_t maxLoads;
const bool atomic;
@@ -180,6 +183,7 @@ class MemTest : public ClockedObject
MemTestStats(statistics::Group *parent);
statistics::Scalar numReads;
statistics::Scalar numWrites;
statistics::Scalar numAtomics;
} stats;
/**

View File

@@ -757,6 +757,13 @@ class Request : public Extensible<Request>
return atomicOpFunctor.get();
}
void
setAtomicOpFunctor(AtomicOpFunctorPtr amo_op)
{
atomicOpFunctor = std::move(amo_op);
}
/**
* Accessor for hardware transactional memory abort cause.
*/

View File

@@ -139,6 +139,13 @@ structure (Sequencer, external = "yes") {
Cycles, Cycles, Cycles);
void writeUniqueCallback(Addr, DataBlock);
void atomicCallback(Addr, DataBlock);
void atomicCallback(Addr, DataBlock, bool);
void atomicCallback(Addr, DataBlock, bool, MachineType);
void atomicCallback(Addr, DataBlock, bool, MachineType,
Cycles, Cycles, Cycles);
void unaddressedCallback(Addr, RubyRequestType);
void unaddressedCallback(Addr, RubyRequestType, MachineType);
void unaddressedCallback(Addr, RubyRequestType, MachineType,

View File

@@ -148,15 +148,22 @@ action(AllocateTBE_SeqRequest, desc="") {
out_msg.is_remote_pf := false;
out_msg.txnId := max_outstanding_transactions;
out_msg.atomic_op.clear();
out_msg.atomic_op.orMask(in_msg.writeMask);
if ((in_msg.Type == RubyRequestType:LD) ||
(in_msg.Type == RubyRequestType:IFETCH)) {
out_msg.type := CHIRequestType:Load;
} else if (in_msg.Type == RubyRequestType:ST) {
} else if (in_msg.Type == RubyRequestType:ST) {
if (in_msg.Size == blockSize) {
out_msg.type := CHIRequestType:StoreLine;
} else {
out_msg.type := CHIRequestType:Store;
}
} else if (in_msg.Type == RubyRequestType:ATOMIC_RETURN) {
out_msg.type := CHIRequestType:AtomicLoad;
} else if (in_msg.Type == RubyRequestType:ATOMIC_NO_RETURN){
out_msg.type := CHIRequestType:AtomicStore;
} else {
error("Invalid RubyRequestType");
}
@@ -769,6 +776,148 @@ action(Initiate_StoreMiss, desc="") {
}
}
action(Initiate_Atomic_UC, desc="") {
if ((policy_type == 0) || // ALL NEAR
(policy_type == 1) || // UNIQUE NEAR
(policy_type == 2) // PRESENT NEAR
){
tbe.actions.push(Event:DataArrayRead);
tbe.actions.push(Event:DelayAtomic);
tbe.actions.push(Event:AtomicHit);
tbe.actions.pushNB(Event:DataArrayWrite);
tbe.actions.pushNB(Event:TagArrayWrite);
} else {
error("Invalid policy type");
}
}
action(Initiate_Atomic_UD, desc="") {
if ((policy_type == 0) || // ALL NEAR
(policy_type == 1) || // UNIQUE NEAR
(policy_type == 2) // PRESENT NEAR
){
tbe.actions.push(Event:DataArrayRead);
tbe.actions.push(Event:DelayAtomic);
tbe.actions.push(Event:AtomicHit);
tbe.actions.pushNB(Event:DataArrayWrite);
tbe.actions.pushNB(Event:TagArrayWrite);
} else {
error("Invalid policy type");
}
}
action(Initiate_AtomicReturn_I, desc="") {
if (policy_type == 0){ // ALL NEAR
tbe.actions.push(Event:SendReadUnique);
tbe.actions.push(Event:WriteFEPipe);
tbe.actions.push(Event:CheckCacheFill);
tbe.actions.push(Event:WriteBEPipe);
tbe.actions.push(Event:TagArrayWrite);
tbe.atomic_to_be_done := true;
} else if ((policy_type == 1) || // UNIQUE NEAR
(policy_type == 2)) { // PRESENT NEAR
tbe.actions.push(Event:SendAtomicReturn_NoWait);
tbe.dataToBeInvalid := true;
tbe.doCacheFill := false;
tbe.atomic_to_be_done := false;
} else {
error("Invalid policy type");
}
}
action(Initiate_AtomicNoReturn_I, desc="") {
if (policy_type == 0){ // ALL NEAR
tbe.actions.push(Event:SendReadUnique);
tbe.actions.push(Event:WriteFEPipe);
tbe.actions.push(Event:CheckCacheFill);
tbe.actions.push(Event:WriteBEPipe);
tbe.actions.push(Event:TagArrayWrite);
tbe.atomic_to_be_done := true;
} else if (policy_type == 1) { // UNIQUE NEAR
tbe.actions.push(Event:SendAtomicNoReturn);
tbe.actions.push(Event:SendANRData);
tbe.dataToBeInvalid := true;
tbe.doCacheFill := false;
tbe.atomic_to_be_done := false;
} else {
error("Invalid policy type");
}
}
action(Initiate_AtomicReturn_SD, desc="") {
if (policy_type == 0){ // ALL NEAR
tbe.actions.push(Event:SendReadUnique);
tbe.actions.push(Event:WriteFEPipe);
tbe.actions.push(Event:CheckCacheFill);
tbe.actions.push(Event:WriteBEPipe);
tbe.actions.push(Event:TagArrayWrite);
tbe.atomic_to_be_done := true;
} else if (policy_type == 1) { // UNIQUE NEAR
tbe.actions.push(Event:SendAtomicReturn_NoWait);
tbe.dataToBeInvalid := true;
tbe.doCacheFill := false;
tbe.atomic_to_be_done := false;
} else {
error("Invalid policy type");
}
}
action(Initiate_AtomicNoReturn_SD, desc="") {
if (policy_type == 0){ // ALL NEAR
tbe.actions.push(Event:SendReadUnique);
tbe.actions.push(Event:WriteFEPipe);
tbe.actions.push(Event:CheckCacheFill);
tbe.actions.push(Event:WriteBEPipe);
tbe.actions.push(Event:TagArrayWrite);
tbe.atomic_to_be_done := true;
} else if (policy_type == 1) { // UNIQUE NEAR
tbe.actions.push(Event:SendAtomicNoReturn);
tbe.actions.push(Event:SendANRData);
tbe.dataToBeInvalid := true;
tbe.doCacheFill := false;
tbe.atomic_to_be_done := false;
} else {
error("Invalid policy type");
}
}
action(Initiate_AtomicReturn_SC, desc="") {
if (policy_type == 0){ // ALL NEAR
tbe.actions.push(Event:SendReadUnique);
tbe.actions.push(Event:WriteFEPipe);
tbe.actions.push(Event:CheckCacheFill);
tbe.actions.push(Event:WriteBEPipe);
tbe.actions.push(Event:TagArrayWrite);
tbe.atomic_to_be_done := true;
} else if (policy_type == 1) { // UNIQUE NEAR
tbe.actions.push(Event:SendAtomicReturn_NoWait);
tbe.dataToBeInvalid := true;
tbe.doCacheFill := false;
tbe.atomic_to_be_done := false;
} else {
error("Invalid policy type");
}
}
action(Initiate_AtomicNoReturn_SC, desc="") {
if (policy_type == 0){ // ALL NEAR
tbe.actions.push(Event:SendReadUnique);
tbe.actions.push(Event:WriteFEPipe);
tbe.actions.push(Event:CheckCacheFill);
tbe.actions.push(Event:WriteBEPipe);
tbe.actions.push(Event:TagArrayWrite);
tbe.atomic_to_be_done := true;
} else if (policy_type == 1) { // UNIQUE NEAR
tbe.actions.push(Event:SendAtomicNoReturn);
tbe.actions.push(Event:SendANRData);
tbe.dataToBeInvalid := true;
tbe.doCacheFill := false;
tbe.atomic_to_be_done := false;
} else {
error("Invalid policy type");
}
}
action(Initiate_StoreUpgrade, desc="") {
assert(tbe.dataValid);
assert(is_valid(cache_entry));
@@ -865,8 +1014,111 @@ action(Initiate_WriteUnique_Forward, desc="") {
tbe.actions.pushNB(Event:TagArrayWrite);
}
action(Initiate_AtomicReturn_LocalWrite, desc="") {
if ((tbe.dir_sharers.count() > 0) && tbe.dataMaybeDirtyUpstream) {
tbe.actions.push(Event:SendSnpUnique);
} else if (tbe.dir_sharers.count() > 0){
// no one will send us data unless we explicitly ask
tbe.actions.push(Event:SendSnpUniqueRetToSrc);
}
tbe.actions.push(Event:SendDBIDResp_AR);
tbe.actions.pushNB(Event:WriteFEPipe);
tbe.actions.pushNB(Event:SendCompData_AR);
tbe.actions.push(Event:WriteFEPipe);
tbe.actions.push(Event:CheckCacheFill);
tbe.actions.push(Event:DelayAtomic);
tbe.actions.push(Event:WriteBEPipe);
tbe.actions.push(Event:TagArrayWrite);
}
action(Initiate_AtomicNoReturn_LocalWrite, desc="") {
if ((tbe.dir_sharers.count() > 0) && tbe.dataMaybeDirtyUpstream) {
tbe.actions.push(Event:SendSnpUnique);
} else if (tbe.dir_sharers.count() > 0){
// no one will send us data unless we explicitly ask
tbe.actions.push(Event:SendSnpUniqueRetToSrc);
}
if (comp_wu) {
tbe.actions.push(Event:SendDBIDResp_ANR);
tbe.actions.pushNB(Event:WriteFEPipe);
tbe.actions.pushNB(Event:SendComp_ANR);
} else {
tbe.actions.push(Event:SendCompDBIDResp_ANR);
tbe.actions.pushNB(Event:WriteFEPipe);
}
tbe.actions.push(Event:WriteFEPipe);
tbe.actions.push(Event:CheckCacheFill);
tbe.actions.push(Event:DelayAtomic);
tbe.actions.push(Event:WriteBEPipe);
tbe.actions.push(Event:TagArrayWrite);
}
action(Initiate_AtomicReturn_Forward, desc="") {
if ((tbe.dir_sharers.count() > 0) &&
(tbe.dir_sharers.isElement(tbe.requestor))){
tbe.dir_sharers.remove(tbe.requestor);
}
tbe.actions.push(Event:SendAtomicReturn);
tbe.actions.push(Event:SendCompData_AR);
tbe.actions.pushNB(Event:TagArrayWrite);
tbe.dataToBeInvalid := true;
}
action(Initiate_AtomicNoReturn_Forward, desc="") {
if ((tbe.dir_sharers.count() > 0) &&
(tbe.dir_sharers.isElement(tbe.requestor))){
tbe.dir_sharers.remove(tbe.requestor);
}
if (comp_wu) {
tbe.actions.push(Event:SendAtomicNoReturn);
tbe.actions.push(Event:SendDBIDResp_ANR);
tbe.actions.pushNB(Event:SendComp_ANR);
} else {
tbe.actions.push(Event:SendAtomicNoReturn);
tbe.actions.push(Event:SendCompDBIDResp_ANR);
}
tbe.actions.push(Event:WriteBEPipe);
tbe.actions.push(Event:SendANRData);
tbe.actions.pushNB(Event:TagArrayWrite);
tbe.dataToBeInvalid := true;
}
action(Initiate_AtomicReturn_Miss, desc="") {
tbe.actions.push(Event:SendReadNoSnp);
tbe.actions.pushNB(Event:WriteFEPipe);
tbe.actions.push(Event:SendDBIDResp_AR);
tbe.actions.pushNB(Event:WriteFEPipe);
tbe.actions.pushNB(Event:SendCompData_AR);
tbe.actions.push(Event:WriteFEPipe);
tbe.actions.push(Event:CheckCacheFill);
tbe.actions.push(Event:DelayAtomic);
tbe.actions.push(Event:WriteBEPipe);
tbe.actions.push(Event:TagArrayWrite);
}
action(Initiate_AtomicNoReturn_Miss, desc="") {
assert(is_HN);
tbe.actions.push(Event:SendReadNoSnp);
if (comp_wu) {
tbe.actions.push(Event:SendDBIDResp_ANR);
tbe.actions.pushNB(Event:WriteFEPipe);
tbe.actions.pushNB(Event:SendComp_ANR);
} else {
tbe.actions.push(Event:SendCompDBIDResp_ANR);
tbe.actions.pushNB(Event:WriteFEPipe);
}
tbe.actions.push(Event:WriteFEPipe);
tbe.actions.push(Event:CheckCacheFill);
tbe.actions.push(Event:DelayAtomic);
tbe.actions.push(Event:WriteBEPipe);
tbe.actions.push(Event:TagArrayWrite);
}
action(Initiate_CopyBack, desc="") {
// expect to receive this data after Send_CompDBIDResp
if (tbe.reqType == CHIRequestType:WriteBackFull) {
@@ -1157,7 +1409,9 @@ action(Send_ReadShared, desc="") {
action(Send_ReadNoSnp, desc="") {
assert(is_HN);
assert(tbe.use_DMT == false);
assert((tbe.use_DMT == false) ||
((tbe.reqType == CHIRequestType:AtomicReturn) ||
(tbe.reqType == CHIRequestType:AtomicNoReturn)));
clearExpectedReqResp(tbe);
tbe.expected_req_resp.addExpectedDataType(CHIDataType:CompData_UC);
@@ -1368,6 +1622,45 @@ action(Send_WriteUnique, desc="") {
tbe.expected_req_resp.addExpectedCount(1);
}
action(Send_AtomicReturn, desc="") {
assert(is_valid(tbe));
enqueue(reqOutPort, CHIRequestMsg, request_latency) {
prepareRequestAtomic(tbe, CHIRequestType:AtomicReturn, out_msg);
out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr));
allowRequestRetry(tbe, out_msg);
}
clearExpectedReqResp(tbe);
tbe.expected_req_resp.addExpectedRespType(CHIResponseType:DBIDResp);
tbe.expected_req_resp.addExpectedCount(1);
}
action(Send_AtomicReturn_NoWait, desc="") {
assert(is_valid(tbe));
enqueue(reqOutPort, CHIRequestMsg, request_latency) {
prepareRequestAtomic(tbe, CHIRequestType:AtomicReturn, out_msg);
out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr));
allowRequestRetry(tbe, out_msg);
}
tbe.dataAMOValid := false;
}
action(Send_AtomicNoReturn, desc="") {
assert(is_valid(tbe));
enqueue(reqOutPort, CHIRequestMsg, request_latency) {
prepareRequestAtomic(tbe, CHIRequestType:AtomicNoReturn, out_msg);
out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr));
allowRequestRetry(tbe, out_msg);
}
tbe.expected_req_resp.addExpectedRespType(CHIResponseType:CompDBIDResp);
tbe.expected_req_resp.addExpectedRespType(CHIResponseType:DBIDResp);
tbe.expected_req_resp.addExpectedCount(1);
}
action(Send_SnpCleanInvalid, desc="") {
assert(is_valid(tbe));
assert(tbe.expected_snp_resp.hasExpected() == false);
@@ -1636,6 +1929,20 @@ action(ExpectNCBWrData, desc="") {
tbe.dataBlkValid.setMask(addressOffset(tbe.accAddr, tbe.addr), tbe.accSize, false);
}
action(ExpectNCBWrData_A, desc="") {
// Expected data
int num_msgs := tbe.accSize / data_channel_size;
if ((tbe.accSize % data_channel_size) != 0) {
num_msgs := num_msgs + 1;
}
tbe.expected_req_resp.clear(num_msgs);
tbe.expected_req_resp.addExpectedDataType(CHIDataType:NCBWrData);
tbe.expected_req_resp.setExpectedCount(1);
// In atomic operations we do not expect real data for the current block
// Thus the mask bits do not care
}
action(ExpectCompAck, desc="") {
assert(is_valid(tbe));
tbe.expected_req_resp.addExpectedRespType(CHIResponseType:CompAck);
@@ -1658,7 +1965,22 @@ action(Receive_ReqDataResp, desc="") {
}
// Copy data to tbe only if we didn't have valid data or the received
// data is dirty
if ((tbe.dataBlkValid.isFull() == false) ||
if ((in_msg.type == CHIDataType:NCBWrData) &&
((tbe.reqType == CHIRequestType:AtomicReturn) ||
(tbe.reqType == CHIRequestType:AtomicNoReturn))){
// DO NOTHING
} else if ((in_msg.type == CHIDataType:CompData_I) &&
((tbe.reqType == CHIRequestType:AtomicReturn) ||
(tbe.reqType == CHIRequestType:AtomicLoad))) {
if(tbe.dataBlkValid.isFull()){
tbe.dataBlkValid.clear();
}
tbe.oldDataBlk.copyPartial(in_msg.dataBlk, in_msg.bitMask);
assert(tbe.dataBlkValid.isOverlap(in_msg.bitMask) == false);
tbe.dataBlkValid.orMask(in_msg.bitMask);
DPRINTF(RubySlicc, "Received %s\n", tbe.oldDataBlk);
DPRINTF(RubySlicc, "dataBlkValid = %s\n", tbe.dataBlkValid);
} else if ((tbe.dataBlkValid.isFull() == false) ||
(in_msg.type == CHIDataType:CompData_UD_PD) ||
(in_msg.type == CHIDataType:CompData_SD_PD) ||
(in_msg.type == CHIDataType:CBWrData_UD_PD) ||
@@ -1683,7 +2005,8 @@ action(Receive_RespSepDataFromCompData, desc="") {
if (tbe.expected_req_resp.receiveResp(CHIResponseType:RespSepData) == false) {
error("Received unexpected message");
}
if (is_HN == false) {
if ((is_HN == false) && (tbe.reqType != CHIRequestType:AtomicReturn) &&
((tbe.reqType != CHIRequestType:AtomicLoad) || (tbe.atomic_to_be_done == true))){
// must now ack the responder
tbe.actions.pushFrontNB(Event:SendCompAck);
}
@@ -1905,6 +2228,7 @@ action(UpdateDataState_FromReqDataResp, desc="") {
} else if (in_msg.type == CHIDataType:CompData_I) {
tbe.dataValid := true;
tbe.dataAMOValid := true;
tbe.dataToBeInvalid := true;
assert(tbe.dataMaybeDirtyUpstream == false);
@@ -1946,7 +2270,9 @@ action(UpdateDataState_FromReqDataResp, desc="") {
action(UpdateDataState_FromWUDataResp, desc="") {
assert(is_valid(tbe));
if (tbe.expected_req_resp.hasReceivedData()) {
if (tbe.expected_req_resp.hasReceivedData() &&
(tbe.reqType != CHIRequestType:AtomicReturn) &&
(tbe.reqType != CHIRequestType:AtomicNoReturn)) {
assert(tbe.dataBlkValid.test(addressOffset(tbe.accAddr, tbe.addr)));
assert(tbe.dataBlkValid.test(addressOffset(tbe.accAddr, tbe.addr)
+ tbe.accSize - 1));
@@ -1964,6 +2290,22 @@ action(UpdateDataState_FromWUDataResp, desc="") {
printTBEState(tbe);
}
action(UpdateDataState_FromADataResp, desc="") {
assert(is_valid(tbe));
if (is_HN && (tbe.expected_req_resp.hasReceivedData()) &&
((tbe.reqType == CHIRequestType:AtomicReturn) ||
(tbe.reqType == CHIRequestType:AtomicNoReturn))) {
DPRINTF(RubySlicc, "Atomic before %s\n", tbe.dataBlk);
tbe.oldDataBlk := tbe.dataBlk;
tbe.dataBlk.atomicPartial(tbe.dataBlk, tbe.atomic_op);
tbe.dataDirty := true;
DPRINTF(RubySlicc, "Atomic after %s\n", tbe.dataBlk);
}
printTBEState(tbe);
}
action(UpdateDataState_FromCUResp, desc="") {
assert(is_valid(tbe));
peek(rspInPort, CHIResponseMsg) {
@@ -2127,6 +2469,10 @@ action(Receive_ReqResp_WUNeedComp, desc="") {
tbe.defer_expected_comp := true;
}
action(Receive_ReqResp_AR, desc="") {
tbe.actions.pushFrontNB(Event:SendARData);
}
action(Receive_ReqResp_WUComp, desc="") {
if (tbe.defer_expected_comp) {
tbe.defer_expected_comp := false;
@@ -2320,6 +2666,36 @@ action(CheckWUComp, desc="") {
}
}
action(Send_ARData, desc="") {
assert(is_valid(tbe));
tbe.snd_msgType := CHIDataType:NCBWrData;
tbe.snd_destination := mapAddressToDownstreamMachine(tbe.addr);
setupPendingAtomicSend(tbe);
}
action(Send_ANRData, desc="") {
assert(is_valid(tbe));
tbe.snd_msgType := CHIDataType:NCBWrData;
tbe.snd_destination := mapAddressToDownstreamMachine(tbe.addr);
setupPendingAtomicSend(tbe);
}
action(CheckARComp, desc="") {
assert(is_valid(tbe));
tbe.expected_req_resp.addExpectedDataType(CHIDataType:CompData_I);
tbe.expected_req_resp.addExpectedRespType(CHIResponseType:RespSepData);
tbe.expected_req_resp.addExpectedCount(2);
}
action(CheckANRComp, desc="") {
assert(is_valid(tbe));
if (tbe.defer_expected_comp) {
tbe.defer_expected_comp := false;
tbe.expected_req_resp.addExpectedCount(1);
tbe.expected_req_resp.addExpectedRespType(CHIResponseType:Comp);
}
}
action(Send_SnpRespData, desc="") {
assert(is_HN == false);
assert(is_valid(tbe));
@@ -2531,7 +2907,12 @@ action(Send_Data, desc="") {
}
tbe.snd_pendBytes.setMask(offset, range, false);
out_msg.dataBlk := tbe.dataBlk;
if (tbe.reqType == CHIRequestType:AtomicReturn){
out_msg.dataBlk := tbe.oldDataBlk;
} else {
out_msg.dataBlk := tbe.dataBlk;
}
out_msg.bitMask.setMask(offset, range);
out_msg.responder := machineID;
@@ -2673,6 +3054,36 @@ action(Send_Comp_WU, desc="") {
}
}
action(Send_CompData_AR, desc="") {
assert(is_valid(tbe));
assert(tbe.dataValid);
if (is_HN) {
tbe.oldDataBlk := tbe.dataBlk;
}
tbe.snd_msgType := CHIDataType:CompData_I;
tbe.dataMaybeDirtyUpstream := false;
tbe.requestorToBeExclusiveOwner := false;
tbe.requestorToBeOwner := false;
tbe.snd_destination := tbe.requestor;
setupPendingSend(tbe);
printTBEState(tbe);
}
action(Send_Comp_ANR, desc="") {
assert(is_valid(tbe));
enqueue(rspOutPort, CHIResponseMsg, comp_anr_latency + response_latency) {
out_msg.addr := address;
out_msg.type := CHIResponseType:Comp;
out_msg.responder := machineID;
out_msg.Destination.add(tbe.requestor);
}
}
action(Send_SnpRespI, desc="") {
enqueue(rspOutPort, CHIResponseMsg, response_latency) {
out_msg.addr := address;
@@ -3003,6 +3414,22 @@ action(Callback_StoreHit, desc="") {
}
}
action(Callback_AtomicHit, desc="") {
assert(is_valid(tbe));
assert(tbe.dataValid);
assert((tbe.reqType == CHIRequestType:AtomicLoad) ||
(tbe.reqType == CHIRequestType:AtomicStore));
DPRINTF(RubySlicc, "Atomic before %s\n", tbe.dataBlk);
DataBlock oldDataBlk;
oldDataBlk := tbe.dataBlk;
tbe.dataBlk.atomicPartial(tbe.dataBlk, tbe.atomic_op);
sequencer.atomicCallback(tbe.addr, oldDataBlk, false);
DPRINTF(RubySlicc, "Atomic after %s\n", tbe.dataBlk);
tbe.dataDirty := true;
}
action(Callback_ExpressPrefetchHit, desc="") {
// have not allocated TBE, but must clear the reservation
assert(is_invalid(tbe));
@@ -3051,6 +3478,25 @@ action(Callback_Miss, desc="") {
// also decay the timeout
scLockDecayLatency();
}
} else if (tbe.dataValid && tbe.atomic_to_be_done &&
((tbe.reqType == CHIRequestType:AtomicLoad) ||
(tbe.reqType == CHIRequestType:AtomicStore))){
assert(is_valid(tbe));
assert(tbe.dataValid);
assert((tbe.reqType == CHIRequestType:AtomicLoad) ||
(tbe.reqType == CHIRequestType:AtomicStore));
DPRINTF(RubySlicc, "Atomic before %s\n", tbe.dataBlk);
DataBlock oldDataBlk;
oldDataBlk := tbe.dataBlk;
tbe.dataBlk.atomicPartial(tbe.dataBlk, tbe.atomic_op);
sequencer.atomicCallback(tbe.addr, oldDataBlk, false);
DPRINTF(RubySlicc, "Atomic after %s\n", tbe.dataBlk);
tbe.dataDirty := true;
} else if (tbe.dataValid && tbe.dataAMOValid && (tbe.reqType == CHIRequestType:AtomicLoad)) {
DPRINTF(RubySlicc, "Atomic before %s\n", tbe.oldDataBlk);
sequencer.atomicCallback(tbe.addr, tbe.oldDataBlk, false);
}
}
@@ -3070,6 +3516,18 @@ action(Unset_Timeout_Cache, desc="") {
wakeup_port(snpRdyPort, address);
}
action(Callback_AtomicNoReturn, desc="") {
assert(is_valid(tbe));
assert((tbe.is_local_pf || tbe.is_remote_pf) == false);
assert((tbe.reqType == CHIRequestType:AtomicNoReturn) ||
(tbe.reqType == CHIRequestType:AtomicStore));
if(tbe.reqType == CHIRequestType:AtomicStore){
sequencer.atomicCallback(tbe.addr, tbe.dataBlk);
DPRINTF(RubySlicc, "AtomicNoReturn %s\n", tbe.dataBlk);
}
}
action(Callback_WriteUnique, desc="") {
assert(is_valid(tbe));
assert((tbe.is_local_pf || tbe.is_remote_pf) == false);
@@ -3183,7 +3641,7 @@ action(Profile_OutgoingEnd_DatalessResp, desc="") {
action(TagArrayRead, desc="") {
assert(is_valid(tbe));
tbe.delayNextAction := curTick() + cyclesToTicks(
tagLatency(fromSequencer(tbe.reqType)));
tagLatency(fromSequencer(tbe.reqType)));
}
action(TagArrayWrite, desc="") {
@@ -3235,6 +3693,11 @@ action(FillPipe, desc="") {
tbe.delayNextAction := curTick() + cyclesToTicks(fill_latency);
}
action(DelayAtomic, desc="") {
assert(is_valid(tbe));
tbe.delayNextAction := curTick() + cyclesToTicks(atomic_op_latency);
}
action(SnpSharedPipe, desc="") {
assert(is_valid(tbe));
tbe.delayNextAction := curTick() + cyclesToTicks(snp_latency);

View File

@@ -302,7 +302,9 @@ Cycles dataLatency() {
bool fromSequencer(CHIRequestType reqType) {
return reqType == CHIRequestType:Load ||
reqType == CHIRequestType:Store ||
reqType == CHIRequestType:StoreLine;
reqType == CHIRequestType:StoreLine ||
reqType == CHIRequestType:AtomicLoad ||
tbe.reqType == CHIRequestType:AtomicStore;
}
bool inCache(Addr addr) {
@@ -434,6 +436,9 @@ TBE allocateRequestTBE(Addr addr, CHIRequestMsg in_msg), return_by_pointer="yes"
tbe.is_local_pf := in_msg.is_local_pf;
tbe.is_remote_pf := in_msg.is_remote_pf;
tbe.atomic_op.clear();
tbe.atomic_op.orMask(in_msg.atomic_op);
tbe.use_DMT := false;
tbe.use_DCT := false;
@@ -622,6 +627,13 @@ void setupPendingPartialSend(TBE tbe) {
scheduleSendData(tbe, 0);
}
// Set up the pending send bytes for a far atomic: only the bytes the AMO
// actually accesses (accSize bytes at the access offset within the line)
// are transmitted, mirroring setupPendingPartialSend.
void setupPendingAtomicSend(TBE tbe) {
  assert(blockSize >= data_channel_size);
  assert((blockSize % data_channel_size) == 0);
  tbe.snd_pendBytes.setMask(addressOffset(tbe.accAddr, tbe.addr),
                            tbe.accSize);
  scheduleSendData(tbe, 0);
}
// common code for downstream requests
void prepareRequest(TBE tbe, CHIRequestType type, CHIRequestMsg & out_msg) {
out_msg.addr := tbe.addr;
@@ -644,6 +656,17 @@ void prepareRequest(TBE tbe, CHIRequestType type, CHIRequestMsg & out_msg) {
assert(tbe.txnId != static_cast(Addr, "value", -1));
}
// Populate a downstream atomic request (AtomicReturn/AtomicNoReturn).
// Besides the common request fields, atomics carry the exact access
// address, the access size, and the atomic operation itself.
void prepareRequestAtomic(TBE tbe, CHIRequestType type,
                          CHIRequestMsg & out_msg) {
  assert((type == CHIRequestType:AtomicReturn) ||
         (type == CHIRequestType:AtomicNoReturn));
  prepareRequest(tbe, type, out_msg);
  out_msg.accAddr := tbe.accAddr;
  out_msg.accSize := tbe.accSize;
  // The WriteMask doubles as the carrier for the AtomicOpFunctor
  out_msg.atomic_op.clear();
  out_msg.atomic_op.orMask(tbe.atomic_op);
}
void allowRequestRetry(TBE tbe, CHIRequestMsg & out_msg) {
out_msg.allowRetry := true;
tbe.pendReqAllowRetry := true;
@@ -672,6 +695,8 @@ void prepareRequestRetry(TBE tbe, CHIRequestMsg & out_msg) {
out_msg.seqReq := tbe.seqReq;
out_msg.is_local_pf := false;
out_msg.is_remote_pf := tbe.is_local_pf || tbe.is_remote_pf;
out_msg.atomic_op.clear();
out_msg.atomic_op.orMask(tbe.atomic_op);
}
void prepareRequestRetryDVM(TBE tbe, CHIRequestMsg & out_msg) {
@@ -773,8 +798,12 @@ bool needCacheEntry(CHIRequestType req_type,
(req_type == CHIRequestType:WriteEvictFull) ||
(is_HN && (req_type == CHIRequestType:WriteUniqueFull)))) ||
(alloc_on_seq_acc && ((req_type == CHIRequestType:Load) ||
(req_type == CHIRequestType:Store))) ||
(alloc_on_seq_line_write && (req_type == CHIRequestType:StoreLine));
(req_type == CHIRequestType:Store) ||
(req_type == CHIRequestType:AtomicLoad) ||
(req_type == CHIRequestType:AtomicStore))) ||
(alloc_on_seq_line_write && (req_type == CHIRequestType:StoreLine)) ||
(alloc_on_atomic && ((req_type == CHIRequestType:AtomicReturn) ||
(req_type == CHIRequestType:AtomicNoReturn)));
}
}
@@ -1174,6 +1203,10 @@ Event reqToEvent(CHIRequestType type, bool is_prefetch) {
return Event:Store;
} else if (type == CHIRequestType:StoreLine) {
return Event:Store;
} else if (type == CHIRequestType:AtomicLoad) {
return Event:AtomicLoad;
} else if (type == CHIRequestType:AtomicStore){
return Event:AtomicStore;
} else if (type == CHIRequestType:ReadShared) {
return Event:ReadShared;
} else if (type == CHIRequestType:ReadNotSharedDirty) {
@@ -1214,6 +1247,18 @@ Event reqToEvent(CHIRequestType type, bool is_prefetch) {
return Event:DvmTlbi_Initiate;
} else if (type == CHIRequestType:DvmSync_Initiate) {
return Event:DvmSync_Initiate;
} else if (type == CHIRequestType:AtomicReturn){
if (is_HN) {
return Event:AtomicReturn_PoC;
} else {
return Event:AtomicReturn;
}
} else if (type == CHIRequestType:AtomicNoReturn){
if (is_HN) {
return Event:AtomicNoReturn_PoC;
} else {
return Event:AtomicNoReturn;
}
} else {
error("Invalid CHIRequestType");
}

View File

@@ -155,6 +155,12 @@ transition({BUSY_INTR,BUSY_BLKD}, FillPipe) {
ProcessNextState_ClearPending;
}
// Model the configurable AMO ALU latency while the transaction is in a
// transient state.
transition({BUSY_INTR,BUSY_BLKD}, DelayAtomic) {
  Pop_TriggerQueue;
  DelayAtomic;
  ProcessNextState_ClearPending;
}
transition({BUSY_INTR,BUSY_BLKD}, SnpSharedPipe) {
Pop_TriggerQueue;
SnpSharedPipe;
@@ -418,8 +424,82 @@ transition({RSC,RSD,RUSD,RUSC,RU,I}, WriteUnique, BUSY_BLKD) {
ProcessNextState;
}
// AtomicReturn and AtomicNoReturn requests arriving from upstream.
// At a non-PoC cache: forward the atomic downstream regardless of the
// local line state.
transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,
            UD,UD_RSC,UD_RSD,UD_RU,UC,UC_RSC,UC_RU,RSC,RU}, AtomicReturn, BUSY_BLKD) {
  Initiate_Request;
  Initiate_AtomicReturn_Forward;
  Profile_Miss;
  Pop_ReqRdyQueue;
  ProcessNextState;
}

transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,
            UD,UD_RSC,UD_RSD,UD_RU,UC,UC_RSC,UC_RU,RSC,RU}, AtomicNoReturn, BUSY_BLKD) {
  Initiate_Request;
  Initiate_AtomicNoReturn_Forward;
  Profile_Miss;
  Pop_ReqRdyQueue;
  ProcessNextState;
}

// At the PoC (is_HN) holding a unique copy: execute the atomic locally
// and count it as a hit.
transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC},
            AtomicReturn_PoC, BUSY_BLKD) {
  Initiate_Request;
  Initiate_AtomicReturn_LocalWrite;
  Profile_Hit;
  Pop_ReqRdyQueue;
  ProcessNextState;
}

transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC},
            AtomicNoReturn_PoC, BUSY_BLKD) {
  Initiate_Request;
  Initiate_AtomicNoReturn_LocalWrite;
  Profile_Hit;
  Pop_ReqRdyQueue;
  ProcessNextState;
}

// At the PoC holding a shared copy: still executed via the local-write
// path, but profiled as a miss (ownership must be obtained first).
transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU},
            AtomicReturn_PoC, BUSY_BLKD) {
  Initiate_Request;
  Initiate_AtomicReturn_LocalWrite;
  Profile_Miss;
  Pop_ReqRdyQueue;
  ProcessNextState;
}

transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU},
            AtomicNoReturn_PoC, BUSY_BLKD) {
  Initiate_Request;
  Initiate_AtomicNoReturn_LocalWrite;
  Profile_Miss;
  Pop_ReqRdyQueue;
  ProcessNextState;
}

// At the PoC with no local copy: take the miss path and allocate a
// directory entry for the line.
transition(I, AtomicReturn_PoC, BUSY_BLKD) {
  Initiate_Request;
  Initiate_AtomicReturn_Miss;
  Allocate_DirEntry;
  Profile_Miss;
  Pop_ReqRdyQueue;
  ProcessNextState;
}

transition(I, AtomicNoReturn_PoC, BUSY_BLKD) {
  Initiate_Request;
  Initiate_AtomicNoReturn_Miss;
  Allocate_DirEntry;
  Profile_Miss;
  Pop_ReqRdyQueue;
  ProcessNextState;
}
// Load / Store / Atomic from sequencer & Prefetch from prefetcher
transition({UD,UD_T,SD,UC,SC}, Load, BUSY_BLKD) {
Initiate_Request;
@@ -460,6 +540,28 @@ transition(BUSY_BLKD, StoreHit) {
ProcessNextState_ClearPending;
}
// Sequencer AtomicLoad/AtomicStore executed near (in this cache) when the
// line is already held in a unique state.
transition(UC, {AtomicLoad,AtomicStore}, BUSY_BLKD) {
  Initiate_Request;
  Initiate_Atomic_UC;
  Profile_Hit;
  Pop_ReqRdyQueue;
  ProcessNextState;
}

transition({UD,UD_T}, {AtomicLoad,AtomicStore}, BUSY_BLKD) {
  Initiate_Request;
  Initiate_Atomic_UD;
  Profile_Hit;
  Pop_ReqRdyQueue;
  ProcessNextState;
}

// Completion trigger for an atomic that hit in this cache.
transition(BUSY_BLKD, AtomicHit) {
  Pop_TriggerQueue;
  Callback_AtomicHit;
  ProcessNextState_ClearPending;
}
transition(I, {Load,Prefetch}, BUSY_BLKD) {
Initiate_Request;
Initiate_LoadMiss;
@@ -494,6 +596,55 @@ transition({BUSY_BLKD,BUSY_INTR}, UseTimeout) {
Unset_Timeout_TBE;
}
// Sequencer atomics that are not executed locally (line absent or only
// shared here; see policy_type): issue a far AtomicReturn/AtomicNoReturn
// downstream. One initiate action per starting state.
transition(I, AtomicLoad, BUSY_BLKD){
  Initiate_Request;
  Initiate_AtomicReturn_I;
  Profile_Miss;
  Pop_ReqRdyQueue;
  ProcessNextState;
}

transition(I, AtomicStore, BUSY_BLKD){
  Initiate_Request;
  Initiate_AtomicNoReturn_I;
  Profile_Miss;
  Pop_ReqRdyQueue;
  ProcessNextState;
}

transition(SD, AtomicLoad, BUSY_BLKD) {
  Initiate_Request;
  Initiate_AtomicReturn_SD;
  Profile_Miss;
  Pop_ReqRdyQueue;
  ProcessNextState;
}

transition(SC, AtomicLoad, BUSY_BLKD) {
  Initiate_Request;
  Initiate_AtomicReturn_SC;
  Profile_Miss;
  Pop_ReqRdyQueue;
  ProcessNextState;
}

transition(SD, AtomicStore, BUSY_BLKD) {
  Initiate_Request;
  Initiate_AtomicNoReturn_SD;
  Profile_Miss;
  Pop_ReqRdyQueue;
  ProcessNextState;
}

transition(SC, AtomicStore, BUSY_BLKD) {
  Initiate_Request;
  Initiate_AtomicNoReturn_SC;
  Profile_Miss;
  Pop_ReqRdyQueue;
  ProcessNextState;
}
// Evict from Upstream
transition({UD_RSC,SD_RSC,UC_RSC,SC_RSC,RSC,RSD,RUSD,RUSC,UD_RSD,SD_RSD}, Evict, BUSY_BLKD) {
@@ -691,13 +842,15 @@ transition(BUSY_INTR, {SnpOnce,SnpOnceFwd}, BUSY_BLKD) {
// While a transaction for this line is in flight, stall any newly arriving
// request — including the new atomic request/event types.
transition({BUSY_BLKD,BUSY_INTR},
           {ReadShared, ReadNotSharedDirty, ReadUnique, ReadUnique_PoC,
            ReadOnce, CleanUnique, CleanUnique_Stale,
            Load, Store, AtomicLoad, AtomicStore, Prefetch,
            WriteBackFull, WriteBackFull_Stale,
            WriteEvictFull, WriteEvictFull_Stale,
            WriteCleanFull, WriteCleanFull_Stale,
            Evict, Evict_Stale,
            WriteUnique,WriteUniquePtl_PoC,
            WriteUniqueFull_PoC,WriteUniqueFull_PoC_Alloc,
            AtomicReturn,AtomicReturn_PoC,
            AtomicNoReturn,AtomicNoReturn_PoC}) {
  StallRequest;
}
@@ -754,6 +907,30 @@ transition(BUSY_BLKD, SendWriteUnique, BUSY_INTR) {DestinationAvailable} {
ProcessNextState_ClearPending;
}
// Issue a far atomic downstream. The _NoWait variant sends the AtomicReturn
// without requiring a destination slot (per the event descriptions: no DBID
// wait); the others gate on DestinationAvailable.
transition(BUSY_BLKD, SendAtomicReturn, BUSY_INTR) {DestinationAvailable} {
  Pop_TriggerQueue;
  Send_AtomicReturn;
  CheckARComp;
  Profile_OutgoingStart;
  ProcessNextState_ClearPending;
}

transition(BUSY_BLKD, SendAtomicReturn_NoWait, BUSY_INTR) {
  Pop_TriggerQueue;
  Send_AtomicReturn_NoWait;
  CheckARComp;
  Profile_OutgoingStart;
  ProcessNextState_ClearPending;
}

transition(BUSY_BLKD, SendAtomicNoReturn, BUSY_INTR) {DestinationAvailable} {
  Pop_TriggerQueue;
  Send_AtomicNoReturn;
  Profile_OutgoingStart;
  ProcessNextState_ClearPending;
}
transition(BUSY_BLKD, SendWriteNoSnp, BUSY_INTR) {DestinationAvailable} {
Pop_TriggerQueue;
Send_WriteNoSnp;
@@ -804,6 +981,20 @@ transition(BUSY_BLKD, SendWUDataCB) {
ProcessNextState_ClearPending;
}
// Send the operand data for an outstanding far atomic.
transition({BUSY_BLKD,BUSY_INTR}, SendARData) {
  Pop_TriggerQueue;
  Send_ARData;
  ProcessNextState_ClearPending;
}

// For AtomicNoReturn the sequencer callback is performed when the data
// is sent, since no data response will come back for it.
transition({BUSY_BLKD,BUSY_INTR}, SendANRData) {
  Pop_TriggerQueue;
  Callback_AtomicNoReturn;
  Send_ANRData;
  CheckANRComp;
  ProcessNextState_ClearPending;
}
transition(BUSY_BLKD, SendInvSnpResp) {
Pop_TriggerQueue;
Send_InvSnpResp;
@@ -1025,6 +1216,26 @@ transition({BUSY_BLKD,BUSY_INTR}, SendComp_WU) {
ProcessNextState_ClearPending;
}
// Completer-side acks for incoming far atomics: an AtomicNoReturn is acked
// with a combined CompDBIDResp; an AtomicReturn gets a DBIDResp first and
// completes later with CompData carrying the original value.
transition(BUSY_BLKD, SendCompDBIDResp_ANR) {
  Pop_TriggerQueue;
  ExpectNCBWrData_A;
  Send_CompDBIDResp;
  ProcessNextState_ClearPending;
}

transition(BUSY_BLKD, SendDBIDResp_AR) {
  Pop_TriggerQueue;
  ExpectNCBWrData_A;
  Send_DBIDResp;
  ProcessNextState_ClearPending;
}

transition({BUSY_BLKD,BUSY_INTR}, SendCompData_AR) {
  Pop_TriggerQueue;
  Send_CompData_AR;
  ProcessNextState_ClearPending;
}
transition(BUSY_BLKD, SendCompDBIDRespStale) {
Pop_TriggerQueue;
Send_CompDBIDResp_Stale;
@@ -1085,6 +1296,7 @@ transition(BUSY_BLKD,
transition({BUSY_BLKD,BUSY_INTR}, NCBWrData) {
Receive_ReqDataResp;
UpdateDataState_FromWUDataResp;
UpdateDataState_FromADataResp;
Pop_DataInQueue;
ProcessNextState;
}
@@ -1238,10 +1450,11 @@ transition(BUSY_INTR, CompDBIDResp, BUSY_BLKD) {
}
// alternative flow for WU with separate Comp
transition(BUSY_INTR, DBIDResp, BUSY_BLKD) {
// Standalone DBIDResp (without Comp) — used by the split WriteUnique flow
// (comp_wu) and by AtomicReturn requests; also accepted from BUSY_BLKD.
transition({BUSY_INTR,BUSY_BLKD}, DBIDResp, BUSY_BLKD) {
  Receive_ReqResp;
  Receive_ReqResp_CopyDBID;
  Receive_ReqResp_WUNeedComp;
  Receive_ReqResp_AR;
  Pop_RespInQueue;
  ProcessNextState;
}

View File

@@ -51,6 +51,7 @@ machine(MachineType:Cache, "Cache coherency protocol") :
// sending necessary snoops.
Cycles read_hit_latency := 0;
Cycles read_miss_latency := 0;
Cycles atomic_op_latency := 0;
Cycles write_fe_latency := 0; // Front-end: Rcv req -> Snd req
Cycles write_be_latency := 0; // Back-end: Rcv ack -> Snd data
Cycles fill_latency := 0; // Fill latency
@@ -126,11 +127,24 @@ machine(MachineType:Cache, "Cache coherency protocol") :
// possible.
bool enable_DCT;
// Atomic Operation Policy
//  All Near: execute all atomics at the L1 (value 0; default)
//  Unique Near: execute atomics at the HNF for states I, SC, SD (value 1)
//  Present Near: execute all atomics at the L1 except in state I (value 2)
int policy_type := 0;

// Use separate Comp/DBIDResp responses for WriteUnique
bool comp_wu := "False";
// additional latency for the WU Comp response
Cycles comp_wu_latency := 0;

// Use separate Comp/DBIDResp responses for AtomicNoReturn
bool comp_anr := "False";
// additional latency for the ANR Comp response
Cycles comp_anr_latency := 0;
// Controls cache clusivity for different request types.
// set all alloc_on* to false to completely disable caching
bool alloc_on_readshared;
@@ -139,6 +153,7 @@ machine(MachineType:Cache, "Cache coherency protocol") :
bool alloc_on_writeback;
bool alloc_on_seq_acc;
bool alloc_on_seq_line_write;
bool alloc_on_atomic;
// Controls if the clusivity is strict.
bool dealloc_on_unique;
bool dealloc_on_shared;
@@ -285,6 +300,8 @@ machine(MachineType:Cache, "Cache coherency protocol") :
// See CHIRequestType in CHi-msg.sm for descriptions
Load, desc="", in_trans="yes";
Store, desc="", in_trans="yes";
AtomicLoad, desc="", in_trans="yes";
AtomicStore, desc="", in_trans="yes";
Prefetch, desc="", in_trans="yes";
ReadShared, desc="", in_trans="yes";
ReadNotSharedDirty, desc="", in_trans="yes";
@@ -300,6 +317,10 @@ machine(MachineType:Cache, "Cache coherency protocol") :
WriteUniquePtl_PoC, desc="", in_trans="yes";
WriteUniqueFull_PoC, desc="", in_trans="yes";
WriteUniqueFull_PoC_Alloc, desc="", in_trans="yes";
AtomicReturn, desc="", in_trans="yes";
AtomicNoReturn, desc="", in_trans="yes";
AtomicReturn_PoC, desc="", in_trans="yes";
AtomicNoReturn_PoC, desc="", in_trans="yes";
SnpCleanInvalid, desc="", in_trans="yes";
SnpShared, desc="", in_trans="yes";
SnpSharedFwd, desc="", in_trans="yes";
@@ -418,11 +439,12 @@ machine(MachineType:Cache, "Cache coherency protocol") :
DataArrayWriteOnFill, desc="Write the cache data array (cache fill)";
// Events for modeling the pipeline latency
ReadHitPipe, desc="Latency of reads served from local cache";
ReadMissPipe, desc="Latency of reads not served from local cache";
WriteFEPipe, desc="Front-end latency of write requests";
WriteBEPipe, desc="Back-end latency of write requests";
FillPipe, desc="Cache fill latency";
ReadHitPipe, desc="Latency of reads served from local cache";
ReadMissPipe, desc="Latency of reads not served from local cache";
WriteFEPipe, desc="Front-end latency of write requests";
WriteBEPipe, desc="Back-end latency of write requests";
FillPipe, desc="Cache fill latency";
DelayAtomic, desc="Atomic operation latency";
SnpSharedPipe, desc="Latency for SnpShared requests";
SnpInvPipe, desc="Latency for SnpUnique and SnpCleanInv requests";
SnpOncePipe, desc="Latency for SnpOnce requests";
@@ -435,9 +457,9 @@ machine(MachineType:Cache, "Cache coherency protocol") :
SendReadUnique, out_trans="yes", desc="Send a ReadUnique";
SendCompAck, desc="Send CompAck";
// Read handling at the completer
SendCompData, desc="Send CompData";
WaitCompAck, desc="Expect to receive CompAck";
SendRespSepData, desc="Send RespSepData for a DMT request";
SendCompData, desc="Send CompData";
WaitCompAck, desc="Expect to receive CompAck";
SendRespSepData, desc="Send RespSepData for a DMT request";
// Send a write request downstream.
SendWriteBackOrWriteEvict, out_trans="yes", desc="Send a WriteBackFull (if line is UD or SD) or WriteEvictFull (if UC)";
@@ -449,11 +471,25 @@ machine(MachineType:Cache, "Cache coherency protocol") :
SendWUData, desc="Send write unique data";
SendWUDataCB, desc="Send write unique data from a sequencer callback";
// Write handling at the completer
SendCompDBIDResp, desc="Ack WB with CompDBIDResp";
SendCompDBIDRespStale, desc="Ack stale WB with CompDBIDResp";
SendCompDBIDResp_WU, desc="Ack WU with CompDBIDResp and set expected data";
SendDBIDResp_WU, desc="Ack WU with DBIDResp and set expected data";
SendComp_WU, desc="Ack WU completion";
SendCompDBIDResp, desc="Ack WB with CompDBIDResp";
SendCompDBIDRespStale, desc="Ack stale WB with CompDBIDResp";
SendCompDBIDResp_WU, desc="Ack WU with CompDBIDResp and set expected data";
SendDBIDResp_WU, desc="Ack WU with DBIDResp and set expected data";
SendComp_WU, desc="Ack WU completion";
// Send an atomic request downstream.
SendAtomicReturn, out_trans="yes", desc="Send atomic request with return";
SendAtomicReturn_NoWait, out_trans="yes", desc="Send atomic request with return, but no DBID";
SendAtomicNoReturn, out_trans="yes", desc="Send atomic request without return";
SendARData, desc="Send atomic return request data";
SendANRData, desc="Send atomic no return request data";
// Atomic handling at the completer
SendDBIDResp_AR, desc="Ack AR with DBIDResp and set expected data";
SendCompData_AR, desc="Ack AR completion";
SendCompDBIDResp_ANR, desc="Ack ANR with CompDBIDResp and set expected data";
SendDBIDResp_ANR, desc="Ack ANR with DBIDResp and set expected data";
SendComp_ANR, desc="Ack ANR completion";
// Dataless requests
SendEvict, out_trans="yes", desc="Send a Evict";
@@ -502,6 +538,7 @@ machine(MachineType:Cache, "Cache coherency protocol") :
// Misc triggers
LoadHit, desc="Complete a load hit";
StoreHit, desc="Complete a store hit";
AtomicHit, desc="Complete an atomic hit";
UseTimeout, desc="Transition from UD_T -> UD";
RestoreFromHazard, desc="Restore from a snoop hazard";
TX_Data, desc="Transmit pending data messages";
@@ -613,6 +650,10 @@ machine(MachineType:Cache, "Cache coherency protocol") :
bool is_local_pf, desc="Request generated by a local prefetcher";
bool is_remote_pf, desc="Request generated a prefetcher in another cache";
// Atomic info associated with the transaction
WriteMask atomic_op, desc="Atomic Operation Wrapper";
bool atomic_to_be_done, desc="We have yet to perform the atomic";
// NOTE: seqReq is a smart pointer pointing to original CPU request object
// that triggers transactions associated with this TBE. seqReq carries some
// information (e.g., PC of requesting instruction, virtual address of this
@@ -630,8 +671,10 @@ machine(MachineType:Cache, "Cache coherency protocol") :
// stable state.
bool hasUseTimeout, desc="Line is locked under store/use timeout";
DataBlock dataBlk, desc="Local copy of the line";
DataBlock oldDataBlk, desc="Local copy of the line before executing atomic";
WriteMask dataBlkValid, desc="Marks which bytes in the DataBlock are valid";
bool dataValid, desc="Local copy is valid";
bool dataAMOValid, desc="Local copy is valid for AMO";
bool dataDirty, desc="Local copy is dirtry";
bool dataMaybeDirtyUpstream, desc="Line maybe dirty upstream";
bool dataUnique, desc="Line is unique either locally or upsatream";

View File

@@ -46,6 +46,8 @@ enumeration(CHIRequestType, desc="") {
Load;
Store;
StoreLine;
AtomicLoad;
AtomicStore;
// Incoming DVM-related requests generated by the sequencer
DvmTlbi_Initiate;
DvmSync_Initiate;
@@ -66,6 +68,9 @@ enumeration(CHIRequestType, desc="") {
WriteUniquePtl;
WriteUniqueFull;
AtomicReturn;
AtomicNoReturn;
SnpSharedFwd;
SnpNotSharedDirtyFwd;
SnpUniqueFwd;
@@ -108,6 +113,8 @@ structure(CHIRequestMsg, desc="", interface="Message") {
bool is_local_pf, desc="Request generated by a local prefetcher";
bool is_remote_pf, desc="Request generated a prefetcher in another cache";
WriteMask atomic_op, desc="Atomic Operation Wrapper";
bool usesTxnId, desc="True if using a Transaction ID", default="false";
Addr txnId, desc="Transaction ID", default="0";

View File

@@ -123,5 +123,14 @@ RubyRequest::functionalWrite(Packet *pkt)
return cBase < cTail;
}
void
RubyRequest::setWriteMask(uint32_t offset, uint32_t len,
    std::vector< std::pair<int,AtomicOpFunctor*>> atomicOps)
{
    // Mark the bytes touched by this access and attach the atomic
    // operations (pairs of offset-within-line and functor) that the
    // protocol will apply to them.
    m_writeMask.setMask(offset, len);
    m_writeMask.setAtomicOps(atomicOps);
}
} // namespace ruby
} // namespace gem5

View File

@@ -226,6 +226,8 @@ class RubyRequest : public Message
const PrefetchBit& getPrefetch() const { return m_Prefetch; }
RequestPtr getRequestPtr() const { return m_pkt->req; }
void setWriteMask(uint32_t offset, uint32_t len,
std::vector< std::pair<int,AtomicOpFunctor*>> atomicOps);
void print(std::ostream& out) const;
bool functionalRead(Packet *pkt);
bool functionalRead(Packet *pkt, WriteMask &mask);

View File

@@ -466,8 +466,12 @@ Sequencer::writeCallback(Addr address, DataBlock& data,
bool ruby_request = true;
while (!seq_req_list.empty()) {
SequencerRequest &seq_req = seq_req_list.front();
// Atomic requests may be executed remotely in the cache hierarchy
bool atomic_req =
((seq_req.m_type == RubyRequestType_ATOMIC_RETURN) ||
(seq_req.m_type == RubyRequestType_ATOMIC_NO_RETURN));
if (noCoales && !ruby_request) {
if ((noCoales || atomic_req) && !ruby_request) {
// Do not process follow-up requests
// (e.g. if full line no present)
// Reissue to the cache hierarchy
@@ -479,6 +483,8 @@ Sequencer::writeCallback(Addr address, DataBlock& data,
assert(seq_req.m_type != RubyRequestType_LD);
assert(seq_req.m_type != RubyRequestType_Load_Linked);
assert(seq_req.m_type != RubyRequestType_IFETCH);
assert(seq_req.m_type != RubyRequestType_ATOMIC_RETURN);
assert(seq_req.m_type != RubyRequestType_ATOMIC_NO_RETURN);
}
// handle write request
@@ -594,6 +600,62 @@ Sequencer::readCallback(Addr address, DataBlock& data,
}
}
void
Sequencer::atomicCallback(Addr address, DataBlock& data,
                         const bool externalHit, const MachineType mach,
                         const Cycles initialRequestTime,
                         const Cycles forwardRequestTime,
                         const Cycles firstResponseTime)
{
    //
    // Completion callback for ATOMIC_RETURN / ATOMIC_NO_RETURN requests.
    // Only the head request (the one that issued the ruby request) is
    // completed here; the next coalesced request for this line is reissued
    // to the cache hierarchy, because the atomic may have been executed
    // remotely and the line is not guaranteed to be present in this cache.
    //
    assert(address == makeLineAddress(address));
    assert(m_RequestTable.find(address) != m_RequestTable.end());
    auto &seq_req_list = m_RequestTable[address];

    // Perform hitCallback only on the first cpu request that
    // issued the ruby request
    bool ruby_request = true;
    while (!seq_req_list.empty()) {
        SequencerRequest &seq_req = seq_req_list.front();
        if (ruby_request) {
            // Check that the request was an atomic memory operation
            // and record the latency
            assert((seq_req.m_type == RubyRequestType_ATOMIC_RETURN) ||
                   (seq_req.m_type == RubyRequestType_ATOMIC_NO_RETURN));
            recordMissLatency(&seq_req, true, mach, externalHit,
                              initialRequestTime, forwardRequestTime,
                              firstResponseTime);
        } else {
            // Follow-up Read, Write or Atomic request: reissue it to the
            // cache hierarchy (we don't know whether the atomic was
            // performed remotely, so the line may not be present here)
            issueRequest(seq_req.pkt, seq_req.m_second_type);
            break;
        }
        // Atomics clear the LL/SC monitor entry for this line
        llscClearMonitor(address);
        markRemoved();
        ruby_request = false;
        hitCallback(&seq_req, data, true, mach, externalHit,
                    initialRequestTime, forwardRequestTime,
                    firstResponseTime, false);
        seq_req_list.pop_front();
    }

    // free all outstanding requests corresponding to this address
    if (seq_req_list.empty()) {
        m_RequestTable.erase(address);
    }
}
void
Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
bool llscSuccess,
@@ -637,10 +699,16 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
(type == RubyRequestType_IFETCH) ||
(type == RubyRequestType_RMW_Read) ||
(type == RubyRequestType_Locked_RMW_Read) ||
(type == RubyRequestType_Load_Linked)) {
(type == RubyRequestType_Load_Linked) ||
(type == RubyRequestType_ATOMIC_RETURN)) {
pkt->setData(
data.getData(getOffset(request_address), pkt->getSize()));
DPRINTF(RubySequencer, "read data %s\n", data);
if (type == RubyRequestType_ATOMIC_RETURN) {
DPRINTF(RubySequencer, "ATOMIC RETURN data %s\n", data);
} else {
DPRINTF(RubySequencer, "read data %s\n", data);
}
} else if (pkt->req->isSwap()) {
assert(!pkt->isMaskedWrite());
std::vector<uint8_t> overwrite_val(pkt->getSize());
@@ -807,6 +875,19 @@ Sequencer::makeRequest(PacketPtr pkt)
} else if (pkt->req->isTlbiCmd()) {
primary_type = secondary_type = tlbiCmdToRubyRequestType(pkt);
DPRINTF(RubySequencer, "Issuing TLBI\n");
#if defined (PROTOCOL_CHI)
} else if (pkt->isAtomicOp()) {
if (pkt->req->isAtomicReturn()){
DPRINTF(RubySequencer, "Issuing ATOMIC RETURN \n");
primary_type = secondary_type =
RubyRequestType_ATOMIC_RETURN;
} else {
DPRINTF(RubySequencer, "Issuing ATOMIC NO RETURN\n");
primary_type = secondary_type =
RubyRequestType_ATOMIC_NO_RETURN;
}
#endif
} else {
//
// To support SwapReq, we need to check isWrite() first: a SwapReq
@@ -914,6 +995,18 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
RubyAccessMode_Supervisor, pkt,
PrefetchBit_No, proc_id, core_id);
if (pkt->isAtomicOp() &&
((secondary_type == RubyRequestType_ATOMIC_RETURN) ||
(secondary_type == RubyRequestType_ATOMIC_NO_RETURN))){
// Create the blocksize, access mask and atomicops
uint32_t offset = getOffset(pkt->getAddr());
std::vector<std::pair<int,AtomicOpFunctor*>> atomicOps;
atomicOps.push_back(std::make_pair<int,AtomicOpFunctor*>
(offset, pkt->getAtomicOp()));
msg->setWriteMask(offset, pkt->getSize(), atomicOps);
}
DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %#x %s\n",
curTick(), m_version, "Seq", "Begin", "", "",
printAddress(msg->getPhysicalAddress()),

View File

@@ -126,6 +126,14 @@ class Sequencer : public RubyPort
const Cycles forwardRequestTime = Cycles(0),
const Cycles firstResponseTime = Cycles(0));
void atomicCallback(Addr address,
DataBlock& data,
const bool externalHit = false,
const MachineType mach = MachineType_NUM,
const Cycles initialRequestTime = Cycles(0),
const Cycles forwardRequestTime = Cycles(0),
const Cycles firstResponseTime = Cycles(0));
void unaddressedCallback(Addr unaddressedReqId,
RubyRequestType requestType,
const MachineType mach = MachineType_NUM,