mem-ruby: LL/SC fixes

The implementation for load-linked/store-conditional did not work
correctly for multi-core simulations. Since load-links were treated as
stores, it was not possible for a line to have multiple readers which
often resulted in livelock when using these instructions to implement
mutexes. This improved implementation treats load-linked instructions
similarly to loads but locks the line after a copy has been fetched
locally. Writes to a monitored address ensure the 'linked' property is
blown away and any subsequent store-conditional will fail.

Change-Id: I19bd74459e26732c92c8b594901936e6439fb073
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/27103
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Maintainer: Bobby R. Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Timothy Hayes
2020-04-27 15:03:26 +01:00
committed by Pouya Fotouhi
parent 40b79073f0
commit dd6cd33344
3 changed files with 155 additions and 73 deletions

View File

@@ -1,4 +1,16 @@
/*
* Copyright (c) 2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 1999-2005 Mark D. Hill and David A. Wood
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved.
@@ -112,6 +124,10 @@ structure (Sequencer, external = "yes") {
void writeCallback(Addr, DataBlock, bool, MachineType,
Cycles, Cycles, Cycles);
// ll/sc support
void writeCallbackScFail(Addr, DataBlock);
bool llscCheckMonitor(Addr);
void checkCoherence(Addr);
void evictionCallback(Addr);
void recordRequestType(SequencerRequestType);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019 ARM Limited
* Copyright (c) 2019-2020 ARM Limited
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
@@ -45,6 +45,7 @@
#include "base/logging.hh"
#include "base/str.hh"
#include "cpu/testers/rubytest/RubyTester.hh"
#include "debug/LLSC.hh"
#include "debug/MemoryAccess.hh"
#include "debug/ProtocolTrace.hh"
#include "debug/RubySequencer.hh"
@@ -89,6 +90,64 @@ Sequencer::~Sequencer()
{
}
void
Sequencer::llscLoadLinked(const Addr claddr)
{
AbstractCacheEntry *line = m_dataCache_ptr->lookup(claddr);
if (line) {
line->setLocked(m_version);
DPRINTF(LLSC, "LLSC Monitor - inserting load linked - "
"addr=0x%lx - cpu=%u\n", claddr, m_version);
}
}
void
Sequencer::llscClearMonitor(const Addr claddr)
{
AbstractCacheEntry *line = m_dataCache_ptr->lookup(claddr);
if (line && line->isLocked(m_version)) {
line->clearLocked();
DPRINTF(LLSC, "LLSC Monitor - clearing due to store - "
"addr=0x%lx - cpu=%u\n", claddr, m_version);
}
}
bool
Sequencer::llscStoreConditional(const Addr claddr)
{
AbstractCacheEntry *line = m_dataCache_ptr->lookup(claddr);
if (!line)
return false;
DPRINTF(LLSC, "LLSC Monitor - clearing due to "
"store conditional - "
"addr=0x%lx - cpu=%u\n",
claddr, m_version);
if (line->isLocked(m_version)) {
line->clearLocked();
return true;
} else {
line->clearLocked();
return false;
}
}
bool
Sequencer::llscCheckMonitor(const Addr address)
{
const Addr claddr = makeLineAddress(address);
AbstractCacheEntry *line = m_dataCache_ptr->lookup(claddr);
if (!line)
return false;
if (line->isLocked(m_version)) {
return true;
} else {
return false;
}
}
void
Sequencer::wakeup()
{
@@ -202,62 +261,6 @@ Sequencer::markRemoved()
m_outstanding_count--;
}
void
Sequencer::invalidateSC(Addr address)
{
    AbstractCacheEntry *entry = m_dataCache_ptr->lookup(address);
    if (entry == nullptr)
        return;

    // The controller has lost the coherence permissions, hence the lock
    // on the cache line maintained by the cache should be cleared.
    if (entry->isLocked(m_version))
        entry->clearLocked();
}
// Applies LL/SC bookkeeping to the locally cached line for the given
// request. Returns false only for a store-conditional that finds the
// line no longer locked by this sequencer (m_version); all other
// request types report success. SC outcome is also reported to the CPU
// through the packet's extra data (1 = success, 0 = failure).
bool
Sequencer::handleLlsc(Addr address, SequencerRequest* request)
{
AbstractCacheEntry *e = m_dataCache_ptr->lookup(address);
// No local copy of the line: nothing to lock, clear, or check.
if (!e)
return true;
// The success flag indicates whether the LLSC operation was successful.
// LL ops will always succeed, but SC may fail if the cache line is no
// longer locked.
bool success = true;
if (request->m_type == RubyRequestType_Store_Conditional) {
if (!e->isLocked(m_version)) {
//
// For failed SC requests, indicate the failure to the cpu by
// setting the extra data to zero.
//
request->pkt->req->setExtraData(0);
success = false;
} else {
//
// For successful SC requests, indicate the success to the cpu by
// setting the extra data to one.
//
request->pkt->req->setExtraData(1);
}
//
// Independent of success, all SC operations must clear the lock
//
e->clearLocked();
} else if (request->m_type == RubyRequestType_Load_Linked) {
//
// Note: To fully follow Alpha LLSC semantics, should the LL clear any
// previously locked cache lines?
//
e->setLocked(m_version);
} else if (e->isLocked(m_version)) {
//
// Normal writes should clear the locked address
//
e->clearLocked();
}
return success;
}
void
Sequencer::recordMissLatency(SequencerRequest* srequest, bool llscSuccess,
const MachineType respondingMach,
@@ -324,6 +327,13 @@ Sequencer::recordMissLatency(SequencerRequest* srequest, bool llscSuccess,
}
}
// Proxy to writeCallback for a failed store-conditional: first clears
// this sequencer's monitor entry for the line, then completes the
// write through the normal writeCallback path.
void
Sequencer::writeCallbackScFail(Addr address, DataBlock& data)
{
llscClearMonitor(address);
writeCallback(address, data);
}
void
Sequencer::writeCallback(Addr address, DataBlock& data,
const bool externalHit, const MachineType mach,
@@ -349,21 +359,27 @@ Sequencer::writeCallback(Addr address, DataBlock& data,
SequencerRequest &seq_req = seq_req_list.front();
if (ruby_request) {
assert(seq_req.m_type != RubyRequestType_LD);
assert(seq_req.m_type != RubyRequestType_Load_Linked);
assert(seq_req.m_type != RubyRequestType_IFETCH);
}
// handle write request
if ((seq_req.m_type != RubyRequestType_LD) &&
(seq_req.m_type != RubyRequestType_Load_Linked) &&
(seq_req.m_type != RubyRequestType_IFETCH)) {
//
// For Alpha, properly handle LL, SC, and write requests with
// respect to locked cache blocks.
//
// Not valid for Garnet_standalone protocol
//
bool success = true;
if (!m_runningGarnetStandalone)
success = handleLlsc(address, &seq_req);
// LL/SC support (tested with ARMv8)
bool success = false;
if (seq_req.m_type != RubyRequestType_Store_Conditional) {
// Regular stores to addresses being monitored
// will fail (remove) the monitor entry.
llscClearMonitor(address);
} else {
// Store conditionals must first check the monitor
// if they will succeed or not
success = llscStoreConditional(address);
seq_req.pkt->req->setExtraData(success ? 1 : 0);
}
// Handle SLICC block_on behavior for Locked_RMW accesses. NOTE: the
// address variable here is assumed to be a line address, so when
@@ -435,11 +451,13 @@ Sequencer::readCallback(Addr address, DataBlock& data,
SequencerRequest &seq_req = seq_req_list.front();
if (ruby_request) {
assert((seq_req.m_type == RubyRequestType_LD) ||
(seq_req.m_type == RubyRequestType_Load_Linked) ||
(seq_req.m_type == RubyRequestType_IFETCH));
} else {
aliased_loads++;
}
if ((seq_req.m_type != RubyRequestType_LD) &&
(seq_req.m_type != RubyRequestType_Load_Linked) &&
(seq_req.m_type != RubyRequestType_IFETCH)) {
// Write request: reissue request to the cache hierarchy
issueRequest(seq_req.pkt, seq_req.m_second_type);
@@ -480,6 +498,12 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
Addr request_address(pkt->getAddr());
RubyRequestType type = srequest->m_type;
// Load-linked handling
if (type == RubyRequestType_Load_Linked) {
Addr line_addr = makeLineAddress(request_address);
llscLoadLinked(line_addr);
}
// update the data unless it is a non-data-carrying flush
if (RubySystem::getWarmupEnabled()) {
data.setData(pkt->getConstPtr<uint8_t>(),
@@ -553,23 +577,26 @@ Sequencer::makeRequest(PacketPtr pkt)
RubyRequestType secondary_type = RubyRequestType_NULL;
if (pkt->isLLSC()) {
//
// Alpha LL/SC instructions need to be handled carefully by the cache
// LL/SC instructions need to be handled carefully by the cache
// coherence protocol to ensure they follow the proper semantics. In
// particular, by identifying the operations as atomic, the protocol
// should understand that migratory sharing optimizations should not
// be performed (i.e. a load between the LL and SC should not steal
// away exclusive permission).
//
// The following logic works correctly with the semantics
// of armV8 LDEX/STEX instructions.
if (pkt->isWrite()) {
DPRINTF(RubySequencer, "Issuing SC\n");
primary_type = RubyRequestType_Store_Conditional;
secondary_type = RubyRequestType_ST;
} else {
DPRINTF(RubySequencer, "Issuing LL\n");
assert(pkt->isRead());
primary_type = RubyRequestType_Load_Linked;
secondary_type = RubyRequestType_LD;
}
secondary_type = RubyRequestType_ATOMIC;
} else if (pkt->req->isLockedRMW()) {
//
// x86 locked instructions are translated to store cache coherence
@@ -724,6 +751,7 @@ Sequencer::recordRequestType(SequencerRequestType requestType) {
void
Sequencer::evictionCallback(Addr address)
{
// Losing the line also loses the LL/SC monitor on it, so any
// subsequent store-conditional to this address must fail.
llscClearMonitor(address);
ruby_eviction_callback(address);
}

View File

@@ -1,6 +1,6 @@
/*
* Copyright (c) 2019 ARM Limited
* All rights reserved.
* Copyright (c) 2019-2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
@@ -84,6 +84,13 @@ class Sequencer : public RubyPort
Sequencer(const Params *);
~Sequencer();
/**
* Proxy function to writeCallback that first
* invalidates the line address in the local monitor.
*/
void writeCallbackScFail(Addr address,
DataBlock& data);
// Public Methods
void wakeup(); // Used only for deadlock detection
void resetStats() override;
@@ -121,7 +128,6 @@ class Sequencer : public RubyPort
void markRemoved();
void evictionCallback(Addr address);
void invalidateSC(Addr address);
int coreId() const { return m_coreId; }
virtual int functionalWrite(Packet *func_pkt) override;
@@ -191,7 +197,6 @@ class Sequencer : public RubyPort
RequestStatus insertRequest(PacketPtr pkt, RubyRequestType primary_type,
RubyRequestType secondary_type);
bool handleLlsc(Addr address, SequencerRequest* request);
// Private copy constructor and assignment operator
Sequencer(const Sequencer& obj);
@@ -257,6 +262,39 @@ class Sequencer : public RubyPort
std::vector<Stats::Counter> m_IncompleteTimes;
EventFunctionWrapper deadlockCheckEvent;
// support for LL/SC
/**
* Places the cache line address into the global monitor
* tagged with this Sequencer object's version id.
*/
void llscLoadLinked(const Addr);
/**
* Removes the cache line address from the global monitor.
* This is independent of this Sequencer object's version id.
*/
void llscClearMonitor(const Addr);
/**
* Searches for cache line address in the global monitor
* tagged with this Sequencer object's version id.
* If a match is found, the entry is erased from
* the global monitor.
*
* @return a boolean indicating if the line address was found.
*/
bool llscStoreConditional(const Addr);
public:
/**
* Searches for cache line address in the global monitor
* tagged with this Sequencer object's version id.
*
* @return a boolean indicating if the line address was found.
*/
bool llscCheckMonitor(const Addr);
};
inline std::ostream&