riscv: fix AMO, LR and SC instructions

(1) Atomic Memory Operation (AMO)

This patch changes how RISC-V AMO instructions are implemented. For each
AMO, instead of issuing a locking load and an unlocking store request to
downstream memory system, this patch issues a single memory request that
contains a corresponding AtomicOpFunctor to the memory system. Once the
memory system receives the request, the atomic operation is executed in
one single step.

This patch also changes how AMO instructions handle the acquire and
release flags (e.g., amoadd.aq and amoadd.rl). If an AMO carries the
acquire flag, a memory fence micro-op is inserted after the AMO
completes. If an AMO carries the release flag, a memory fence micro-op
is inserted before the AMO executes. If both flags are specified, the
AMO is broken down into a sequence of 3 micro-ops:
mem fence -> atomic RMW -> mem fence. This change makes the AMO
implementation comply with the release consistency model.

(2) Load-Reserved (LR) and Store-Conditional (SC)

Addresses locked by LR instructions are tracked in a stack data
structure. An LR instruction pushes its target address onto the stack,
and an SC instruction pops the top address from the stack. As specified
by the RISC-V ISA, an SC fails if its target address does not match that
of the most recent LR.

Previously, there was a single stack for all hardware thread contexts.
A stack shared between thread contexts can lead to an infinite sequence
of failed SCs if LRs from other threads keep pushing new addresses onto
this stack.

This patch gives each context its private stack to address the problem.

This patch also adds extra memory fence micro-ops to lr/sc to guarantee
a correct execution order of memory instructions with respect to the
release consistency model.

Change-Id: I1e95900367c89dd866ba872a5203f63359ac51ae
Reviewed-on: https://gem5-review.googlesource.com/c/8189
Reviewed-by: Alec Roelke <ar4jc@virginia.edu>
Maintainer: Alec Roelke <ar4jc@virginia.edu>
This commit is contained in:
Tuan Ta
2018-02-12 23:13:34 -05:00
parent 25dc765889
commit 4f4846c532
6 changed files with 533 additions and 219 deletions

View File

@@ -43,6 +43,22 @@ using namespace std;
namespace RiscvISA
{
// memfence micro instruction
/**
 * Render the fence micro-op as text: the raw machine instruction printed
 * with the "0x%08x" format, a single space, then the mnemonic.
 *
 * @param pc     unused by this implementation
 * @param symtab unused by this implementation
 * @return the disassembly string
 */
string
MemFenceMicro::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    string text = csprintf("0x%08x", machInst);
    text += ' ';
    text += mnemonic;
    return text;
}
/**
 * Execute the fence micro-op. The micro-op does no work of its own here and
 * unconditionally reports success.
 *
 * NOTE(review): any ordering effect presumably comes from the flags set on
 * the micro-op where it is constructed (e.g. IsMemBarrier) -- confirm
 * against the CPU models.
 *
 * @param xc        execution context (not used)
 * @param traceData trace record (not used)
 * @return always NoFault
 */
Fault
MemFenceMicro::execute(ExecContext *xc, Trace::InstRecord *traceData) const
{
    return NoFault;
}
// load-reserved
string LoadReserved::generateDisassembly(Addr pc,
const SymbolTable *symtab) const
{
@@ -52,6 +68,16 @@ string LoadReserved::generateDisassembly(Addr pc,
return ss.str();
}
/**
 * Render the load-reserved micro-op as
 * "<mnemonic> <dest0>, (<src0>)", where the register names come from
 * registerName() applied to _destRegIdx[0] and _srcRegIdx[0].
 *
 * @param pc     unused by this implementation
 * @param symtab unused by this implementation
 * @return the disassembly string
 */
string
LoadReservedMicro::generateDisassembly(Addr pc,
                                       const SymbolTable *symtab) const
{
    string text = mnemonic;
    text += ' ';
    text += registerName(_destRegIdx[0]);
    text += ", (";
    text += registerName(_srcRegIdx[0]);
    text += ')';
    return text;
}
// store-conditional
string StoreCond::generateDisassembly(Addr pc,
const SymbolTable *symtab) const
{
@@ -62,6 +88,17 @@ string StoreCond::generateDisassembly(Addr pc,
return ss.str();
}
/**
 * Render the store-conditional micro-op as
 * "<mnemonic> <dest0>, <src1>, (<src0>)", where the register names come
 * from registerName() applied to _destRegIdx[0], _srcRegIdx[1] and
 * _srcRegIdx[0] respectively.
 *
 * @param pc     unused by this implementation
 * @param symtab unused by this implementation
 * @return the disassembly string
 */
string
StoreCondMicro::generateDisassembly(Addr pc,
                                    const SymbolTable *symtab) const
{
    string text = mnemonic;
    text += ' ';
    text += registerName(_destRegIdx[0]);
    text += ", ";
    text += registerName(_srcRegIdx[1]);
    text += ", (";
    text += registerName(_srcRegIdx[0]);
    text += ')';
    return text;
}
// AMOs
string AtomicMemOp::generateDisassembly(Addr pc,
const SymbolTable *symtab) const
{
@@ -76,8 +113,10 @@ string AtomicMemOpMicro::generateDisassembly(Addr pc,
const SymbolTable *symtab) const
{
stringstream ss;
ss << csprintf("0x%08x", machInst) << ' ' << mnemonic;
ss << mnemonic << ' ' << registerName(_destRegIdx[0]) << ", "
<< registerName(_srcRegIdx[1]) << ", ("
<< registerName(_srcRegIdx[0]) << ')';
return ss.str();
}
}
}

View File

@@ -41,24 +41,62 @@
namespace RiscvISA
{
class LoadReserved : public MemInst
/**
 * Memory-fence micro-instruction.
 *
 * The public constructor always passes the fixed mnemonic "fence" to the
 * RiscvMicroInst base, so the micro-op disassembles under that name
 * whatever macro-instruction it belongs to.
 */
class MemFenceMicro : public RiscvMicroInst
{
public:
// Build a fence micro-op from the raw machine instruction and op class.
MemFenceMicro(ExtMachInst _machInst, OpClass __opClass)
: RiscvMicroInst("fence", _machInst, __opClass)
{ }
protected:
// Additionally inherit the base-class constructors.
using RiscvMicroInst::RiscvMicroInst;
// Defined in the matching source file; see there for behaviour.
Fault execute(ExecContext *, Trace::InstRecord *) const override;
std::string generateDisassembly(
Addr pc, const SymbolTable *symtab) const override;
};
// load-reserved
class LoadReserved : public RiscvMacroInst
{
protected:
using MemInst::MemInst;
using RiscvMacroInst::RiscvMacroInst;
std::string generateDisassembly(
Addr pc, const SymbolTable *symtab) const override;
};
class StoreCond : public MemInst
class LoadReservedMicro : public RiscvMicroInst
{
protected:
using MemInst::MemInst;
Request::Flags memAccessFlags;
using RiscvMicroInst::RiscvMicroInst;
std::string generateDisassembly(
Addr pc, const SymbolTable *symtab) const override;
};
/**
 * Store-conditional macro-instruction base class.
 *
 * Derives from RiscvMacroInst, inherits its constructors unchanged and
 * supplies its own disassembly routine.
 */
class StoreCond : public RiscvMacroInst
{
protected:
// Reuse the RiscvMacroInst constructors as-is.
using RiscvMacroInst::RiscvMacroInst;
std::string generateDisassembly(
Addr pc, const SymbolTable *symtab) const override;
};
/**
 * Store-conditional micro-instruction base class.
 *
 * Derives from RiscvMicroInst, inherits its constructors unchanged and
 * supplies its own disassembly routine.
 */
class StoreCondMicro : public RiscvMicroInst
{
protected:
// Request flags for the micro-op's memory access.
// NOTE(review): presumably filled in by the generated constructor from the
// format's mem_flags -- confirm.
Request::Flags memAccessFlags;
// Reuse the RiscvMicroInst constructors as-is.
using RiscvMicroInst::RiscvMicroInst;
std::string generateDisassembly(
Addr pc, const SymbolTable *symtab) const override;
};
// AMOs
class AtomicMemOp : public RiscvMacroInst
{
protected:
@@ -78,6 +116,23 @@ class AtomicMemOpMicro : public RiscvMicroInst
Addr pc, const SymbolTable *symtab) const override;
};
/**
 * Atomic-op functor built from an operand and an arbitrary callable.
 *
 * The functor stores a value of type T together with a
 * std::function<void(T*, T)>. execute() hands the callable the target
 * location and the stored value, so the callable alone decides how the
 * location is updated.
 */
template<typename T>
class AtomicGenericOp : public TypedAtomicOpFunctor<T>
{
  public:
    /**
     * @param _a  operand passed as the second argument of every call
     * @param _op callable invoked as _op(location, _a)
     */
    AtomicGenericOp(T _a, std::function<void(T*,T)> _op)
        : operand(_a), apply(_op)
    { }

    /**
     * Create a heap-allocated copy of this functor. Nothing in this class
     * frees the returned object.
     */
    AtomicOpFunctor*
    clone()
    {
        return new AtomicGenericOp<T>(*this);
    }

    /** Apply the stored callable to the location b with the stored operand. */
    void
    execute(T *b)
    {
        apply(b, operand);
    }

  private:
    T operand;
    std::function<void(T*,T)> apply;
};
}
#endif // __ARCH_RISCV_INSTS_AMO_HH__
#endif // __ARCH_RISCV_INSTS_AMO_HH__

View File

@@ -512,44 +512,69 @@ decode QUADRANT default Unknown::unknown() {
}}, {{
Rd = result;
}}, inst_flags=IsStoreConditional, mem_flags=LLSC);
format AtomicMemOp {
0x0: amoadd_w({{Rt_sd = Mem_sw;}}, {{
Mem_sw = Rs2_sw + Rt_sd;
Rd_sd = Rt_sd;
}}, {{EA = Rs1;}});
0x1: amoswap_w({{Rt_sd = Mem_sw;}}, {{
Mem_sw = Rs2_uw;
Rd_sd = Rt_sd;
}}, {{EA = Rs1;}});
0x4: amoxor_w({{Rt_sd = Mem_sw;}}, {{
Mem_sw = Rs2_uw^Rt_sd;
Rd_sd = Rt_sd;
}}, {{EA = Rs1;}});
0x8: amoor_w({{Rt_sd = Mem_sw;}}, {{
Mem_sw = Rs2_uw | Rt_sd;
Rd_sd = Rt_sd;
}}, {{EA = Rs1;}});
0xc: amoand_w({{Rt_sd = Mem_sw;}}, {{
Mem_sw = Rs2_uw&Rt_sd;
Rd_sd = Rt_sd;
}}, {{EA = Rs1;}});
0x10: amomin_w({{Rt_sd = Mem_sw;}}, {{
Mem_sw = min<int32_t>(Rs2_sw, Rt_sd);
Rd_sd = Rt_sd;
}}, {{EA = Rs1;}});
0x14: amomax_w({{Rt_sd = Mem_sw;}}, {{
Mem_sw = max<int32_t>(Rs2_sw, Rt_sd);
Rd_sd = Rt_sd;
}}, {{EA = Rs1;}});
0x18: amominu_w({{Rt_sd = Mem_sw;}}, {{
Mem_sw = min<uint32_t>(Rs2_uw, Rt_sd);
Rd_sd = Rt_sd;
}}, {{EA = Rs1;}});
0x1c: amomaxu_w({{Rt_sd = Mem_sw;}}, {{
Mem_sw = max<uint32_t>(Rs2_uw, Rt_sd);
Rd_sd = Rt_sd;
}}, {{EA = Rs1;}});
}
// 32-bit AMOs. Every entry hands two code snippets to the AtomicMemOp
// format: the first copies the word delivered by the access (Mem_sw) into
// Rd_sd; the second builds amo_op, a functor holding the Rs2 operand and
// the update to apply to the memory word (b points at memory, a is Rs2).
// NOTE(review): Mem_sw is presumably the value memory held before the
// update, as the ATOMIC_RETURN_OP flag name suggests -- confirm.
// amoadd.w: signed 32-bit add of Rs2 into memory.
0x0: AtomicMemOp::amoadd_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<int32_t> *amo_op =
new AtomicGenericOp<int32_t>(Rs2_sw,
[](int32_t* b, int32_t a){ *b += a; });
}}, mem_flags=ATOMIC_RETURN_OP);
// amoswap.w: overwrite memory with Rs2.
0x1: AtomicMemOp::amoswap_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<uint32_t> *amo_op =
new AtomicGenericOp<uint32_t>(Rs2_uw,
[](uint32_t* b, uint32_t a){ *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
// amoxor.w: bitwise XOR of Rs2 into memory.
0x4: AtomicMemOp::amoxor_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<uint32_t> *amo_op =
new AtomicGenericOp<uint32_t>(Rs2_uw,
[](uint32_t* b, uint32_t a){ *b ^= a; });
}}, mem_flags=ATOMIC_RETURN_OP);
// amoor.w: bitwise OR of Rs2 into memory.
0x8: AtomicMemOp::amoor_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<uint32_t> *amo_op =
new AtomicGenericOp<uint32_t>(Rs2_uw,
[](uint32_t* b, uint32_t a){ *b |= a; });
}}, mem_flags=ATOMIC_RETURN_OP);
// amoand.w: bitwise AND of Rs2 into memory.
0xc: AtomicMemOp::amoand_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<uint32_t> *amo_op =
new AtomicGenericOp<uint32_t>(Rs2_uw,
[](uint32_t* b, uint32_t a){ *b &= a; });
}}, mem_flags=ATOMIC_RETURN_OP);
// amomin.w: signed compare; memory is replaced only when Rs2 is smaller.
0x10: AtomicMemOp::amomin_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<int32_t> *amo_op =
new AtomicGenericOp<int32_t>(Rs2_sw,
[](int32_t* b, int32_t a){ if (a < *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
// amomax.w: signed compare; memory is replaced only when Rs2 is larger.
0x14: AtomicMemOp::amomax_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<int32_t> *amo_op =
new AtomicGenericOp<int32_t>(Rs2_sw,
[](int32_t* b, int32_t a){ if (a > *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
// amominu.w: unsigned compare; memory is replaced only when Rs2 is smaller.
0x18: AtomicMemOp::amominu_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<uint32_t> *amo_op =
new AtomicGenericOp<uint32_t>(Rs2_uw,
[](uint32_t* b, uint32_t a){ if (a < *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
// amomaxu.w: unsigned compare; memory is replaced only when Rs2 is larger.
0x1c: AtomicMemOp::amomaxu_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<uint32_t> *amo_op =
new AtomicGenericOp<uint32_t>(Rs2_uw,
[](uint32_t* b, uint32_t a){ if (a > *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}
0x3: decode AMOFUNCT {
0x2: LoadReserved::lr_d({{
@@ -560,44 +585,69 @@ decode QUADRANT default Unknown::unknown() {
}}, {{
Rd = result;
}}, mem_flags=LLSC, inst_flags=IsStoreConditional);
format AtomicMemOp {
0x0: amoadd_d({{Rt_sd = Mem_sd;}}, {{
Mem_sd = Rs2_sd + Rt_sd;
Rd_sd = Rt_sd;
}}, {{EA = Rs1;}});
0x1: amoswap_d({{Rt = Mem;}}, {{
Mem = Rs2;
Rd = Rt;
}}, {{EA = Rs1;}});
0x4: amoxor_d({{Rt = Mem;}}, {{
Mem = Rs2^Rt;
Rd = Rt;
}}, {{EA = Rs1;}});
0x8: amoor_d({{Rt = Mem;}}, {{
Mem = Rs2 | Rt;
Rd = Rt;
}}, {{EA = Rs1;}});
0xc: amoand_d({{Rt = Mem;}}, {{
Mem = Rs2&Rt;
Rd = Rt;
}}, {{EA = Rs1;}});
0x10: amomin_d({{Rt_sd = Mem_sd;}}, {{
Mem_sd = min(Rs2_sd, Rt_sd);
Rd_sd = Rt_sd;
}}, {{EA = Rs1;}});
0x14: amomax_d({{Rt_sd = Mem_sd;}}, {{
Mem_sd = max(Rs2_sd, Rt_sd);
Rd_sd = Rt_sd;
}}, {{EA = Rs1;}});
0x18: amominu_d({{Rt = Mem;}}, {{
Mem = min(Rs2, Rt);
Rd = Rt;
}}, {{EA = Rs1;}});
0x1c: amomaxu_d({{Rt = Mem;}}, {{
Mem = max(Rs2, Rt);
Rd = Rt;
}}, {{EA = Rs1;}});
}
// 64-bit AMOs. Every entry hands two code snippets to the AtomicMemOp
// format: the first copies the doubleword delivered by the access (Mem_sd)
// into Rd_sd; the second builds amo_op, a functor holding the Rs2 operand
// and the update to apply to memory (b points at memory, a is Rs2).
// NOTE(review): Mem_sd is presumably the value memory held before the
// update, as the ATOMIC_RETURN_OP flag name suggests -- confirm.
// amoadd.d: signed 64-bit add of Rs2 into memory.
0x0: AtomicMemOp::amoadd_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<int64_t> *amo_op =
new AtomicGenericOp<int64_t>(Rs2_sd,
[](int64_t* b, int64_t a){ *b += a; });
}}, mem_flags=ATOMIC_RETURN_OP);
// amoswap.d: overwrite memory with Rs2.
0x1: AtomicMemOp::amoswap_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<uint64_t> *amo_op =
new AtomicGenericOp<uint64_t>(Rs2_ud,
[](uint64_t* b, uint64_t a){ *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
// amoxor.d: bitwise XOR of Rs2 into memory.
0x4: AtomicMemOp::amoxor_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<uint64_t> *amo_op =
new AtomicGenericOp<uint64_t>(Rs2_ud,
[](uint64_t* b, uint64_t a){ *b ^= a; });
}}, mem_flags=ATOMIC_RETURN_OP);
// amoor.d: bitwise OR of Rs2 into memory.
0x8: AtomicMemOp::amoor_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<uint64_t> *amo_op =
new AtomicGenericOp<uint64_t>(Rs2_ud,
[](uint64_t* b, uint64_t a){ *b |= a; });
}}, mem_flags=ATOMIC_RETURN_OP);
// amoand.d: bitwise AND of Rs2 into memory.
0xc: AtomicMemOp::amoand_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<uint64_t> *amo_op =
new AtomicGenericOp<uint64_t>(Rs2_ud,
[](uint64_t* b, uint64_t a){ *b &= a; });
}}, mem_flags=ATOMIC_RETURN_OP);
// amomin.d: signed compare; memory is replaced only when Rs2 is smaller.
0x10: AtomicMemOp::amomin_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<int64_t> *amo_op =
new AtomicGenericOp<int64_t>(Rs2_sd,
[](int64_t* b, int64_t a){ if (a < *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
// amomax.d: signed compare; memory is replaced only when Rs2 is larger.
0x14: AtomicMemOp::amomax_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<int64_t> *amo_op =
new AtomicGenericOp<int64_t>(Rs2_sd,
[](int64_t* b, int64_t a){ if (a > *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
// amominu.d: unsigned compare; memory is replaced only when Rs2 is smaller.
0x18: AtomicMemOp::amominu_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<uint64_t> *amo_op =
new AtomicGenericOp<uint64_t>(Rs2_ud,
[](uint64_t* b, uint64_t a){ if (a < *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
// amomaxu.d: unsigned compare; memory is replaced only when Rs2 is larger.
0x1c: AtomicMemOp::amomaxu_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<uint64_t> *amo_op =
new AtomicGenericOp<uint64_t>(Rs2_ud,
[](uint64_t* b, uint64_t a){ if (a > *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}
}
0x0c: decode FUNCT3 {

View File

@@ -29,10 +29,7 @@
//
// Authors: Alec Roelke
////////////////////////////////////////////////////////////////////
//
// Atomic memory operation instructions
//
// Declaration templates
def template AtomicMemOpDeclare {{
/**
* Static instruction class for an AtomicMemOp operation
@@ -45,24 +42,14 @@ def template AtomicMemOpDeclare {{
protected:
class %(class_name)sLoad : public %(base_class)sMicro
/*
* The main RMW part of an AMO
*/
class %(class_name)sRMW : public %(base_class)sMicro
{
public:
// Constructor
%(class_name)sLoad(ExtMachInst machInst, %(class_name)s *_p);
Fault execute(ExecContext *, Trace::InstRecord *) const override;
Fault initiateAcc(ExecContext *,
Trace::InstRecord *) const override;
Fault completeAcc(PacketPtr, ExecContext *,
Trace::InstRecord *) const override;
};
class %(class_name)sStore : public %(base_class)sMicro
{
public:
// Constructor
%(class_name)sStore(ExtMachInst machInst, %(class_name)s *_p);
%(class_name)sRMW(ExtMachInst machInst, %(class_name)s *_p);
Fault execute(ExecContext *, Trace::InstRecord *) const override;
Fault initiateAcc(ExecContext *,
@@ -73,15 +60,90 @@ def template AtomicMemOpDeclare {{
};
}};
def template LRSCConstructor {{
// Class declaration shared by the LoadReserved and StoreCond formats: a
// macro-instruction class with a nested Micro class that declares the
// execute / initiateAcc / completeAcc entry points.
def template LRSCDeclare {{
/**
* Static instruction class for a load-reserved or store-conditional
* macro-op
*/
class %(class_name)s : public %(base_class)s
{
public:
// Constructor
%(class_name)s(ExtMachInst machInst);
protected:
// Micro-op that performs the actual memory access
class %(class_name)sMicro : public %(base_class)sMicro
{
public:
// Constructor; _p is the enclosing macro-op
%(class_name)sMicro(ExtMachInst machInst, %(class_name)s *_p);
Fault execute(ExecContext *, Trace::InstRecord *) const override;
Fault initiateAcc(ExecContext *,
Trace::InstRecord *) const override;
Fault completeAcc(PacketPtr, ExecContext *,
Trace::InstRecord *) const override;
};
};
}};
// Constructor templates
// Macro-op constructor for lr/sc. Builds the micro-op list
// [release fence] -> lr/sc access -> [acquire fence], where the fences are
// present only when the RL / AQ bits are set.
def template LRSCMacroConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst machInst):
%(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
{
%(constructor)s;
// fold the AQ / RL bits into the request flags
if (AQ)
memAccessFlags = memAccessFlags | Request::ACQUIRE;
if (RL)
memAccessFlags = memAccessFlags | Request::RELEASE;
StaticInstPtr rel_fence;
StaticInstPtr lrsc;
StaticInstPtr acq_fence;
// set up release fence (first micro-op when RL is set)
if (RL) {
rel_fence = new MemFenceMicro(machInst, No_OpClass);
rel_fence->setFlag(IsFirstMicroop);
rel_fence->setFlag(IsMemBarrier);
rel_fence->setFlag(IsDelayedCommit);
}
// set up the lr/sc memory micro-op; it is first when no release fence
// precedes it and last when no acquire fence follows it
lrsc = new %(class_name)sMicro(machInst, this);
if (!RL) {
lrsc->setFlag(IsFirstMicroop);
}
if (!AQ) {
lrsc->setFlag(IsLastMicroop);
} else {
// an acquire fence follows, so this is not the final micro-op
lrsc->setFlag(IsDelayedCommit);
}
// set up acquire fence (last micro-op when AQ is set)
if (AQ) {
acq_fence = new MemFenceMicro(machInst, No_OpClass);
acq_fence->setFlag(IsLastMicroop);
acq_fence->setFlag(IsMemBarrier);
}
// assemble the sequence in program order
if (RL && AQ) {
microops = {rel_fence, lrsc, acq_fence};
} else if (RL) {
microops = {rel_fence, lrsc};
} else if (AQ) {
microops = {lrsc, acq_fence};
} else {
microops = {lrsc};
}
}
}};
// Constructor of the nested lr/sc Micro class: forwards the mnemonic,
// machine instruction and op class to the Micro base class, then runs the
// generated constructor body. The _p parameter is not used here.
def template LRSCMicroConstructor {{
%(class_name)s::%(class_name)sMicro::%(class_name)sMicro(
ExtMachInst machInst, %(class_name)s *_p)
: %(base_class)sMicro("%(mnemonic)s", machInst, %(op_class)s)
{
%(constructor)s;
}
}};
@@ -90,39 +152,95 @@ def template AtomicMemOpMacroConstructor {{
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
{
%(constructor)s;
microops = {new %(class_name)sLoad(machInst, this),
new %(class_name)sStore(machInst, this)};
StaticInstPtr rel_fence;
StaticInstPtr rmw_op;
StaticInstPtr acq_fence;
// set up release fence
if (RL) {
rel_fence = new MemFenceMicro(machInst, No_OpClass);
rel_fence->setFlag(IsFirstMicroop);
rel_fence->setFlag(IsMemBarrier);
rel_fence->setFlag(IsDelayedCommit);
}
// set up atomic rmw op
rmw_op = new %(class_name)sRMW(machInst, this);
if (!RL) {
rmw_op->setFlag(IsFirstMicroop);
}
if (!AQ) {
rmw_op->setFlag(IsLastMicroop);
} else {
rmw_op->setFlag(IsDelayedCommit);
}
// set up acquire fence
if (AQ) {
acq_fence = new MemFenceMicro(machInst, No_OpClass);
acq_fence->setFlag(IsLastMicroop);
acq_fence->setFlag(IsMemBarrier);
}
if (RL && AQ) {
microops = {rel_fence, rmw_op, acq_fence};
} else if (RL) {
microops = {rel_fence, rmw_op};
} else if (AQ) {
microops = {rmw_op, acq_fence};
} else {
microops = {rmw_op};
}
}
}};
def template AtomicMemOpLoadConstructor {{
%(class_name)s::%(class_name)sLoad::%(class_name)sLoad(
def template AtomicMemOpRMWConstructor {{
%(class_name)s::%(class_name)sRMW::%(class_name)sRMW(
ExtMachInst machInst, %(class_name)s *_p)
: %(base_class)s("%(mnemonic)s[l]", machInst, %(op_class)s)
{
%(constructor)s;
flags[IsFirstMicroop] = true;
flags[IsDelayedCommit] = true;
if (AQ)
memAccessFlags = Request::ACQUIRE;
// overwrite default flags
flags[IsMemRef] = true;
flags[IsLoad] = false;
flags[IsStore] = false;
flags[IsAtomic] = true;
}
}};
def template AtomicMemOpStoreConstructor {{
%(class_name)s::%(class_name)sStore::%(class_name)sStore(
ExtMachInst machInst, %(class_name)s *_p)
: %(base_class)s("%(mnemonic)s[s]", machInst, %(op_class)s)
// execute() templates
def template LoadReservedExecute {{
Fault
%(class_name)s::%(class_name)sMicro::execute(
ExecContext *xc, Trace::InstRecord *traceData) const
{
%(constructor)s;
flags[IsLastMicroop] = true;
flags[IsNonSpeculative] = true;
if (RL)
memAccessFlags = Request::RELEASE;
Addr EA;
Fault fault = NoFault;
%(op_decl)s;
%(op_rd)s;
%(ea_code)s;
if (fault == NoFault) {
fault = readMemAtomic(xc, traceData, EA, Mem, memAccessFlags);
%(memacc_code)s;
}
if (fault == NoFault) {
%(op_wb)s;
}
return fault;
}
}};
def template StoreCondExecute {{
Fault %(class_name)s::execute(ExecContext *xc,
Fault %(class_name)s::%(class_name)sMicro::execute(ExecContext *xc,
Trace::InstRecord *traceData) const
{
Addr EA;
@@ -157,8 +275,8 @@ def template StoreCondExecute {{
}
}};
def template AtomicMemOpLoadExecute {{
Fault %(class_name)s::%(class_name)sLoad::execute(ExecContext *xc,
def template AtomicMemOpRMWExecute {{
Fault %(class_name)s::%(class_name)sRMW::execute(ExecContext *xc,
Trace::InstRecord *traceData) const
{
Addr EA;
@@ -167,13 +285,18 @@ def template AtomicMemOpLoadExecute {{
%(op_decl)s;
%(op_rd)s;
%(ea_code)s;
%(amoop_code)s;
assert(amo_op);
if (fault == NoFault) {
fault = readMemAtomic(xc, traceData, EA, Mem, memAccessFlags);
fault = amoMemAtomic(xc, traceData, Mem, EA, memAccessFlags,
amo_op);
%(memacc_code)s;
}
if (fault == NoFault) {
%(code)s;
%(postacc_code)s;
}
if (fault == NoFault) {
@@ -184,36 +307,11 @@ def template AtomicMemOpLoadExecute {{
}
}};
def template AtomicMemOpStoreExecute {{
Fault %(class_name)s::%(class_name)sStore::execute(ExecContext *xc,
Trace::InstRecord *traceData) const
{
Addr EA;
Fault fault = NoFault;
// initiateAcc() templates
%(op_decl)s;
%(op_rd)s;
%(ea_code)s;
if (fault == NoFault) {
%(code)s;
}
if (fault == NoFault) {
fault = writeMemAtomic(xc, traceData, Mem, EA, memAccessFlags,
nullptr);
}
if (fault == NoFault) {
%(op_wb)s;
}
return fault;
}
}};
def template AtomicMemOpLoadInitiateAcc {{
Fault %(class_name)s::%(class_name)sLoad::initiateAcc(ExecContext *xc,
def template LoadReservedInitiateAcc {{
Fault
%(class_name)s::%(class_name)sMicro::initiateAcc(ExecContext *xc,
Trace::InstRecord *traceData) const
{
Addr EA;
@@ -231,9 +329,10 @@ def template AtomicMemOpLoadInitiateAcc {{
}
}};
def template AtomicMemOpStoreInitiateAcc {{
Fault %(class_name)s::%(class_name)sStore::initiateAcc(
ExecContext *xc, Trace::InstRecord *traceData) const
def template StoreCondInitiateAcc {{
Fault
%(class_name)s::%(class_name)sMicro::initiateAcc(ExecContext *xc,
Trace::InstRecord *traceData) const
{
Addr EA;
Fault fault = NoFault;
@@ -243,12 +342,62 @@ def template AtomicMemOpStoreInitiateAcc {{
%(ea_code)s;
if (fault == NoFault) {
%(code)s;
%(memacc_code)s;
}
if (fault == NoFault) {
fault = writeMemTiming(xc, traceData, Mem, EA, memAccessFlags,
nullptr);
fault = writeMemTiming(xc, traceData, Mem, EA,
memAccessFlags, nullptr);
}
if (fault == NoFault) {
%(op_wb)s;
}
return fault;
}
}};
// initiateAcc() for the RMW micro-op of an AMO: reads the source operands,
// computes the effective address, builds the atomic-op functor and issues
// the AMO request through initiateMemAMO().
def template AtomicMemOpRMWInitiateAcc {{
Fault
%(class_name)s::%(class_name)sRMW::initiateAcc(ExecContext *xc,
Trace::InstRecord *traceData) const
{
Addr EA;
Fault fault = NoFault;
%(op_src_decl)s;
%(op_rd)s;
%(ea_code)s;
// the spliced-in snippet is expected to define amo_op
// NOTE(review): amo_op is created before the fault check below and this
// template never releases it if the request is not issued -- confirm
// who owns it in that case.
%(amoop_code)s;
assert(amo_op);
if (fault == NoFault) {
fault = initiateMemAMO(xc, traceData, EA, Mem, memAccessFlags,
amo_op);
}
return fault;
}
}};
// completeAcc() templates
def template LoadReservedCompleteAcc {{
Fault
%(class_name)s::%(class_name)sMicro::completeAcc(PacketPtr pkt,
ExecContext *xc, Trace::InstRecord *traceData) const
{
Fault fault = NoFault;
%(op_decl)s;
%(op_rd)s;
getMem(pkt, Mem, traceData);
if (fault == NoFault) {
%(memacc_code)s;
}
if (fault == NoFault) {
@@ -260,8 +409,8 @@ def template AtomicMemOpStoreInitiateAcc {{
}};
def template StoreCondCompleteAcc {{
Fault %(class_name)s::completeAcc(Packet *pkt, ExecContext *xc,
Trace::InstRecord *traceData) const
Fault %(class_name)s::%(class_name)sMicro::completeAcc(Packet *pkt,
ExecContext *xc, Trace::InstRecord *traceData) const
{
Fault fault = NoFault;
@@ -283,8 +432,8 @@ def template StoreCondCompleteAcc {{
}
}};
def template AtomicMemOpLoadCompleteAcc {{
Fault %(class_name)s::%(class_name)sLoad::completeAcc(PacketPtr pkt,
def template AtomicMemOpRMWCompleteAcc {{
Fault %(class_name)s::%(class_name)sRMW::completeAcc(Packet *pkt,
ExecContext *xc, Trace::InstRecord *traceData) const
{
Fault fault = NoFault;
@@ -295,7 +444,7 @@ def template AtomicMemOpLoadCompleteAcc {{
getMem(pkt, Mem, traceData);
if (fault == NoFault) {
%(code)s;
%(memacc_code)s;
}
if (fault == NoFault) {
@@ -306,16 +455,20 @@ def template AtomicMemOpLoadCompleteAcc {{
}
}};
// completeAcc() for the Store micro-op of an AMO: does nothing with the
// response packet and always returns NoFault.
def template AtomicMemOpStoreCompleteAcc {{
Fault %(class_name)s::%(class_name)sStore::completeAcc(PacketPtr pkt,
ExecContext *xc, Trace::InstRecord *traceData) const
{
return NoFault;
}
}};
// LR/SC/AMO decode formats
def format LoadReserved(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}},
mem_flags=[], inst_flags=[]) {{
macro_ea_code = ''
macro_inst_flags = []
macro_iop = InstObjParams(name, Name, 'LoadReserved', macro_ea_code,
macro_inst_flags)
header_output = LRSCDeclare.subst(macro_iop)
decoder_output = LRSCMacroConstructor.subst(macro_iop)
decode_block = BasicDecode.subst(macro_iop)
exec_output = ''
mem_flags = makeList(mem_flags)
inst_flags = makeList(inst_flags)
iop = InstObjParams(name, Name, 'LoadReserved',
@@ -324,16 +477,25 @@ def format LoadReserved(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}},
iop.constructor += '\n\tmemAccessFlags = memAccessFlags | ' + \
'|'.join(['Request::%s' % flag for flag in mem_flags]) + ';'
header_output = LoadStoreDeclare.subst(iop)
decoder_output = LRSCConstructor.subst(iop)
decode_block = BasicDecode.subst(iop)
exec_output = LoadExecute.subst(iop) \
+ LoadInitiateAcc.subst(iop) \
+ LoadCompleteAcc.subst(iop)
decoder_output += LRSCMicroConstructor.subst(iop)
decode_block += BasicDecode.subst(iop)
exec_output += LoadReservedExecute.subst(iop) \
+ LoadReservedInitiateAcc.subst(iop) \
+ LoadReservedCompleteAcc.subst(iop)
}};
def format StoreCond(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}},
mem_flags=[], inst_flags=[]) {{
macro_ea_code = ''
macro_inst_flags = []
macro_iop = InstObjParams(name, Name, 'StoreCond', macro_ea_code,
macro_inst_flags)
header_output = LRSCDeclare.subst(macro_iop)
decoder_output = LRSCMacroConstructor.subst(macro_iop)
decode_block = BasicDecode.subst(macro_iop)
exec_output = ''
mem_flags = makeList(mem_flags)
inst_flags = makeList(inst_flags)
iop = InstObjParams(name, Name, 'StoreCond',
@@ -342,37 +504,40 @@ def format StoreCond(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}},
iop.constructor += '\n\tmemAccessFlags = memAccessFlags | ' + \
'|'.join(['Request::%s' % flag for flag in mem_flags]) + ';'
header_output = LoadStoreDeclare.subst(iop)
decoder_output = LRSCConstructor.subst(iop)
decode_block = BasicDecode.subst(iop)
exec_output = StoreCondExecute.subst(iop) \
+ StoreInitiateAcc.subst(iop) \
decoder_output += LRSCMicroConstructor.subst(iop)
decode_block += BasicDecode.subst(iop)
exec_output += StoreCondExecute.subst(iop) \
+ StoreCondInitiateAcc.subst(iop) \
+ StoreCondCompleteAcc.subst(iop)
}};
def format AtomicMemOp(load_code, store_code, ea_code, load_flags=[],
store_flags=[], inst_flags=[]) {{
macro_iop = InstObjParams(name, Name, 'AtomicMemOp', ea_code, inst_flags)
def format AtomicMemOp(memacc_code, amoop_code, postacc_code={{ }},
ea_code={{EA = Rs1;}}, mem_flags=[], inst_flags=[]) {{
macro_ea_code = ''
macro_inst_flags = []
macro_iop = InstObjParams(name, Name, 'AtomicMemOp', macro_ea_code,
macro_inst_flags)
header_output = AtomicMemOpDeclare.subst(macro_iop)
decoder_output = AtomicMemOpMacroConstructor.subst(macro_iop)
decode_block = BasicDecode.subst(macro_iop)
exec_output = ''
load_inst_flags = makeList(inst_flags) + ["IsMemRef", "IsLoad"]
load_iop = InstObjParams(name, Name, 'AtomicMemOpMicro',
{'ea_code': ea_code, 'code': load_code, 'op_name': 'Load'},
load_inst_flags)
decoder_output += AtomicMemOpLoadConstructor.subst(load_iop)
exec_output += AtomicMemOpLoadExecute.subst(load_iop) \
+ AtomicMemOpLoadInitiateAcc.subst(load_iop) \
+ AtomicMemOpLoadCompleteAcc.subst(load_iop)
rmw_mem_flags = makeList(mem_flags)
rmw_inst_flags = makeList(inst_flags)
rmw_iop = InstObjParams(name, Name, 'AtomicMemOpMicro',
{'ea_code': ea_code,
'memacc_code': memacc_code,
'postacc_code': postacc_code,
'amoop_code': amoop_code},
rmw_inst_flags)
store_inst_flags = makeList(inst_flags) + ["IsMemRef", "IsStore"]
store_iop = InstObjParams(name, Name, 'AtomicMemOpMicro',
{'ea_code': ea_code, 'code': store_code, 'op_name': 'Store'},
store_inst_flags)
decoder_output += AtomicMemOpStoreConstructor.subst(store_iop)
exec_output += AtomicMemOpStoreExecute.subst(store_iop) \
+ AtomicMemOpStoreInitiateAcc.subst(store_iop) \
+ AtomicMemOpStoreCompleteAcc.subst(store_iop)
rmw_iop.constructor += '\n\tmemAccessFlags = memAccessFlags | ' + \
'|'.join(['Request::%s' % flag for flag in rmw_mem_flags]) + ';'
decoder_output += AtomicMemOpRMWConstructor.subst(rmw_iop)
decode_block += BasicDecode.subst(rmw_iop)
exec_output += AtomicMemOpRMWExecute.subst(rmw_iop) \
+ AtomicMemOpRMWInitiateAcc.subst(rmw_iop) \
+ AtomicMemOpRMWCompleteAcc.subst(rmw_iop)
}};

View File

@@ -6,7 +6,5 @@
namespace RiscvISA
{
std::stack<Addr> locked_addrs;
std::unordered_map<int, std::stack<Addr>> locked_addrs;
}

View File

@@ -49,6 +49,7 @@
#define __ARCH_RISCV_LOCKED_MEM_HH__
#include <stack>
#include <unordered_map>
#include "arch/registers.hh"
#include "base/logging.hh"
@@ -67,24 +68,28 @@ const int WARN_FAILURE = 10000;
// RISC-V allows multiple locks per hart, but each SC has to unlock the most
// recent one, so we use a stack here.
extern std::stack<Addr> locked_addrs;
extern std::unordered_map<int, std::stack<Addr>> locked_addrs;
template <class XC> inline void
handleLockedSnoop(XC *xc, PacketPtr pkt, Addr cacheBlockMask)
{
if (locked_addrs.empty())
std::stack<Addr>& locked_addr_stack = locked_addrs[xc->contextId()];
if (locked_addr_stack.empty())
return;
Addr snoop_addr = pkt->getAddr() & cacheBlockMask;
DPRINTF(LLSC, "Locked snoop on address %x.\n", snoop_addr);
if ((locked_addrs.top() & cacheBlockMask) == snoop_addr)
locked_addrs.pop();
if ((locked_addr_stack.top() & cacheBlockMask) == snoop_addr)
locked_addr_stack.pop();
}
template <class XC> inline void
handleLockedRead(XC *xc, const RequestPtr &req)
{
locked_addrs.push(req->getPaddr() & ~0xF);
std::stack<Addr>& locked_addr_stack = locked_addrs[xc->contextId()];
locked_addr_stack.push(req->getPaddr() & ~0xF);
DPRINTF(LLSC, "[cid:%d]: Reserved address %x.\n",
req->contextId(), req->getPaddr() & ~0xF);
}
@@ -96,21 +101,23 @@ handleLockedSnoopHit(XC *xc)
template <class XC> inline bool
handleLockedWrite(XC *xc, const RequestPtr &req, Addr cacheBlockMask)
{
std::stack<Addr>& locked_addr_stack = locked_addrs[xc->contextId()];
// Normally RISC-V uses zero to indicate success and nonzero to indicate
// failure (right now only 1 is reserved), but in gem5 zero indicates
// failure and one indicates success, so here we conform to that (it should
// be switched in the instruction's implementation)
DPRINTF(LLSC, "[cid:%d]: locked_addrs empty? %s.\n", req->contextId(),
locked_addrs.empty() ? "yes" : "no");
if (!locked_addrs.empty()) {
locked_addr_stack.empty() ? "yes" : "no");
if (!locked_addr_stack.empty()) {
DPRINTF(LLSC, "[cid:%d]: addr = %x.\n", req->contextId(),
req->getPaddr() & ~0xF);
DPRINTF(LLSC, "[cid:%d]: last locked addr = %x.\n", req->contextId(),
locked_addrs.top());
locked_addr_stack.top());
}
if (locked_addrs.empty()
|| locked_addrs.top() != ((req->getPaddr() & ~0xF))) {
if (locked_addr_stack.empty()
|| locked_addr_stack.top() != ((req->getPaddr() & ~0xF))) {
req->setExtraData(0);
int stCondFailures = xc->readStCondFailures();
xc->setStCondFailures(++stCondFailures);