misc: Merge branch 'release-staging-v21-1' into develop
Change-Id: I6ba57d7f70be70ae43fab396780d18623679a59a
This commit is contained in:
@@ -32397,6 +32397,15 @@ namespace Gcn3ISA
|
||||
}
|
||||
|
||||
vdst.write();
|
||||
|
||||
/**
|
||||
* This is needed because we treat this instruction as a load
|
||||
* but it's not an actual memory request.
|
||||
* Without this, the destination register never gets marked as
|
||||
* free, leading to a possible deadlock
|
||||
*/
|
||||
wf->computeUnit->vrf[wf->simdId]->
|
||||
scheduleWriteOperandsFromLoad(wf, gpuDynInst);
|
||||
} // execute
|
||||
// --- Inst_DS__DS_PERMUTE_B32 class methods ---
|
||||
|
||||
@@ -32468,6 +32477,15 @@ namespace Gcn3ISA
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->rdLmReqsInPipe--;
|
||||
wf->validateRequestCounters();
|
||||
|
||||
/**
|
||||
* This is needed because we treat this instruction as a load
|
||||
* but it's not an actual memory request.
|
||||
* Without this, the destination register never gets marked as
|
||||
* free, leading to a possible deadlock
|
||||
*/
|
||||
wf->computeUnit->vrf[wf->simdId]->
|
||||
scheduleWriteOperandsFromLoad(wf, gpuDynInst);
|
||||
} // execute
|
||||
// --- Inst_DS__DS_BPERMUTE_B32 class methods ---
|
||||
|
||||
@@ -32539,6 +32557,15 @@ namespace Gcn3ISA
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->rdLmReqsInPipe--;
|
||||
wf->validateRequestCounters();
|
||||
|
||||
/**
|
||||
* This is needed because we treat this instruction as a load
|
||||
* but it's not an actual memory request.
|
||||
* Without this, the destination register never gets marked as
|
||||
* free, leading to a possible deadlock
|
||||
*/
|
||||
wf->computeUnit->vrf[wf->simdId]->
|
||||
scheduleWriteOperandsFromLoad(wf, gpuDynInst);
|
||||
} // execute
|
||||
|
||||
// --- Inst_DS__DS_ADD_U64 class methods ---
|
||||
@@ -34308,9 +34335,52 @@ namespace Gcn3ISA
|
||||
void
|
||||
Inst_DS__DS_WRITE_B96::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
panicUnimplemented();
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
gpuDynInst->execUnitId = wf->execUnitId;
|
||||
gpuDynInst->latency.init(gpuDynInst->computeUnit());
|
||||
gpuDynInst->latency.set(
|
||||
gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
|
||||
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
|
||||
ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
|
||||
ConstVecOperandU32 data1(gpuDynInst, extData.DATA0 + 1);
|
||||
ConstVecOperandU32 data2(gpuDynInst, extData.DATA0 + 2);
|
||||
|
||||
addr.read();
|
||||
data0.read();
|
||||
data1.read();
|
||||
data2.read();
|
||||
|
||||
calcAddr(gpuDynInst, addr);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (gpuDynInst->exec_mask[lane]) {
|
||||
(reinterpret_cast<VecElemU32*>(
|
||||
gpuDynInst->d_data))[lane * 4] = data0[lane];
|
||||
(reinterpret_cast<VecElemU32*>(
|
||||
gpuDynInst->d_data))[lane * 4 + 1] = data1[lane];
|
||||
(reinterpret_cast<VecElemU32*>(
|
||||
gpuDynInst->d_data))[lane * 4 + 2] = data2[lane];
|
||||
}
|
||||
}
|
||||
|
||||
gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
|
||||
}
|
||||
|
||||
void
|
||||
Inst_DS__DS_WRITE_B96::initiateAcc(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Addr offset0 = instData.OFFSET0;
|
||||
Addr offset1 = instData.OFFSET1;
|
||||
Addr offset = (offset1 << 8) | offset0;
|
||||
|
||||
initMemWrite<3>(gpuDynInst, offset);
|
||||
} // initiateAcc
|
||||
|
||||
void
|
||||
Inst_DS__DS_WRITE_B96::completeAcc(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
} // completeAcc
|
||||
|
||||
Inst_DS__DS_WRITE_B128::Inst_DS__DS_WRITE_B128(InFmt_DS *iFmt)
|
||||
: Inst_DS(iFmt, "ds_write_b128")
|
||||
{
|
||||
@@ -34327,9 +34397,56 @@ namespace Gcn3ISA
|
||||
void
|
||||
Inst_DS__DS_WRITE_B128::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
panicUnimplemented();
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
gpuDynInst->execUnitId = wf->execUnitId;
|
||||
gpuDynInst->latency.init(gpuDynInst->computeUnit());
|
||||
gpuDynInst->latency.set(
|
||||
gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
|
||||
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
|
||||
ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
|
||||
ConstVecOperandU32 data1(gpuDynInst, extData.DATA0 + 1);
|
||||
ConstVecOperandU32 data2(gpuDynInst, extData.DATA0 + 2);
|
||||
ConstVecOperandU32 data3(gpuDynInst, extData.DATA0 + 3);
|
||||
|
||||
addr.read();
|
||||
data0.read();
|
||||
data1.read();
|
||||
data2.read();
|
||||
data3.read();
|
||||
|
||||
calcAddr(gpuDynInst, addr);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (gpuDynInst->exec_mask[lane]) {
|
||||
(reinterpret_cast<VecElemU32*>(
|
||||
gpuDynInst->d_data))[lane * 4] = data0[lane];
|
||||
(reinterpret_cast<VecElemU32*>(
|
||||
gpuDynInst->d_data))[lane * 4 + 1] = data1[lane];
|
||||
(reinterpret_cast<VecElemU32*>(
|
||||
gpuDynInst->d_data))[lane * 4 + 2] = data2[lane];
|
||||
(reinterpret_cast<VecElemU32*>(
|
||||
gpuDynInst->d_data))[lane * 4 + 3] = data3[lane];
|
||||
}
|
||||
}
|
||||
|
||||
gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
|
||||
}
|
||||
|
||||
void
|
||||
Inst_DS__DS_WRITE_B128::initiateAcc(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Addr offset0 = instData.OFFSET0;
|
||||
Addr offset1 = instData.OFFSET1;
|
||||
Addr offset = (offset1 << 8) | offset0;
|
||||
|
||||
initMemWrite<4>(gpuDynInst, offset);
|
||||
} // initiateAcc
|
||||
|
||||
void
|
||||
Inst_DS__DS_WRITE_B128::completeAcc(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
} // completeAcc
|
||||
|
||||
Inst_DS__DS_READ_B96::Inst_DS__DS_READ_B96(InFmt_DS *iFmt)
|
||||
: Inst_DS(iFmt, "ds_read_b96")
|
||||
{
|
||||
@@ -34345,7 +34462,51 @@ namespace Gcn3ISA
|
||||
void
|
||||
Inst_DS__DS_READ_B96::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
panicUnimplemented();
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
gpuDynInst->execUnitId = wf->execUnitId;
|
||||
gpuDynInst->latency.init(gpuDynInst->computeUnit());
|
||||
gpuDynInst->latency.set(
|
||||
gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
|
||||
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
|
||||
|
||||
addr.read();
|
||||
|
||||
calcAddr(gpuDynInst, addr);
|
||||
|
||||
gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
|
||||
}
|
||||
|
||||
void
|
||||
Inst_DS__DS_READ_B96::initiateAcc(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Addr offset0 = instData.OFFSET0;
|
||||
Addr offset1 = instData.OFFSET1;
|
||||
Addr offset = (offset1 << 8) | offset0;
|
||||
|
||||
initMemRead<3>(gpuDynInst, offset);
|
||||
}
|
||||
|
||||
void
|
||||
Inst_DS__DS_READ_B96::completeAcc(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
VecOperandU32 vdst0(gpuDynInst, extData.VDST);
|
||||
VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
|
||||
VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (gpuDynInst->exec_mask[lane]) {
|
||||
vdst0[lane] = (reinterpret_cast<VecElemU32*>(
|
||||
gpuDynInst->d_data))[lane * 4];
|
||||
vdst1[lane] = (reinterpret_cast<VecElemU32*>(
|
||||
gpuDynInst->d_data))[lane * 4 + 1];
|
||||
vdst2[lane] = (reinterpret_cast<VecElemU32*>(
|
||||
gpuDynInst->d_data))[lane * 4 + 2];
|
||||
}
|
||||
}
|
||||
|
||||
vdst0.write();
|
||||
vdst1.write();
|
||||
vdst2.write();
|
||||
}
|
||||
|
||||
Inst_DS__DS_READ_B128::Inst_DS__DS_READ_B128(InFmt_DS *iFmt)
|
||||
@@ -34363,9 +34524,57 @@ namespace Gcn3ISA
|
||||
void
|
||||
Inst_DS__DS_READ_B128::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
panicUnimplemented();
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
gpuDynInst->execUnitId = wf->execUnitId;
|
||||
gpuDynInst->latency.init(gpuDynInst->computeUnit());
|
||||
gpuDynInst->latency.set(
|
||||
gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
|
||||
ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
|
||||
|
||||
addr.read();
|
||||
|
||||
calcAddr(gpuDynInst, addr);
|
||||
|
||||
gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
|
||||
}
|
||||
|
||||
void
|
||||
Inst_DS__DS_READ_B128::initiateAcc(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Addr offset0 = instData.OFFSET0;
|
||||
Addr offset1 = instData.OFFSET1;
|
||||
Addr offset = (offset1 << 8) | offset0;
|
||||
|
||||
initMemRead<4>(gpuDynInst, offset);
|
||||
} // initiateAcc
|
||||
|
||||
void
|
||||
Inst_DS__DS_READ_B128::completeAcc(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
VecOperandU32 vdst0(gpuDynInst, extData.VDST);
|
||||
VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
|
||||
VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);
|
||||
VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (gpuDynInst->exec_mask[lane]) {
|
||||
vdst0[lane] = (reinterpret_cast<VecElemU32*>(
|
||||
gpuDynInst->d_data))[lane * 4];
|
||||
vdst1[lane] = (reinterpret_cast<VecElemU32*>(
|
||||
gpuDynInst->d_data))[lane * 4 + 1];
|
||||
vdst2[lane] = (reinterpret_cast<VecElemU32*>(
|
||||
gpuDynInst->d_data))[lane * 4 + 2];
|
||||
vdst3[lane] = (reinterpret_cast<VecElemU32*>(
|
||||
gpuDynInst->d_data))[lane * 4 + 3];
|
||||
}
|
||||
}
|
||||
|
||||
vdst0.write();
|
||||
vdst1.write();
|
||||
vdst2.write();
|
||||
vdst3.write();
|
||||
} // completeAcc
|
||||
|
||||
Inst_MUBUF__BUFFER_LOAD_FORMAT_X
|
||||
::Inst_MUBUF__BUFFER_LOAD_FORMAT_X(InFmt_MUBUF *iFmt)
|
||||
: Inst_MUBUF(iFmt, "buffer_load_format_x")
|
||||
|
||||
@@ -35226,6 +35226,8 @@ namespace Gcn3ISA
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
void initiateAcc(GPUDynInstPtr) override;
|
||||
void completeAcc(GPUDynInstPtr) override;
|
||||
}; // Inst_DS__DS_WRITE_B96
|
||||
|
||||
class Inst_DS__DS_WRITE_B128 : public Inst_DS
|
||||
@@ -35258,6 +35260,8 @@ namespace Gcn3ISA
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
void initiateAcc(GPUDynInstPtr) override;
|
||||
void completeAcc(GPUDynInstPtr) override;
|
||||
}; // Inst_DS__DS_WRITE_B128
|
||||
|
||||
class Inst_DS__DS_READ_B96 : public Inst_DS
|
||||
@@ -35290,6 +35294,8 @@ namespace Gcn3ISA
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
void initiateAcc(GPUDynInstPtr) override;
|
||||
void completeAcc(GPUDynInstPtr) override;
|
||||
}; // Inst_DS__DS_READ_B96
|
||||
|
||||
class Inst_DS__DS_READ_B128 : public Inst_DS
|
||||
@@ -35322,6 +35328,8 @@ namespace Gcn3ISA
|
||||
} // getOperandSize
|
||||
|
||||
void execute(GPUDynInstPtr) override;
|
||||
void initiateAcc(GPUDynInstPtr) override;
|
||||
void completeAcc(GPUDynInstPtr) override;
|
||||
}; // Inst_DS__DS_READ_B128
|
||||
|
||||
class Inst_MUBUF__BUFFER_LOAD_FORMAT_X : public Inst_MUBUF
|
||||
|
||||
@@ -416,6 +416,25 @@ namespace Gcn3ISA
|
||||
}
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void
|
||||
initMemRead(GPUDynInstPtr gpuDynInst, Addr offset)
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (gpuDynInst->exec_mask[lane]) {
|
||||
Addr vaddr = gpuDynInst->addr[lane] + offset;
|
||||
for (int i = 0; i < N; ++i) {
|
||||
(reinterpret_cast<VecElemU32*>(
|
||||
gpuDynInst->d_data))[lane * N + i]
|
||||
= wf->ldsChunk->read<VecElemU32>(
|
||||
vaddr + i*sizeof(VecElemU32));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void
|
||||
initDualMemRead(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
|
||||
@@ -450,6 +469,25 @@ namespace Gcn3ISA
|
||||
}
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void
|
||||
initMemWrite(GPUDynInstPtr gpuDynInst, Addr offset)
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (gpuDynInst->exec_mask[lane]) {
|
||||
Addr vaddr = gpuDynInst->addr[lane] + offset;
|
||||
for (int i = 0; i < N; ++i) {
|
||||
wf->ldsChunk->write<VecElemU32>(
|
||||
vaddr + i*sizeof(VecElemU32),
|
||||
(reinterpret_cast<VecElemU32*>(
|
||||
gpuDynInst->d_data))[lane * N + i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void
|
||||
initDualMemWrite(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
|
||||
|
||||
@@ -65,6 +65,8 @@ class ArmMMU(BaseMMU):
|
||||
itb = ArmITB()
|
||||
dtb = ArmDTB()
|
||||
|
||||
sys = Param.System(Parent.any, "system object parameter")
|
||||
|
||||
stage2_itb = Param.ArmTLB(ArmStage2TLB(), "Stage 2 Instruction TLB")
|
||||
stage2_dtb = Param.ArmTLB(ArmStage2TLB(), "Stage 2 Data TLB")
|
||||
|
||||
@@ -80,12 +82,8 @@ class ArmMMU(BaseMMU):
|
||||
|
||||
@classmethod
|
||||
def walkerPorts(cls):
|
||||
return ["mmu.itb_walker.port", "mmu.dtb_walker.port",
|
||||
"mmu.stage2_itb_walker.port", "mmu.stage2_dtb_walker.port"]
|
||||
return ["mmu.itb_walker.port", "mmu.dtb_walker.port"]
|
||||
|
||||
def connectWalkerPorts(self, iport, dport):
|
||||
self.itb_walker.port = iport
|
||||
self.dtb_walker.port = dport
|
||||
|
||||
self.stage2_itb_walker.port = iport
|
||||
self.stage2_dtb_walker.port = dport
|
||||
|
||||
@@ -129,7 +129,7 @@ ArmISA::HTMCheckpoint::restore(ThreadContext *tc, HtmFailureFaultCause cause)
|
||||
case HtmFailureFaultCause::EXPLICIT:
|
||||
replaceBits(error_code, 14, 0, tcreason);
|
||||
replaceBits(error_code, 16, 1);
|
||||
retry = bits(15, tcreason);
|
||||
retry = bits(tcreason, 15);
|
||||
break;
|
||||
case HtmFailureFaultCause::MEMORY:
|
||||
replaceBits(error_code, 17, 1);
|
||||
|
||||
@@ -47,10 +47,17 @@ using namespace ArmISA;
|
||||
/**
 * Construct the ARM MMU.
 *
 * Two table-walker ports are created, one tied to the instruction
 * walker and one to the data walker. Each port is then handed to both
 * the stage 1 and the stage 2 walker of its side, so the stage 2
 * walkers use the same port objects as the stage 1 walkers.
 *
 * @param p parameters supplying the stage 2 TLBs, the four table
 *          walkers and the system used to obtain requestor ids.
 */
MMU::MMU(const ArmMMUParams &p)
  : BaseMMU(p),
    itbStage2(p.stage2_itb), dtbStage2(p.stage2_dtb),
    iport(p.itb_walker, p.sys->getRequestorId(p.itb_walker)),
    dport(p.dtb_walker, p.sys->getRequestorId(p.dtb_walker)),
    itbWalker(p.itb_walker), dtbWalker(p.dtb_walker),
    itbStage2Walker(p.stage2_itb_walker),
    dtbStage2Walker(p.stage2_dtb_walker)
{
    itbWalker->setPort(&iport);
    dtbWalker->setPort(&dport);
    itbStage2Walker->setPort(&iport);
    dtbStage2Walker->setPort(&dport);
}
|
||||
|
||||
void
|
||||
MMU::init()
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
#ifndef __ARCH_ARM_MMU_HH__
|
||||
#define __ARCH_ARM_MMU_HH__
|
||||
|
||||
#include "arch/arm/table_walker.hh"
|
||||
#include "arch/arm/tlb.hh"
|
||||
#include "arch/generic/mmu.hh"
|
||||
|
||||
@@ -69,6 +70,9 @@ class MMU : public BaseMMU
|
||||
TLB *itbStage2;
|
||||
TLB *dtbStage2;
|
||||
|
||||
TableWalker::Port iport;
|
||||
TableWalker::Port dport;
|
||||
|
||||
TableWalker *itbWalker;
|
||||
TableWalker *dtbWalker;
|
||||
TableWalker *itbStage2Walker;
|
||||
|
||||
@@ -61,7 +61,7 @@ using namespace ArmISA;
|
||||
TableWalker::TableWalker(const Params &p)
|
||||
: ClockedObject(p),
|
||||
requestorId(p.sys->getRequestorId(this)),
|
||||
port(new Port(this, requestorId)),
|
||||
port(nullptr),
|
||||
isStage2(p.is_stage2), tlb(NULL),
|
||||
currState(NULL), pending(false),
|
||||
numSquashable(p.num_squash_per_cycle),
|
||||
|
||||
@@ -1037,6 +1037,7 @@ class TableWalker : public ClockedObject
|
||||
|
||||
void setMmu(MMU *_mmu) { mmu = _mmu; }
|
||||
void setTlb(TLB *_tlb) { tlb = _tlb; }
|
||||
void setPort(Port *_port) { port = _port; }
|
||||
TLB* getTlb() { return tlb; }
|
||||
void memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr,
|
||||
uint8_t texcb, bool s);
|
||||
|
||||
@@ -274,7 +274,7 @@ def template JumpExecute {{
|
||||
}
|
||||
}};
|
||||
|
||||
def template CSRExecuteRo {{
|
||||
def template CSRExecute {{
|
||||
Fault
|
||||
%(class_name)s::execute(ExecContext *xc,
|
||||
Trace::InstRecord *traceData) const
|
||||
@@ -287,6 +287,8 @@ def template CSRExecuteRo {{
|
||||
%(op_decl)s;
|
||||
%(op_rd)s;
|
||||
|
||||
RegVal data, olddata;
|
||||
|
||||
switch (csr) {
|
||||
case CSR_SATP: {
|
||||
auto pm = (PrivilegeMode)xc->readMiscReg(MISCREG_PRV);
|
||||
@@ -311,91 +313,55 @@ def template CSRExecuteRo {{
|
||||
break;
|
||||
}
|
||||
|
||||
RegVal data;
|
||||
if (csr == CSR_FCSR) {
|
||||
data = xc->readMiscReg(MISCREG_FFLAGS) |
|
||||
(xc->readMiscReg(MISCREG_FRM) << FRM_OFFSET);
|
||||
} else {
|
||||
data = xc->readMiscReg(midx);
|
||||
}
|
||||
|
||||
DPRINTF(RiscvMisc, "Reading CSR %s: %#x\n", csrName, data);
|
||||
|
||||
%(code)s;
|
||||
%(op_wb)s;
|
||||
|
||||
return NoFault;
|
||||
}
|
||||
}};
|
||||
|
||||
def template CSRExecuteRw {{
|
||||
Fault
|
||||
%(class_name)s::execute(ExecContext *xc,
|
||||
Trace::InstRecord *traceData) const
|
||||
{
|
||||
if (!valid) {
|
||||
return std::make_shared<IllegalInstFault>(
|
||||
csprintf("Illegal CSR index %#x\n", csr), machInst);
|
||||
}
|
||||
if (bits(csr, 11, 10) == 0x3) {
|
||||
return std::make_shared<IllegalInstFault>(
|
||||
csprintf("CSR %s is read-only\n", csrName), machInst);
|
||||
}
|
||||
|
||||
%(op_decl)s;
|
||||
%(op_rd)s;
|
||||
|
||||
switch (csr) {
|
||||
case CSR_SATP: {
|
||||
auto pm = (PrivilegeMode)xc->readMiscReg(MISCREG_PRV);
|
||||
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
||||
if (pm == PRV_U || (pm == PRV_S && status.tvm == 1)) {
|
||||
return std::make_shared<IllegalInstFault>(
|
||||
"SATP access in user mode or with TVM enabled\n",
|
||||
machInst);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case CSR_MSTATUS: {
|
||||
auto pm = (PrivilegeMode)xc->readMiscReg(MISCREG_PRV);
|
||||
if (pm != PrivilegeMode::PRV_M) {
|
||||
return std::make_shared<IllegalInstFault>(
|
||||
"MSTATUS is only accessibly in machine mode\n",
|
||||
machInst);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
RegVal data;
|
||||
if (csr == CSR_FCSR) {
|
||||
data = xc->readMiscReg(MISCREG_FFLAGS) |
|
||||
olddata = xc->readMiscReg(MISCREG_FFLAGS) |
|
||||
(xc->readMiscReg(MISCREG_FRM) << FRM_OFFSET);
|
||||
} else {
|
||||
data = xc->readMiscReg(midx);
|
||||
olddata = xc->readMiscReg(midx);
|
||||
}
|
||||
auto olddata_all = olddata;
|
||||
|
||||
RegVal original = data;
|
||||
|
||||
DPRINTF(RiscvMisc, "Reading CSR %s: %#x\n", csrName, data & maskVal);
|
||||
olddata &= maskVal;
|
||||
DPRINTF(RiscvMisc, "Reading CSR %s: %#x\n", csrName, olddata);
|
||||
data = olddata;
|
||||
|
||||
%(code)s;
|
||||
|
||||
// We must keep those original bits not in the mask. Hidden bits should
|
||||
// keep their original value.
|
||||
data = (original & ~maskVal) | (data & maskVal);
|
||||
|
||||
DPRINTF(RiscvMisc, "Writing %#x to CSR %s.\n", data, csrName);
|
||||
|
||||
if (csr == CSR_FCSR) {
|
||||
xc->setMiscReg(MISCREG_FFLAGS, bits(data, 4, 0));
|
||||
xc->setMiscReg(MISCREG_FRM, bits(data, 7, 5));
|
||||
} else {
|
||||
xc->setMiscReg(midx, data);
|
||||
data &= maskVal;
|
||||
if (data != olddata) {
|
||||
if (bits(csr, 11, 10) == 0x3) {
|
||||
return std::make_shared<IllegalInstFault>(
|
||||
csprintf("CSR %s is read-only\n", csrName), machInst);
|
||||
}
|
||||
auto newdata_all = data;
|
||||
// We must keep those original bits not in mask.
|
||||
// olddata and data only contain the bits visible
|
||||
// in current privilege level.
|
||||
newdata_all = (olddata_all & ~maskVal) | data;
|
||||
DPRINTF(RiscvMisc, "Writing %#x to CSR %s.\n",
|
||||
newdata_all, csrName);
|
||||
switch (csr) {
|
||||
case CSR_FCSR:
|
||||
xc->setMiscReg(MISCREG_FFLAGS, bits(data, 4, 0));
|
||||
xc->setMiscReg(MISCREG_FRM, bits(data, 7, 5));
|
||||
break;
|
||||
case CSR_MIP: case CSR_MIE:
|
||||
case CSR_SIP: case CSR_SIE:
|
||||
case CSR_UIP: case CSR_UIE:
|
||||
case CSR_MSTATUS: case CSR_SSTATUS: case CSR_USTATUS:
|
||||
if (newdata_all != olddata_all) {
|
||||
xc->setMiscReg(midx, newdata_all);
|
||||
} else {
|
||||
return std::make_shared<IllegalInstFault>(
|
||||
"Only bits in mask are allowed to be set\n",
|
||||
machInst);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
xc->setMiscReg(midx, data);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
%(op_wb)s;
|
||||
return NoFault;
|
||||
}
|
||||
@@ -499,24 +465,10 @@ def format SystemOp(code, *opt_flags) {{
|
||||
exec_output = BasicExecute.subst(iop)
|
||||
}};
|
||||
|
||||
def template CSRDecode {{
|
||||
if (RS1)
|
||||
return new %(class_name)sRw(machInst);
|
||||
else
|
||||
return new %(class_name)sRo(machInst);
|
||||
}};
|
||||
|
||||
def format CSROp(code, *opt_flags) {{
|
||||
iop = InstObjParams(name, Name + "Ro", 'CSROp', code, opt_flags)
|
||||
iop = InstObjParams(name, Name, 'CSROp', code, opt_flags)
|
||||
header_output = BasicDeclare.subst(iop)
|
||||
decoder_output = BasicConstructor.subst(iop)
|
||||
exec_output = CSRExecuteRo.subst(iop)
|
||||
|
||||
iop = InstObjParams(name, Name + "Rw", 'CSROp', code, opt_flags)
|
||||
header_output += BasicDeclare.subst(iop)
|
||||
decoder_output += BasicConstructor.subst(iop)
|
||||
exec_output += CSRExecuteRw.subst(iop)
|
||||
|
||||
iop = InstObjParams(name, Name, 'CSROp', "", opt_flags)
|
||||
decode_block = CSRDecode.subst(iop)
|
||||
decode_block = BasicDecode.subst(iop)
|
||||
exec_output = CSRExecute.subst(iop)
|
||||
}};
|
||||
|
||||
@@ -51,7 +51,6 @@
|
||||
#include "cpu/base.hh"
|
||||
#include "cpu/exec_context.hh"
|
||||
#include "cpu/inst_res.hh"
|
||||
#include "cpu/o3/dyn_inst.hh"
|
||||
#include "cpu/pc_event.hh"
|
||||
#include "cpu/simple_thread.hh"
|
||||
#include "cpu/static_inst.hh"
|
||||
|
||||
@@ -46,6 +46,7 @@
|
||||
#include "cpu/activity.hh"
|
||||
#include "cpu/checker/cpu.hh"
|
||||
#include "cpu/checker/thread_context.hh"
|
||||
#include "cpu/o3/dyn_inst.hh"
|
||||
#include "cpu/o3/limits.hh"
|
||||
#include "cpu/o3/thread_context.hh"
|
||||
#include "cpu/simple_thread.hh"
|
||||
|
||||
@@ -46,6 +46,7 @@
|
||||
#include "base/str.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/checker/cpu.hh"
|
||||
#include "cpu/o3/dyn_inst.hh"
|
||||
#include "cpu/o3/limits.hh"
|
||||
#include "cpu/o3/lsq.hh"
|
||||
#include "debug/Activity.hh"
|
||||
|
||||
@@ -990,7 +990,7 @@ void
|
||||
GPUComputeDriver::allocateGpuVma(Request::CacheCoherenceFlags mtype,
|
||||
Addr start, Addr length)
|
||||
{
|
||||
AddrRange range = AddrRange(start, start + length - 1);
|
||||
AddrRange range = AddrRange(start, start + length);
|
||||
DPRINTF(GPUDriver, "Registering [%p - %p] with MTYPE %d\n",
|
||||
range.start(), range.end(), mtype);
|
||||
fatal_if(gpuVmas.insert(range, mtype) == gpuVmas.end(),
|
||||
|
||||
@@ -100,7 +100,7 @@ class TLBCoalescer : public ClockedObject
|
||||
* option is to change it to curTick(), so we coalesce based
|
||||
* on the receive time.
|
||||
*/
|
||||
typedef std::unordered_map<int64_t, std::vector<coalescedReq>>
|
||||
typedef std::map<int64_t, std::vector<coalescedReq>>
|
||||
CoalescingFIFO;
|
||||
|
||||
CoalescingFIFO coalescerFIFO;
|
||||
|
||||
@@ -645,7 +645,10 @@ GPUCoalescer::makeRequest(PacketPtr pkt)
|
||||
// of the exec_mask.
|
||||
int num_packets = 1;
|
||||
if (!m_usingRubyTester) {
|
||||
num_packets = getDynInst(pkt)->exec_mask.count();
|
||||
num_packets = 0;
|
||||
for (int i = 0; i < TheGpuISA::NumVecElemPerVecReg; i++) {
|
||||
num_packets += getDynInst(pkt)->getLaneStatus(i);
|
||||
}
|
||||
}
|
||||
|
||||
// the pkt is temporarily stored in the uncoalesced table until
|
||||
|
||||
Reference in New Issue
Block a user