Merge ktlim@zamp:./local/clean/o3-merge/m5
into zamp.eecs.umich.edu:/z/ktlim2/clean/o3-merge/newmem
configs/boot/micro_memlat.rcS:
configs/boot/micro_tlblat.rcS:
src/arch/alpha/ev5.cc:
src/arch/alpha/isa/decoder.isa:
src/arch/alpha/isa_traits.hh:
src/cpu/base.cc:
src/cpu/base.hh:
src/cpu/base_dyn_inst.hh:
src/cpu/checker/cpu.hh:
src/cpu/checker/cpu_impl.hh:
src/cpu/o3/alpha/cpu_impl.hh:
src/cpu/o3/alpha/params.hh:
src/cpu/o3/checker_builder.cc:
src/cpu/o3/commit_impl.hh:
src/cpu/o3/cpu.cc:
src/cpu/o3/decode_impl.hh:
src/cpu/o3/fetch_impl.hh:
src/cpu/o3/iew.hh:
src/cpu/o3/iew_impl.hh:
src/cpu/o3/inst_queue.hh:
src/cpu/o3/lsq.hh:
src/cpu/o3/lsq_impl.hh:
src/cpu/o3/lsq_unit.hh:
src/cpu/o3/lsq_unit_impl.hh:
src/cpu/o3/regfile.hh:
src/cpu/o3/rename_impl.hh:
src/cpu/o3/thread_state.hh:
src/cpu/ozone/checker_builder.cc:
src/cpu/ozone/cpu.hh:
src/cpu/ozone/cpu_impl.hh:
src/cpu/ozone/front_end.hh:
src/cpu/ozone/front_end_impl.hh:
src/cpu/ozone/lw_back_end.hh:
src/cpu/ozone/lw_back_end_impl.hh:
src/cpu/ozone/lw_lsq.hh:
src/cpu/ozone/lw_lsq_impl.hh:
src/cpu/ozone/thread_state.hh:
src/cpu/simple/base.cc:
src/cpu/simple_thread.cc:
src/cpu/simple_thread.hh:
src/cpu/thread_state.hh:
src/dev/ide_disk.cc:
src/python/m5/objects/O3CPU.py:
src/python/m5/objects/Root.py:
src/python/m5/objects/System.py:
src/sim/pseudo_inst.cc:
src/sim/pseudo_inst.hh:
src/sim/system.hh:
util/m5/m5.c:
Hand merge.
--HG--
rename : arch/alpha/ev5.cc => src/arch/alpha/ev5.cc
rename : arch/alpha/freebsd/system.cc => src/arch/alpha/freebsd/system.cc
rename : arch/alpha/isa/decoder.isa => src/arch/alpha/isa/decoder.isa
rename : arch/alpha/isa/mem.isa => src/arch/alpha/isa/mem.isa
rename : arch/alpha/isa_traits.hh => src/arch/alpha/isa_traits.hh
rename : arch/alpha/linux/system.cc => src/arch/alpha/linux/system.cc
rename : arch/alpha/system.cc => src/arch/alpha/system.cc
rename : arch/alpha/tru64/system.cc => src/arch/alpha/tru64/system.cc
rename : cpu/base.cc => src/cpu/base.cc
rename : cpu/base.hh => src/cpu/base.hh
rename : cpu/base_dyn_inst.hh => src/cpu/base_dyn_inst.hh
rename : cpu/checker/cpu.hh => src/cpu/checker/cpu.hh
rename : cpu/checker/cpu.cc => src/cpu/checker/cpu_impl.hh
rename : cpu/o3/alpha_cpu_builder.cc => src/cpu/o3/alpha/cpu_builder.cc
rename : cpu/checker/o3_cpu_builder.cc => src/cpu/o3/checker_builder.cc
rename : cpu/o3/commit_impl.hh => src/cpu/o3/commit_impl.hh
rename : cpu/o3/cpu.cc => src/cpu/o3/cpu.cc
rename : cpu/o3/fetch_impl.hh => src/cpu/o3/fetch_impl.hh
rename : cpu/o3/iew.hh => src/cpu/o3/iew.hh
rename : cpu/o3/iew_impl.hh => src/cpu/o3/iew_impl.hh
rename : cpu/o3/inst_queue.hh => src/cpu/o3/inst_queue.hh
rename : cpu/o3/inst_queue_impl.hh => src/cpu/o3/inst_queue_impl.hh
rename : cpu/o3/lsq_impl.hh => src/cpu/o3/lsq_impl.hh
rename : cpu/o3/lsq_unit.hh => src/cpu/o3/lsq_unit.hh
rename : cpu/o3/lsq_unit_impl.hh => src/cpu/o3/lsq_unit_impl.hh
rename : cpu/o3/mem_dep_unit_impl.hh => src/cpu/o3/mem_dep_unit_impl.hh
rename : cpu/o3/rename.hh => src/cpu/o3/rename.hh
rename : cpu/o3/rename_impl.hh => src/cpu/o3/rename_impl.hh
rename : cpu/o3/thread_state.hh => src/cpu/o3/thread_state.hh
rename : cpu/o3/tournament_pred.cc => src/cpu/o3/tournament_pred.cc
rename : cpu/o3/tournament_pred.hh => src/cpu/o3/tournament_pred.hh
rename : cpu/checker/cpu_builder.cc => src/cpu/ozone/checker_builder.cc
rename : cpu/ozone/cpu.hh => src/cpu/ozone/cpu.hh
rename : cpu/ozone/cpu_builder.cc => src/cpu/ozone/cpu_builder.cc
rename : cpu/ozone/cpu_impl.hh => src/cpu/ozone/cpu_impl.hh
rename : cpu/ozone/front_end.hh => src/cpu/ozone/front_end.hh
rename : cpu/ozone/front_end_impl.hh => src/cpu/ozone/front_end_impl.hh
rename : cpu/ozone/inorder_back_end_impl.hh => src/cpu/ozone/inorder_back_end_impl.hh
rename : cpu/ozone/inst_queue_impl.hh => src/cpu/ozone/inst_queue_impl.hh
rename : cpu/ozone/lw_back_end.hh => src/cpu/ozone/lw_back_end.hh
rename : cpu/ozone/lw_back_end_impl.hh => src/cpu/ozone/lw_back_end_impl.hh
rename : cpu/ozone/lw_lsq.hh => src/cpu/ozone/lw_lsq.hh
rename : cpu/ozone/lw_lsq_impl.hh => src/cpu/ozone/lw_lsq_impl.hh
rename : cpu/ozone/simple_params.hh => src/cpu/ozone/simple_params.hh
rename : cpu/ozone/thread_state.hh => src/cpu/ozone/thread_state.hh
rename : cpu/simple/cpu.cc => src/cpu/simple/base.cc
rename : cpu/cpu_exec_context.cc => src/cpu/simple_thread.cc
rename : cpu/thread_state.hh => src/cpu/thread_state.hh
rename : dev/ide_disk.hh => src/dev/ide_disk.hh
rename : python/m5/objects/BaseCPU.py => src/python/m5/objects/BaseCPU.py
rename : python/m5/objects/AlphaFullCPU.py => src/python/m5/objects/O3CPU.py
rename : python/m5/objects/OzoneCPU.py => src/python/m5/objects/OzoneCPU.py
rename : python/m5/objects/Root.py => src/python/m5/objects/Root.py
rename : python/m5/objects/System.py => src/python/m5/objects/System.py
rename : sim/eventq.hh => src/sim/eventq.hh
rename : sim/pseudo_inst.cc => src/sim/pseudo_inst.cc
rename : sim/pseudo_inst.hh => src/sim/pseudo_inst.hh
rename : sim/serialize.cc => src/sim/serialize.cc
rename : sim/stat_control.cc => src/sim/stat_control.cc
rename : sim/stat_control.hh => src/sim/stat_control.hh
rename : sim/system.hh => src/sim/system.hh
extra : convert_revision : 135d90e43f6cea89f9460ba4e23f4b0b85886e7d
This commit is contained in:
@@ -554,6 +554,7 @@ AlphaISA::MiscRegFile::setIpr(int idx, uint64_t val, ThreadContext *tc)
|
||||
return NoFault;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
AlphaISA::copyIprs(ThreadContext *src, ThreadContext *dest)
|
||||
{
|
||||
@@ -562,6 +563,7 @@ AlphaISA::copyIprs(ThreadContext *src, ThreadContext *dest)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Check for special simulator handling of specific PAL calls.
|
||||
* If return value is false, actual PAL call will be suppressed.
|
||||
|
||||
@@ -105,6 +105,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(FreebsdAlphaSystem)
|
||||
|
||||
Param<string> boot_osflags;
|
||||
Param<string> readfile;
|
||||
Param<string> symbolfile;
|
||||
Param<unsigned int> init_param;
|
||||
|
||||
Param<uint64_t> system_type;
|
||||
@@ -124,6 +125,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(FreebsdAlphaSystem)
|
||||
INIT_PARAM_DFLT(boot_osflags, "flags to pass to the kernel during boot",
|
||||
"a"),
|
||||
INIT_PARAM_DFLT(readfile, "file to read startup script from", ""),
|
||||
INIT_PARAM_DFLT(symbolfile, "file to read symbols from", ""),
|
||||
INIT_PARAM_DFLT(init_param, "numerical value to pass into simulator", 0),
|
||||
INIT_PARAM_DFLT(system_type, "Type of system we are emulating", 34),
|
||||
INIT_PARAM_DFLT(system_rev, "Revision of system we are emulating", 1<<10)
|
||||
@@ -143,6 +145,7 @@ CREATE_SIM_OBJECT(FreebsdAlphaSystem)
|
||||
p->boot_osflags = boot_osflags;
|
||||
p->init_param = init_param;
|
||||
p->readfile = readfile;
|
||||
p->symbolfile = symbolfile;
|
||||
p->system_type = system_type;
|
||||
p->system_rev = system_rev;
|
||||
return new FreebsdAlphaSystem(p);
|
||||
|
||||
@@ -779,10 +779,10 @@ decode OPCODE default Unknown::unknown() {
|
||||
}}, IsNonSpeculative, IsQuiesce);
|
||||
0x03: quiesceCycles({{
|
||||
AlphaPseudo::quiesceCycles(xc->tcBase(), R16);
|
||||
}}, IsNonSpeculative, IsQuiesce);
|
||||
}}, IsNonSpeculative, IsQuiesce, IsUnverifiable);
|
||||
0x04: quiesceTime({{
|
||||
R0 = AlphaPseudo::quiesceTime(xc->tcBase());
|
||||
}}, IsNonSpeculative);
|
||||
}}, IsNonSpeculative, IsUnverifiable);
|
||||
0x10: ivlb({{
|
||||
AlphaPseudo::ivlb(xc->tcBase());
|
||||
}}, No_OpClass, IsNonSpeculative);
|
||||
@@ -795,6 +795,9 @@ decode OPCODE default Unknown::unknown() {
|
||||
0x21: m5exit({{
|
||||
AlphaPseudo::m5exit(xc->tcBase(), R16);
|
||||
}}, No_OpClass, IsNonSpeculative);
|
||||
0x31: loadsymbol({{
|
||||
AlphaPseudo::loadsymbol(xc->tcBase());
|
||||
}}, No_OpClass, IsNonSpeculative);
|
||||
0x30: initparam({{ Ra = xc->tcBase()->getCpuPtr()->system->init_param; }});
|
||||
0x40: resetstats({{
|
||||
AlphaPseudo::resetstats(xc->tcBase(), R16, R17);
|
||||
|
||||
@@ -528,7 +528,7 @@ def template MiscInitiateAcc {{
|
||||
Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc,
|
||||
Trace::InstRecord *traceData) const
|
||||
{
|
||||
panic("Misc instruction does not support split access method!");
|
||||
warn("Misc instruction does not support split access method!");
|
||||
return NoFault;
|
||||
}
|
||||
}};
|
||||
@@ -539,7 +539,7 @@ def template MiscCompleteAcc {{
|
||||
%(CPU_exec_context)s *xc,
|
||||
Trace::InstRecord *traceData) const
|
||||
{
|
||||
panic("Misc instruction does not support split access method!");
|
||||
warn("Misc instruction does not support split access method!");
|
||||
|
||||
return NoFault;
|
||||
}
|
||||
|
||||
@@ -42,190 +42,10 @@ class StaticInstPtr;
|
||||
|
||||
namespace AlphaISA
|
||||
{
|
||||
using namespace LittleEndianGuest;
|
||||
|
||||
// These enumerate all the registers for dependence tracking.
|
||||
enum DependenceTags {
|
||||
// 0..31 are the integer regs 0..31
|
||||
// 32..63 are the FP regs 0..31, i.e. use (reg + FP_Base_DepTag)
|
||||
FP_Base_DepTag = 40,
|
||||
Ctrl_Base_DepTag = 72,
|
||||
Fpcr_DepTag = 72, // floating point control register
|
||||
Uniq_DepTag = 73,
|
||||
Lock_Flag_DepTag = 74,
|
||||
Lock_Addr_DepTag = 75,
|
||||
IPR_Base_DepTag = 76
|
||||
};
|
||||
|
||||
StaticInstPtr decodeInst(ExtMachInst);
|
||||
|
||||
// Alpha Does NOT have a delay slot
|
||||
#define ISA_HAS_DELAY_SLOT 0
|
||||
|
||||
const Addr PageShift = 13;
|
||||
const Addr PageBytes = ULL(1) << PageShift;
|
||||
const Addr PageMask = ~(PageBytes - 1);
|
||||
const Addr PageOffset = PageBytes - 1;
|
||||
|
||||
#if FULL_SYSTEM
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Translation stuff
|
||||
//
|
||||
|
||||
const Addr PteShift = 3;
|
||||
const Addr NPtePageShift = PageShift - PteShift;
|
||||
const Addr NPtePage = ULL(1) << NPtePageShift;
|
||||
const Addr PteMask = NPtePage - 1;
|
||||
|
||||
// User Virtual
|
||||
const Addr USegBase = ULL(0x0);
|
||||
const Addr USegEnd = ULL(0x000003ffffffffff);
|
||||
|
||||
// Kernel Direct Mapped
|
||||
const Addr K0SegBase = ULL(0xfffffc0000000000);
|
||||
const Addr K0SegEnd = ULL(0xfffffdffffffffff);
|
||||
|
||||
// Kernel Virtual
|
||||
const Addr K1SegBase = ULL(0xfffffe0000000000);
|
||||
const Addr K1SegEnd = ULL(0xffffffffffffffff);
|
||||
|
||||
// For loading... XXX This maybe could be USegEnd?? --ali
|
||||
const Addr LoadAddrMask = ULL(0xffffffffff);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Interrupt levels
|
||||
//
|
||||
enum InterruptLevels
|
||||
{
|
||||
INTLEVEL_SOFTWARE_MIN = 4,
|
||||
INTLEVEL_SOFTWARE_MAX = 19,
|
||||
|
||||
INTLEVEL_EXTERNAL_MIN = 20,
|
||||
INTLEVEL_EXTERNAL_MAX = 34,
|
||||
|
||||
INTLEVEL_IRQ0 = 20,
|
||||
INTLEVEL_IRQ1 = 21,
|
||||
INTINDEX_ETHERNET = 0,
|
||||
INTINDEX_SCSI = 1,
|
||||
INTLEVEL_IRQ2 = 22,
|
||||
INTLEVEL_IRQ3 = 23,
|
||||
|
||||
INTLEVEL_SERIAL = 33,
|
||||
|
||||
NumInterruptLevels = INTLEVEL_EXTERNAL_MAX
|
||||
};
|
||||
|
||||
|
||||
// EV5 modes
|
||||
enum mode_type
|
||||
{
|
||||
mode_kernel = 0, // kernel
|
||||
mode_executive = 1, // executive (unused by unix)
|
||||
mode_supervisor = 2, // supervisor (unused by unix)
|
||||
mode_user = 3, // user mode
|
||||
mode_number // number of modes
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#if FULL_SYSTEM
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Internal Processor Reigsters
|
||||
//
|
||||
enum md_ipr_names
|
||||
{
|
||||
IPR_ISR = 0x100, // interrupt summary register
|
||||
IPR_ITB_TAG = 0x101, // ITLB tag register
|
||||
IPR_ITB_PTE = 0x102, // ITLB page table entry register
|
||||
IPR_ITB_ASN = 0x103, // ITLB address space register
|
||||
IPR_ITB_PTE_TEMP = 0x104, // ITLB page table entry temp register
|
||||
IPR_ITB_IA = 0x105, // ITLB invalidate all register
|
||||
IPR_ITB_IAP = 0x106, // ITLB invalidate all process register
|
||||
IPR_ITB_IS = 0x107, // ITLB invalidate select register
|
||||
IPR_SIRR = 0x108, // software interrupt request register
|
||||
IPR_ASTRR = 0x109, // asynchronous system trap request register
|
||||
IPR_ASTER = 0x10a, // asynchronous system trap enable register
|
||||
IPR_EXC_ADDR = 0x10b, // exception address register
|
||||
IPR_EXC_SUM = 0x10c, // exception summary register
|
||||
IPR_EXC_MASK = 0x10d, // exception mask register
|
||||
IPR_PAL_BASE = 0x10e, // PAL base address register
|
||||
IPR_ICM = 0x10f, // instruction current mode
|
||||
IPR_IPLR = 0x110, // interrupt priority level register
|
||||
IPR_INTID = 0x111, // interrupt ID register
|
||||
IPR_IFAULT_VA_FORM = 0x112, // formatted faulting virtual addr register
|
||||
IPR_IVPTBR = 0x113, // virtual page table base register
|
||||
IPR_HWINT_CLR = 0x115, // H/W interrupt clear register
|
||||
IPR_SL_XMIT = 0x116, // serial line transmit register
|
||||
IPR_SL_RCV = 0x117, // serial line receive register
|
||||
IPR_ICSR = 0x118, // instruction control and status register
|
||||
IPR_IC_FLUSH = 0x119, // instruction cache flush control
|
||||
IPR_IC_PERR_STAT = 0x11a, // inst cache parity error status register
|
||||
IPR_PMCTR = 0x11c, // performance counter register
|
||||
|
||||
// PAL temporary registers...
|
||||
// register meanings gleaned from osfpal.s source code
|
||||
IPR_PALtemp0 = 0x140, // local scratch
|
||||
IPR_PALtemp1 = 0x141, // local scratch
|
||||
IPR_PALtemp2 = 0x142, // entUna
|
||||
IPR_PALtemp3 = 0x143, // CPU specific impure area pointer
|
||||
IPR_PALtemp4 = 0x144, // memory management temp
|
||||
IPR_PALtemp5 = 0x145, // memory management temp
|
||||
IPR_PALtemp6 = 0x146, // memory management temp
|
||||
IPR_PALtemp7 = 0x147, // entIF
|
||||
IPR_PALtemp8 = 0x148, // intmask
|
||||
IPR_PALtemp9 = 0x149, // entSys
|
||||
IPR_PALtemp10 = 0x14a, // ??
|
||||
IPR_PALtemp11 = 0x14b, // entInt
|
||||
IPR_PALtemp12 = 0x14c, // entArith
|
||||
IPR_PALtemp13 = 0x14d, // reserved for platform specific PAL
|
||||
IPR_PALtemp14 = 0x14e, // reserved for platform specific PAL
|
||||
IPR_PALtemp15 = 0x14f, // reserved for platform specific PAL
|
||||
IPR_PALtemp16 = 0x150, // scratch / whami<7:0> / mces<4:0>
|
||||
IPR_PALtemp17 = 0x151, // sysval
|
||||
IPR_PALtemp18 = 0x152, // usp
|
||||
IPR_PALtemp19 = 0x153, // ksp
|
||||
IPR_PALtemp20 = 0x154, // PTBR
|
||||
IPR_PALtemp21 = 0x155, // entMM
|
||||
IPR_PALtemp22 = 0x156, // kgp
|
||||
IPR_PALtemp23 = 0x157, // PCBB
|
||||
|
||||
IPR_DTB_ASN = 0x200, // DTLB address space number register
|
||||
IPR_DTB_CM = 0x201, // DTLB current mode register
|
||||
IPR_DTB_TAG = 0x202, // DTLB tag register
|
||||
IPR_DTB_PTE = 0x203, // DTLB page table entry register
|
||||
IPR_DTB_PTE_TEMP = 0x204, // DTLB page table entry temporary register
|
||||
|
||||
IPR_MM_STAT = 0x205, // data MMU fault status register
|
||||
IPR_VA = 0x206, // fault virtual address register
|
||||
IPR_VA_FORM = 0x207, // formatted virtual address register
|
||||
IPR_MVPTBR = 0x208, // MTU virtual page table base register
|
||||
IPR_DTB_IAP = 0x209, // DTLB invalidate all process register
|
||||
IPR_DTB_IA = 0x20a, // DTLB invalidate all register
|
||||
IPR_DTB_IS = 0x20b, // DTLB invalidate single register
|
||||
IPR_ALT_MODE = 0x20c, // alternate mode register
|
||||
IPR_CC = 0x20d, // cycle counter register
|
||||
IPR_CC_CTL = 0x20e, // cycle counter control register
|
||||
IPR_MCSR = 0x20f, // MTU control register
|
||||
|
||||
IPR_DC_FLUSH = 0x210,
|
||||
IPR_DC_PERR_STAT = 0x212, // Dcache parity error status register
|
||||
IPR_DC_TEST_CTL = 0x213, // Dcache test tag control register
|
||||
IPR_DC_TEST_TAG = 0x214, // Dcache test tag register
|
||||
IPR_DC_TEST_TAG_TEMP = 0x215, // Dcache test tag temporary register
|
||||
IPR_DC_MODE = 0x216, // Dcache mode register
|
||||
IPR_MAF_MODE = 0x217, // miss address file mode register
|
||||
|
||||
NumInternalProcRegs // number of IPR registers
|
||||
};
|
||||
#else
|
||||
const int NumInternalProcRegs = 0;
|
||||
#endif
|
||||
|
||||
// Constants Related to the number of registers
|
||||
typedef uint32_t MachInst;
|
||||
typedef uint64_t ExtMachInst;
|
||||
typedef uint8_t RegIndex;
|
||||
|
||||
const int NumIntArchRegs = 32;
|
||||
const int NumPALShadowRegs = 8;
|
||||
@@ -233,15 +53,6 @@ namespace AlphaISA
|
||||
// @todo: Figure out what this number really should be.
|
||||
const int NumMiscArchRegs = 32;
|
||||
|
||||
const int NumIntRegs = NumIntArchRegs + NumPALShadowRegs;
|
||||
const int NumFloatRegs = NumFloatArchRegs;
|
||||
const int NumMiscRegs = NumMiscArchRegs;
|
||||
|
||||
const int TotalNumRegs = NumIntRegs + NumFloatRegs +
|
||||
NumMiscRegs + NumInternalProcRegs;
|
||||
|
||||
const int TotalDataRegs = NumIntRegs + NumFloatRegs;
|
||||
|
||||
// Static instruction parameters
|
||||
const int MaxInstSrcRegs = 3;
|
||||
const int MaxInstDestRegs = 2;
|
||||
@@ -265,23 +76,270 @@ namespace AlphaISA
|
||||
const int SyscallPseudoReturnReg = ArgumentReg4;
|
||||
const int SyscallSuccessReg = 19;
|
||||
|
||||
|
||||
|
||||
const int LogVMPageSize = 13; // 8K bytes
|
||||
const int VMPageSize = (1 << LogVMPageSize);
|
||||
|
||||
const int BranchPredAddrShiftAmt = 2; // instructions are 4-byte aligned
|
||||
|
||||
const int MachineBytes = 8;
|
||||
const int WordBytes = 4;
|
||||
const int HalfwordBytes = 2;
|
||||
const int ByteBytes = 1;
|
||||
|
||||
|
||||
const int NumIntRegs = NumIntArchRegs + NumPALShadowRegs;
|
||||
const int NumFloatRegs = NumFloatArchRegs;
|
||||
const int NumMiscRegs = NumMiscArchRegs;
|
||||
|
||||
// These enumerate all the registers for dependence tracking.
|
||||
enum DependenceTags {
|
||||
// 0..31 are the integer regs 0..31
|
||||
// 32..63 are the FP regs 0..31, i.e. use (reg + FP_Base_DepTag)
|
||||
FP_Base_DepTag = 40,
|
||||
Ctrl_Base_DepTag = 72,
|
||||
Fpcr_DepTag = 72, // floating point control register
|
||||
Uniq_DepTag = 73,
|
||||
Lock_Flag_DepTag = 74,
|
||||
Lock_Addr_DepTag = 75,
|
||||
IPR_Base_DepTag = 76
|
||||
};
|
||||
|
||||
typedef uint64_t IntReg;
|
||||
typedef IntReg IntRegFile[NumIntRegs];
|
||||
|
||||
// floating point register file entry type
|
||||
typedef union {
|
||||
uint64_t q;
|
||||
double d;
|
||||
} FloatReg;
|
||||
|
||||
typedef union {
|
||||
uint64_t q[NumFloatRegs]; // integer qword view
|
||||
double d[NumFloatRegs]; // double-precision floating point view
|
||||
|
||||
void clear()
|
||||
{ bzero(d, sizeof(d)); }
|
||||
} FloatRegFile;
|
||||
|
||||
extern const Addr PageShift;
|
||||
extern const Addr PageBytes;
|
||||
extern const Addr PageMask;
|
||||
extern const Addr PageOffset;
|
||||
|
||||
// redirected register map, really only used for the full system case.
|
||||
extern const int reg_redir[NumIntRegs];
|
||||
|
||||
#if FULL_SYSTEM
|
||||
|
||||
typedef uint64_t InternalProcReg;
|
||||
|
||||
#include "arch/alpha/isa_fullsys_traits.hh"
|
||||
|
||||
#else
|
||||
const int NumInternalProcRegs = 0;
|
||||
#endif
|
||||
|
||||
// control register file contents
|
||||
typedef uint64_t MiscReg;
|
||||
class MiscRegFile {
|
||||
protected:
|
||||
uint64_t fpcr; // floating point condition codes
|
||||
uint64_t uniq; // process-unique register
|
||||
bool lock_flag; // lock flag for LL/SC
|
||||
Addr lock_addr; // lock address for LL/SC
|
||||
|
||||
public:
|
||||
MiscReg readReg(int misc_reg);
|
||||
|
||||
//These functions should be removed once the simplescalar cpu model
|
||||
//has been replaced.
|
||||
int getInstAsid();
|
||||
int getDataAsid();
|
||||
|
||||
MiscReg readRegWithEffect(int misc_reg, Fault &fault, ExecContext *xc);
|
||||
|
||||
Fault setReg(int misc_reg, const MiscReg &val);
|
||||
|
||||
Fault setRegWithEffect(int misc_reg, const MiscReg &val,
|
||||
ExecContext *xc);
|
||||
|
||||
void serialize(std::ostream &os);
|
||||
|
||||
void unserialize(Checkpoint *cp, const std::string &section);
|
||||
|
||||
void clear()
|
||||
{
|
||||
fpcr = uniq = 0;
|
||||
lock_flag = 0;
|
||||
lock_addr = 0;
|
||||
}
|
||||
|
||||
#if FULL_SYSTEM
|
||||
protected:
|
||||
InternalProcReg ipr[NumInternalProcRegs]; // Internal processor regs
|
||||
|
||||
private:
|
||||
MiscReg readIpr(int idx, Fault &fault, ExecContext *xc);
|
||||
|
||||
Fault setIpr(int idx, uint64_t val, ExecContext *xc);
|
||||
|
||||
void copyIprs(ExecContext *xc);
|
||||
#endif
|
||||
friend class RegFile;
|
||||
};
|
||||
|
||||
const int TotalNumRegs = NumIntRegs + NumFloatRegs +
|
||||
NumMiscRegs + NumInternalProcRegs;
|
||||
|
||||
const int TotalDataRegs = NumIntRegs + NumFloatRegs;
|
||||
|
||||
typedef union {
|
||||
IntReg intreg;
|
||||
FloatReg fpreg;
|
||||
MiscReg ctrlreg;
|
||||
} AnyReg;
|
||||
|
||||
struct RegFile {
|
||||
IntRegFile intRegFile; // (signed) integer register file
|
||||
FloatRegFile floatRegFile; // floating point register file
|
||||
MiscRegFile miscRegs; // control register file
|
||||
Addr pc; // program counter
|
||||
Addr npc; // next-cycle program counter
|
||||
Addr nnpc;
|
||||
|
||||
#if FULL_SYSTEM
|
||||
int intrflag; // interrupt flag
|
||||
inline int instAsid()
|
||||
{ return EV5::ITB_ASN_ASN(miscRegs.ipr[IPR_ITB_ASN]); }
|
||||
inline int dataAsid()
|
||||
{ return EV5::DTB_ASN_ASN(miscRegs.ipr[IPR_DTB_ASN]); }
|
||||
#endif // FULL_SYSTEM
|
||||
|
||||
void serialize(std::ostream &os);
|
||||
void unserialize(Checkpoint *cp, const std::string &section);
|
||||
|
||||
void clear()
|
||||
{
|
||||
bzero(intRegFile, sizeof(intRegFile));
|
||||
floatRegFile.clear();
|
||||
miscRegs.clear();
|
||||
}
|
||||
};
|
||||
|
||||
static inline ExtMachInst makeExtMI(MachInst inst, const uint64_t &pc);
|
||||
|
||||
StaticInstPtr decodeInst(ExtMachInst);
|
||||
|
||||
// Alpha Does NOT have a delay slot
|
||||
#define ISA_HAS_DELAY_SLOT 0
|
||||
|
||||
// return a no-op instruction... used for instruction fetch faults
|
||||
// Alpha UNOP (ldq_u r31,0(r0))
|
||||
const ExtMachInst NoopMachInst = 0x2ffe0000;
|
||||
extern const ExtMachInst NoopMachInst;
|
||||
|
||||
// redirected register map, really only used for the full system case.
|
||||
extern const int reg_redir[NumIntRegs];
|
||||
enum annotes {
|
||||
ANNOTE_NONE = 0,
|
||||
// An impossible number for instruction annotations
|
||||
ITOUCH_ANNOTE = 0xffffffff,
|
||||
};
|
||||
|
||||
static inline bool isCallerSaveIntegerRegister(unsigned int reg) {
|
||||
panic("register classification not implemented");
|
||||
return (reg >= 1 && reg <= 8 || reg >= 22 && reg <= 25 || reg == 27);
|
||||
}
|
||||
|
||||
static inline bool isCalleeSaveIntegerRegister(unsigned int reg) {
|
||||
panic("register classification not implemented");
|
||||
return (reg >= 9 && reg <= 15);
|
||||
}
|
||||
|
||||
static inline bool isCallerSaveFloatRegister(unsigned int reg) {
|
||||
panic("register classification not implemented");
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool isCalleeSaveFloatRegister(unsigned int reg) {
|
||||
panic("register classification not implemented");
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline Addr alignAddress(const Addr &addr,
|
||||
unsigned int nbytes) {
|
||||
return (addr & ~(nbytes - 1));
|
||||
}
|
||||
|
||||
// Instruction address compression hooks
|
||||
static inline Addr realPCToFetchPC(const Addr &addr) {
|
||||
return addr;
|
||||
}
|
||||
|
||||
static inline Addr fetchPCToRealPC(const Addr &addr) {
|
||||
return addr;
|
||||
}
|
||||
|
||||
// the size of "fetched" instructions (not necessarily the size
|
||||
// of real instructions for PISA)
|
||||
static inline size_t fetchInstSize() {
|
||||
return sizeof(MachInst);
|
||||
}
|
||||
|
||||
static inline MachInst makeRegisterCopy(int dest, int src) {
|
||||
panic("makeRegisterCopy not implemented");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Machine operations
|
||||
|
||||
void saveMachineReg(AnyReg &savereg, const RegFile &reg_file,
|
||||
int regnum);
|
||||
|
||||
void restoreMachineReg(RegFile &regs, const AnyReg &reg,
|
||||
int regnum);
|
||||
|
||||
#if 0
|
||||
static void serializeSpecialRegs(const Serializable::Proxy &proxy,
|
||||
const RegFile &regs);
|
||||
|
||||
static void unserializeSpecialRegs(const IniFile *db,
|
||||
const std::string &category,
|
||||
ConfigNode *node,
|
||||
RegFile &regs);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Function to insure ISA semantics about 0 registers.
|
||||
* @param xc The execution context.
|
||||
*/
|
||||
template <class XC>
|
||||
void zeroRegisters(XC *xc);
|
||||
|
||||
const Addr MaxAddr = (Addr)-1;
|
||||
|
||||
#if !FULL_SYSTEM
|
||||
static inline void setSyscallReturn(SyscallReturn return_value, RegFile *regs)
|
||||
{
|
||||
// check for error condition. Alpha syscall convention is to
|
||||
// indicate success/failure in reg a3 (r19) and put the
|
||||
// return value itself in the standard return value reg (v0).
|
||||
if (return_value.successful()) {
|
||||
// no error
|
||||
regs->intRegFile[SyscallSuccessReg] = 0;
|
||||
regs->intRegFile[ReturnValueReg] = return_value.value();
|
||||
} else {
|
||||
// got an error, return details
|
||||
regs->intRegFile[SyscallSuccessReg] = (IntReg) -1;
|
||||
regs->intRegFile[ReturnValueReg] = -return_value.value();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void copyRegs(ExecContext *src, ExecContext *dest);
|
||||
|
||||
void copyMiscRegs(ExecContext *src, ExecContext *dest);
|
||||
|
||||
#if FULL_SYSTEM
|
||||
void copyIprs(ExecContext *src, ExecContext *dest);
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif // __ARCH_ALPHA_ISA_TRAITS_HH__
|
||||
|
||||
@@ -199,6 +199,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(LinuxAlphaSystem)
|
||||
|
||||
Param<string> boot_osflags;
|
||||
Param<string> readfile;
|
||||
Param<string> symbolfile;
|
||||
Param<unsigned int> init_param;
|
||||
|
||||
Param<uint64_t> system_type;
|
||||
@@ -218,6 +219,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(LinuxAlphaSystem)
|
||||
INIT_PARAM_DFLT(boot_osflags, "flags to pass to the kernel during boot",
|
||||
"a"),
|
||||
INIT_PARAM_DFLT(readfile, "file to read startup script from", ""),
|
||||
INIT_PARAM_DFLT(symbolfile, "file to read symbols from", ""),
|
||||
INIT_PARAM_DFLT(init_param, "numerical value to pass into simulator", 0),
|
||||
INIT_PARAM_DFLT(system_type, "Type of system we are emulating", 34),
|
||||
INIT_PARAM_DFLT(system_rev, "Revision of system we are emulating", 1<<10)
|
||||
@@ -237,6 +239,7 @@ CREATE_SIM_OBJECT(LinuxAlphaSystem)
|
||||
p->boot_osflags = boot_osflags;
|
||||
p->init_param = init_param;
|
||||
p->readfile = readfile;
|
||||
p->symbolfile = symbolfile;
|
||||
p->system_type = system_type;
|
||||
p->system_rev = system_rev;
|
||||
return new LinuxAlphaSystem(p);
|
||||
|
||||
@@ -229,6 +229,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AlphaSystem)
|
||||
|
||||
Param<std::string> boot_osflags;
|
||||
Param<std::string> readfile;
|
||||
Param<std::string> symbolfile;
|
||||
Param<unsigned int> init_param;
|
||||
|
||||
Param<uint64_t> system_type;
|
||||
@@ -248,6 +249,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AlphaSystem)
|
||||
INIT_PARAM_DFLT(boot_osflags, "flags to pass to the kernel during boot",
|
||||
"a"),
|
||||
INIT_PARAM_DFLT(readfile, "file to read startup script from", ""),
|
||||
INIT_PARAM_DFLT(symbolfile, "file to read symbols from", ""),
|
||||
INIT_PARAM_DFLT(init_param, "numerical value to pass into simulator", 0),
|
||||
INIT_PARAM_DFLT(system_type, "Type of system we are emulating", 34),
|
||||
INIT_PARAM_DFLT(system_rev, "Revision of system we are emulating", 1<<10)
|
||||
@@ -267,6 +269,7 @@ CREATE_SIM_OBJECT(AlphaSystem)
|
||||
p->boot_osflags = boot_osflags;
|
||||
p->init_param = init_param;
|
||||
p->readfile = readfile;
|
||||
p->symbolfile = symbolfile;
|
||||
p->system_type = system_type;
|
||||
p->system_rev = system_rev;
|
||||
return new AlphaSystem(p);
|
||||
|
||||
@@ -103,6 +103,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(Tru64AlphaSystem)
|
||||
|
||||
Param<string> boot_osflags;
|
||||
Param<string> readfile;
|
||||
Param<string> symbolfile;
|
||||
Param<unsigned int> init_param;
|
||||
|
||||
Param<uint64_t> system_type;
|
||||
@@ -122,6 +123,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(Tru64AlphaSystem)
|
||||
INIT_PARAM_DFLT(boot_osflags, "flags to pass to the kernel during boot",
|
||||
"a"),
|
||||
INIT_PARAM_DFLT(readfile, "file to read startup script from", ""),
|
||||
INIT_PARAM_DFLT(symbolfile, "file to read symbols from", ""),
|
||||
INIT_PARAM_DFLT(init_param, "numerical value to pass into simulator", 0),
|
||||
INIT_PARAM_DFLT(system_type, "Type of system we are emulating", 12),
|
||||
INIT_PARAM_DFLT(system_rev, "Revision of system we are emulating", 2<<1)
|
||||
@@ -141,6 +143,7 @@ CREATE_SIM_OBJECT(Tru64AlphaSystem)
|
||||
p->boot_osflags = boot_osflags;
|
||||
p->init_param = init_param;
|
||||
p->readfile = readfile;
|
||||
p->symbolfile = symbolfile;
|
||||
p->system_type = system_type;
|
||||
p->system_rev = system_rev;
|
||||
|
||||
|
||||
@@ -48,6 +48,9 @@
|
||||
|
||||
#include "base/trace.hh"
|
||||
|
||||
// Hack
|
||||
#include "sim/stat_control.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
vector<BaseCPU *> BaseCPU::cpuList;
|
||||
@@ -57,6 +60,30 @@ vector<BaseCPU *> BaseCPU::cpuList;
|
||||
// been initialized
|
||||
int maxThreadsPerCPU = 1;
|
||||
|
||||
void
|
||||
CPUProgressEvent::process()
|
||||
{
|
||||
Counter temp = cpu->totalInstructions();
|
||||
#ifndef NDEBUG
|
||||
double ipc = double(temp - lastNumInst) / (interval / cpu->cycles(1));
|
||||
|
||||
DPRINTFN("%s progress event, instructions committed: %lli, IPC: %0.8d\n",
|
||||
cpu->name(), temp - lastNumInst, ipc);
|
||||
ipc = 0.0;
|
||||
#else
|
||||
cprintf("%lli: %s progress event, instructions committed: %lli\n",
|
||||
curTick, cpu->name(), temp - lastNumInst);
|
||||
#endif
|
||||
lastNumInst = temp;
|
||||
schedule(curTick + interval);
|
||||
}
|
||||
|
||||
const char *
|
||||
CPUProgressEvent::description()
|
||||
{
|
||||
return "CPU Progress event";
|
||||
}
|
||||
|
||||
#if FULL_SYSTEM
|
||||
BaseCPU::BaseCPU(Params *p)
|
||||
: MemObject(p->name), clock(p->clock), checkInterrupts(true),
|
||||
@@ -67,6 +94,7 @@ BaseCPU::BaseCPU(Params *p)
|
||||
number_of_threads(p->numberOfThreads), system(p->system)
|
||||
#endif
|
||||
{
|
||||
// currentTick = curTick;
|
||||
DPRINTF(FullCPU, "BaseCPU: Creating object, mem address %#x.\n", this);
|
||||
|
||||
// add self to global list of CPUs
|
||||
@@ -128,6 +156,12 @@ BaseCPU::BaseCPU(Params *p)
|
||||
p->max_loads_all_threads, *counter);
|
||||
}
|
||||
|
||||
if (p->stats_reset_inst != 0) {
|
||||
Stats::SetupEvent(Stats::Reset, p->stats_reset_inst, 0, comInstEventQueue[0]);
|
||||
cprintf("Stats reset event scheduled for %lli insts\n",
|
||||
p->stats_reset_inst);
|
||||
}
|
||||
|
||||
#if FULL_SYSTEM
|
||||
memset(interrupts, 0, sizeof(interrupts));
|
||||
intstatus = 0;
|
||||
@@ -153,7 +187,6 @@ BaseCPU::BaseCPU(Params *p)
|
||||
if (params->profile)
|
||||
profileEvent = new ProfileEvent(this, params->profile);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
BaseCPU::Params::Params()
|
||||
@@ -188,6 +221,11 @@ BaseCPU::startup()
|
||||
if (!params->deferRegistration && profileEvent)
|
||||
profileEvent->schedule(curTick);
|
||||
#endif
|
||||
|
||||
if (params->progress_interval) {
|
||||
new CPUProgressEvent(&mainEventQueue, params->progress_interval,
|
||||
this);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -238,7 +276,11 @@ BaseCPU::registerThreadContexts()
|
||||
void
|
||||
BaseCPU::switchOut()
|
||||
{
|
||||
panic("This CPU doesn't support sampling!");
|
||||
// panic("This CPU doesn't support sampling!");
|
||||
#if FULL_SYSTEM
|
||||
if (profileEvent && profileEvent->scheduled())
|
||||
profileEvent->deschedule();
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
@@ -261,18 +303,22 @@ BaseCPU::takeOverFrom(BaseCPU *oldCPU)
|
||||
assert(newTC->getProcessPtr() == oldTC->getProcessPtr());
|
||||
newTC->getProcessPtr()->replaceThreadContext(newTC, newTC->readCpuId());
|
||||
#endif
|
||||
|
||||
// TheISA::compareXCs(oldXC, newXC);
|
||||
}
|
||||
|
||||
#if FULL_SYSTEM
|
||||
for (int i = 0; i < TheISA::NumInterruptLevels; ++i)
|
||||
interrupts[i] = oldCPU->interrupts[i];
|
||||
intstatus = oldCPU->intstatus;
|
||||
checkInterrupts = oldCPU->checkInterrupts;
|
||||
|
||||
for (int i = 0; i < threadContexts.size(); ++i)
|
||||
threadContexts[i]->profileClear();
|
||||
|
||||
if (profileEvent)
|
||||
profileEvent->schedule(curTick);
|
||||
// The Sampler must take care of this!
|
||||
// if (profileEvent)
|
||||
// profileEvent->schedule(curTick);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -46,6 +46,23 @@ class ThreadContext;
|
||||
class System;
|
||||
class Port;
|
||||
|
||||
class CPUProgressEvent : public Event
|
||||
{
|
||||
protected:
|
||||
Tick interval;
|
||||
Counter lastNumInst;
|
||||
BaseCPU *cpu;
|
||||
|
||||
public:
|
||||
CPUProgressEvent(EventQueue *q, Tick ival, BaseCPU *_cpu)
|
||||
: Event(q, Event::Stat_Event_Pri), interval(ival), lastNumInst(0), cpu(_cpu)
|
||||
{ schedule(curTick + interval); }
|
||||
|
||||
void process();
|
||||
|
||||
virtual const char *description();
|
||||
};
|
||||
|
||||
class BaseCPU : public MemObject
|
||||
{
|
||||
protected:
|
||||
@@ -53,6 +70,7 @@ class BaseCPU : public MemObject
|
||||
Tick clock;
|
||||
|
||||
public:
|
||||
// Tick currentTick;
|
||||
/** Returns Clock::Frequency divided by this CPU's clock value. */
inline Tick
frequency() const
{
    return Clock::Frequency / clock;
}
|
||||
/**
 * Scales a cycle count by this CPU's clock value.
 *
 * @param numCycles Number of cycles to convert.
 * @return clock * numCycles, as a Tick.
 */
inline Tick
cycles(int numCycles) const
{
    return clock * numCycles;
}
|
||||
/** Returns the current tick (curTick) divided by this CPU's clock value. */
inline Tick
curCycle() const
{
    return curTick / clock;
}
|
||||
@@ -120,6 +138,7 @@ class BaseCPU : public MemObject
|
||||
Counter max_insts_all_threads;
|
||||
Counter max_loads_any_thread;
|
||||
Counter max_loads_all_threads;
|
||||
Counter stats_reset_inst;
|
||||
Tick clock;
|
||||
bool functionTrace;
|
||||
Tick functionTraceStart;
|
||||
@@ -128,6 +147,7 @@ class BaseCPU : public MemObject
|
||||
int cpu_id;
|
||||
Tick profile;
|
||||
#endif
|
||||
Tick progress_interval;
|
||||
BaseCPU *checker;
|
||||
|
||||
Params();
|
||||
|
||||
@@ -197,7 +197,7 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
||||
|
||||
union Result {
|
||||
uint64_t integer;
|
||||
float fp;
|
||||
// float fp;
|
||||
double dbl;
|
||||
};
|
||||
|
||||
@@ -394,7 +394,7 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
||||
/** Returns the recorded result, read through the union's integer member. */
uint64_t
readIntResult()
{
    return instResult.integer;
}
|
||||
|
||||
/** Returns the result of a floating point instruction. */
|
||||
float readFloatResult() { return instResult.fp; }
|
||||
float readFloatResult() { return (float)instResult.dbl; }
|
||||
|
||||
/** Returns the result of a floating point (double) instruction. */
|
||||
double readDoubleResult() { return instResult.dbl; }
|
||||
@@ -419,7 +419,8 @@ class BaseDynInst : public FastAlloc, public RefCounted
|
||||
/** Records an fp register being set to a value. */
|
||||
void setFloatReg(const StaticInst *si, int idx, FloatReg val)
|
||||
{
|
||||
instResult.fp = val;
|
||||
// instResult.fp = val;
|
||||
instResult.dbl = (double)val;
|
||||
}
|
||||
|
||||
/** Records an fp register being set to an integer value. */
|
||||
|
||||
@@ -102,6 +102,7 @@ class CheckerCPU : public BaseCPU
|
||||
Process *process;
|
||||
#endif
|
||||
bool exitOnError;
|
||||
bool updateOnError;
|
||||
bool warnOnlyOnLoadError;
|
||||
};
|
||||
|
||||
@@ -148,7 +149,7 @@ class CheckerCPU : public BaseCPU
|
||||
|
||||
union Result {
|
||||
uint64_t integer;
|
||||
float fp;
|
||||
// float fp;
|
||||
double dbl;
|
||||
};
|
||||
|
||||
@@ -269,7 +270,7 @@ class CheckerCPU : public BaseCPU
|
||||
{
|
||||
int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
|
||||
thread->setFloatReg(reg_idx, val);
|
||||
result.fp = val;
|
||||
result.dbl = (double)val;
|
||||
}
|
||||
|
||||
void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val,
|
||||
@@ -318,7 +319,7 @@ class CheckerCPU : public BaseCPU
|
||||
return thread->setMiscRegWithEffect(misc_reg, val);
|
||||
}
|
||||
|
||||
void recordPCChange(uint64_t val) { changedPC = true; }
|
||||
void recordPCChange(uint64_t val) { changedPC = true; newPC = val; }
|
||||
/**
 * Flags that the next PC was changed.
 *
 * @param val New next-PC value; only the flag is updated here, the value
 *            itself is not stored.
 */
void
recordNextPCChange(uint64_t val)
{
    changedNextPC = true;
}
|
||||
|
||||
bool translateInstReq(Request *req);
|
||||
@@ -360,6 +361,7 @@ class CheckerCPU : public BaseCPU
|
||||
uint64_t newPC;
|
||||
bool changedNextPC;
|
||||
bool exitOnError;
|
||||
bool updateOnError;
|
||||
bool warnOnlyOnLoadError;
|
||||
|
||||
InstSeqNum youngestSN;
|
||||
@@ -376,7 +378,7 @@ class Checker : public CheckerCPU
|
||||
{
|
||||
public:
|
||||
Checker(Params *p)
|
||||
: CheckerCPU(p)
|
||||
: CheckerCPU(p), updateThisCycle(false), unverifiedInst(NULL)
|
||||
{ }
|
||||
|
||||
void switchOut();
|
||||
@@ -393,12 +395,19 @@ class Checker : public CheckerCPU
|
||||
private:
|
||||
void handleError(DynInstPtr &inst)
|
||||
{
|
||||
if (exitOnError)
|
||||
if (exitOnError) {
|
||||
dumpAndExit(inst);
|
||||
} else if (updateOnError) {
|
||||
updateThisCycle = true;
|
||||
}
|
||||
}
|
||||
|
||||
void dumpAndExit(DynInstPtr &inst);
|
||||
|
||||
bool updateThisCycle;
|
||||
|
||||
DynInstPtr unverifiedInst;
|
||||
|
||||
std::list<DynInstPtr> instList;
|
||||
typedef typename std::list<DynInstPtr>::iterator InstListIt;
|
||||
void dumpInsts();
|
||||
|
||||
@@ -94,6 +94,8 @@ Checker<DynInstPtr>::verify(DynInstPtr &completed_inst)
|
||||
}
|
||||
}
|
||||
|
||||
unverifiedInst = inst;
|
||||
|
||||
// Try to check all instructions that are completed, ending if we
|
||||
// run out of instructions to check or if an instruction is not
|
||||
// yet completed.
|
||||
@@ -171,7 +173,7 @@ Checker<DynInstPtr>::verify(DynInstPtr &completed_inst)
|
||||
thread->setPC(thread->readNextPC());
|
||||
thread->setNextPC(thread->readNextPC() + sizeof(MachInst));
|
||||
|
||||
return;
|
||||
break;
|
||||
} else {
|
||||
// The instruction is carrying an ITB fault. Handle
|
||||
// the fault and see if our results match the CPU on
|
||||
@@ -220,7 +222,8 @@ Checker<DynInstPtr>::verify(DynInstPtr &completed_inst)
|
||||
|
||||
thread->funcExeInst++;
|
||||
|
||||
fault = curStaticInst->execute(this, NULL);
|
||||
if (!inst->isUnverifiable())
|
||||
fault = curStaticInst->execute(this, NULL);
|
||||
|
||||
// Checks to make sure instruction results are correct.
|
||||
validateExecution(inst);
|
||||
@@ -289,6 +292,7 @@ Checker<DynInstPtr>::verify(DynInstPtr &completed_inst)
|
||||
break;
|
||||
}
|
||||
}
|
||||
unverifiedInst = NULL;
|
||||
}
|
||||
|
||||
template <class DynInstPtr>
|
||||
@@ -395,6 +399,23 @@ template <class DynInstPtr>
|
||||
void
|
||||
Checker<DynInstPtr>::validateState()
|
||||
{
|
||||
if (updateThisCycle) {
|
||||
warn("%lli: Instruction PC %#x results didn't match up, copying all "
|
||||
"registers from main CPU", curTick, unverifiedInst->readPC());
|
||||
// Heavy-weight copying of all registers
|
||||
cpuXC->copyArchRegs(unverifiedInst->xcBase());
|
||||
// Also advance the PC. Hopefully no PC-based events happened.
|
||||
#if THE_ISA != MIPS_ISA
|
||||
// go to the next instruction
|
||||
cpuXC->setPC(cpuXC->readNextPC());
|
||||
cpuXC->setNextPC(cpuXC->readNextPC() + sizeof(MachInst));
|
||||
#else
|
||||
// go to the next instruction
|
||||
cpuXC->setPC(cpuXC->readNextPC());
|
||||
cpuXC->setNextPC(cpuXC->readNextNPC());
|
||||
cpuXC->setNextNPC(cpuXC->readNextNPC() + sizeof(MachInst));
|
||||
#endif
|
||||
updateThisCycle = false;
|
||||
}
|
||||
|
||||
template <class DynInstPtr>
|
||||
|
||||
@@ -56,6 +56,7 @@ SimObjectParam<System *> system;
|
||||
Param<int> cpu_id;
|
||||
SimObjectParam<AlphaITB *> itb;
|
||||
SimObjectParam<AlphaDTB *> dtb;
|
||||
Param<Tick> profile;
|
||||
#else
|
||||
SimObjectVectorParam<Process *> workload;
|
||||
#endif // FULL_SYSTEM
|
||||
@@ -68,6 +69,8 @@ Param<Counter> max_insts_any_thread;
|
||||
Param<Counter> max_insts_all_threads;
|
||||
Param<Counter> max_loads_any_thread;
|
||||
Param<Counter> max_loads_all_threads;
|
||||
Param<Counter> stats_reset_inst;
|
||||
Param<Tick> progress_interval;
|
||||
|
||||
Param<unsigned> cachePorts;
|
||||
|
||||
@@ -162,6 +165,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
|
||||
INIT_PARAM(cpu_id, "processor ID"),
|
||||
INIT_PARAM(itb, "Instruction translation buffer"),
|
||||
INIT_PARAM(dtb, "Data translation buffer"),
|
||||
INIT_PARAM(profile, ""),
|
||||
#else
|
||||
INIT_PARAM(workload, "Processes to run"),
|
||||
#endif // FULL_SYSTEM
|
||||
@@ -184,6 +188,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
|
||||
"Terminate when all threads have reached this load"
|
||||
"count",
|
||||
0),
|
||||
INIT_PARAM_DFLT(stats_reset_inst,
|
||||
"blah",
|
||||
0),
|
||||
INIT_PARAM_DFLT(progress_interval, "Progress interval", 0),
|
||||
|
||||
INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
|
||||
|
||||
@@ -305,6 +313,7 @@ CREATE_SIM_OBJECT(DerivO3CPU)
|
||||
params->cpu_id = cpu_id;
|
||||
params->itb = itb;
|
||||
params->dtb = dtb;
|
||||
params->profile = profile;
|
||||
#else
|
||||
params->workload = workload;
|
||||
#endif // FULL_SYSTEM
|
||||
@@ -317,6 +326,8 @@ CREATE_SIM_OBJECT(DerivO3CPU)
|
||||
params->max_insts_all_threads = max_insts_all_threads;
|
||||
params->max_loads_any_thread = max_loads_any_thread;
|
||||
params->max_loads_all_threads = max_loads_all_threads;
|
||||
params->stats_reset_inst = stats_reset_inst;
|
||||
params->progress_interval = progress_interval;
|
||||
|
||||
//
|
||||
// Caches
|
||||
|
||||
@@ -64,6 +64,8 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker)
|
||||
Param<Counter> max_insts_all_threads;
|
||||
Param<Counter> max_loads_any_thread;
|
||||
Param<Counter> max_loads_all_threads;
|
||||
Param<Counter> stats_reset_inst;
|
||||
Param<Tick> progress_interval;
|
||||
|
||||
#if FULL_SYSTEM
|
||||
SimObjectParam<AlphaITB *> itb;
|
||||
@@ -78,6 +80,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker)
|
||||
|
||||
Param<bool> defer_registration;
|
||||
Param<bool> exitOnError;
|
||||
Param<bool> updateOnError;
|
||||
Param<bool> warnOnlyOnLoadError;
|
||||
Param<bool> function_trace;
|
||||
Param<Tick> function_trace_start;
|
||||
@@ -94,6 +97,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker)
|
||||
"terminate when any thread reaches this load count"),
|
||||
INIT_PARAM(max_loads_all_threads,
|
||||
"terminate when all threads have reached this load count"),
|
||||
INIT_PARAM(stats_reset_inst,
|
||||
"blah"),
|
||||
INIT_PARAM_DFLT(progress_interval, "CPU Progress Interval", 0),
|
||||
|
||||
#if FULL_SYSTEM
|
||||
INIT_PARAM(itb, "Instruction TLB"),
|
||||
@@ -109,6 +115,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker)
|
||||
|
||||
INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
|
||||
INIT_PARAM(exitOnError, "exit on error"),
|
||||
INIT_PARAM(updateOnError, "Update the checker with the main CPU's state on error"),
|
||||
INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load "
|
||||
"result errors", false),
|
||||
INIT_PARAM(function_trace, "Enable function trace"),
|
||||
@@ -126,7 +133,9 @@ CREATE_SIM_OBJECT(O3Checker)
|
||||
params->max_insts_all_threads = 0;
|
||||
params->max_loads_any_thread = 0;
|
||||
params->max_loads_all_threads = 0;
|
||||
params->stats_reset_inst = 0;
|
||||
params->exitOnError = exitOnError;
|
||||
params->updateOnError = updateOnError;
|
||||
params->warnOnlyOnLoadError = warnOnlyOnLoadError;
|
||||
params->deferRegistration = defer_registration;
|
||||
params->functionTrace = function_trace;
|
||||
@@ -139,6 +148,10 @@ CREATE_SIM_OBJECT(O3Checker)
|
||||
temp = max_insts_all_threads;
|
||||
temp = max_loads_any_thread;
|
||||
temp = max_loads_all_threads;
|
||||
temp = stats_reset_inst;
|
||||
Tick temp2 = progress_interval;
|
||||
params->progress_interval = 0;
|
||||
temp2++;
|
||||
|
||||
#if FULL_SYSTEM
|
||||
params->itb = itb;
|
||||
|
||||
@@ -1083,12 +1083,26 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
|
||||
|
||||
// Generate trap squash event.
|
||||
generateTrapEvent(tid);
|
||||
|
||||
// warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC());
|
||||
return false;
|
||||
}
|
||||
|
||||
updateComInstStats(head_inst);
|
||||
|
||||
#if FULL_SYSTEM
|
||||
if (thread[tid]->profile) {
|
||||
// bool usermode =
|
||||
// (cpu->readMiscReg(AlphaISA::IPR_DTB_CM, tid) & 0x18) != 0;
|
||||
// thread[tid]->profilePC = usermode ? 1 : head_inst->readPC();
|
||||
thread[tid]->profilePC = head_inst->readPC();
|
||||
ProfileNode *node = thread[tid]->profile->consume(thread[tid]->getXCProxy(),
|
||||
head_inst->staticInst);
|
||||
|
||||
if (node)
|
||||
thread[tid]->profileNode = node;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (head_inst->traceData) {
|
||||
head_inst->traceData->setFetchSeq(head_inst->seqNum);
|
||||
head_inst->traceData->setCPSeq(thread[tid]->numInst);
|
||||
@@ -1102,6 +1116,9 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
|
||||
head_inst->renamedDestRegIdx(i));
|
||||
}
|
||||
|
||||
if (head_inst->isCopy())
|
||||
panic("Should not commit any copy instructions!");
|
||||
|
||||
// Finally clear the head ROB entry.
|
||||
rob->retireHead(tid);
|
||||
|
||||
|
||||
@@ -33,6 +33,7 @@
|
||||
#include "config/use_checker.hh"
|
||||
|
||||
#if FULL_SYSTEM
|
||||
#include "cpu/quiesce_event.hh"
|
||||
#include "sim/system.hh"
|
||||
#else
|
||||
#include "sim/process.hh"
|
||||
@@ -793,6 +794,8 @@ template <class Impl>
|
||||
unsigned int
|
||||
FullO3CPU<Impl>::drain(Event *drain_event)
|
||||
{
|
||||
DPRINTF(O3CPU, "Switching out\n");
|
||||
BaseCPU::switchOut(_sampler);
|
||||
drainCount = 0;
|
||||
fetch.drain();
|
||||
decode.drain();
|
||||
@@ -863,6 +866,7 @@ FullO3CPU<Impl>::switchOut()
|
||||
{
|
||||
fetch.switchOut();
|
||||
rename.switchOut();
|
||||
iew.switchOut();
|
||||
commit.switchOut();
|
||||
instList.clear();
|
||||
while (!removeList.empty()) {
|
||||
@@ -930,6 +934,45 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
|
||||
tickEvent.schedule(curTick);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
FullO3CPU<Impl>::serialize(std::ostream &os)
|
||||
{
|
||||
BaseCPU::serialize(os);
|
||||
nameOut(os, csprintf("%s.tickEvent", name()));
|
||||
tickEvent.serialize(os);
|
||||
|
||||
// Use SimpleThread's ability to checkpoint to make it easier to
|
||||
// write out the registers. Also make this static so it doesn't
|
||||
// get instantiated multiple times (causes a panic in statistics).
|
||||
static CPUExecContext temp;
|
||||
|
||||
for (int i = 0; i < thread.size(); i++) {
|
||||
nameOut(os, csprintf("%s.xc.%i", name(), i));
|
||||
temp.copyXC(thread[i]->getXCProxy());
|
||||
temp.serialize(os);
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string §ion)
|
||||
{
|
||||
BaseCPU::unserialize(cp, section);
|
||||
tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
|
||||
|
||||
// Use SimpleThread's ability to checkpoint to make it easier to
|
||||
// read in the registers. Also make this static so it doesn't
|
||||
// get instantiated multiple times (causes a panic in statistics).
|
||||
static CPUExecContext temp;
|
||||
|
||||
for (int i = 0; i < thread.size(); i++) {
|
||||
temp.copyXC(thread[i]->getXCProxy());
|
||||
temp.unserialize(cp, csprintf("%s.xc.%i", section, i));
|
||||
thread[i]->getXCProxy()->copyArchRegs(temp.getProxy());
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
uint64_t
|
||||
FullO3CPU<Impl>::readIntReg(int reg_idx)
|
||||
|
||||
@@ -442,6 +442,7 @@ DefaultFetch<Impl>::takeOverFrom()
|
||||
wroteToTimeBuffer = false;
|
||||
_status = Inactive;
|
||||
switchedOut = false;
|
||||
interruptPending = false;
|
||||
branchPred.takeOverFrom();
|
||||
}
|
||||
|
||||
@@ -563,7 +564,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
|
||||
unsigned flags = 0;
|
||||
#endif // FULL_SYSTEM
|
||||
|
||||
if (cacheBlocked || (interruptPending && flags == 0)) {
|
||||
if (cacheBlocked || isSwitchedOut() || (interruptPending && flags == 0)) {
|
||||
// Hold off fetch from getting new instructions when:
|
||||
// Cache is blocked, or
|
||||
// while an interrupt is pending and we're not in PAL mode, or
|
||||
@@ -1152,8 +1153,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
||||
fetch_PC = next_PC;
|
||||
|
||||
if (instruction->isQuiesce()) {
|
||||
warn("cycle %lli: Quiesce instruction encountered, halting fetch!",
|
||||
curTick);
|
||||
// warn("%lli: Quiesce instruction encountered, halting fetch!",
|
||||
// curTick);
|
||||
fetchStatus[tid] = QuiescePending;
|
||||
++numInst;
|
||||
status_change = true;
|
||||
@@ -1268,7 +1269,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
||||
fetchStatus[tid] = TrapPending;
|
||||
status_change = true;
|
||||
|
||||
warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]);
|
||||
// warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
|
||||
#else // !FULL_SYSTEM
|
||||
warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]);
|
||||
#endif // FULL_SYSTEM
|
||||
|
||||
@@ -216,6 +216,7 @@ class DefaultIEW
|
||||
if (++wbOutstanding == wbMax)
|
||||
ableToIssue = false;
|
||||
DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
|
||||
assert(wbOutstanding <= wbMax);
|
||||
#ifdef DEBUG
|
||||
wbList.insert(sn);
|
||||
#endif
|
||||
@@ -226,6 +227,7 @@ class DefaultIEW
|
||||
if (wbOutstanding-- == wbMax)
|
||||
ableToIssue = true;
|
||||
DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
|
||||
assert(wbOutstanding >= 0);
|
||||
#ifdef DEBUG
|
||||
assert(wbList.find(sn) != wbList.end());
|
||||
wbList.erase(sn);
|
||||
@@ -450,7 +452,9 @@ class DefaultIEW
|
||||
unsigned wbCycle;
|
||||
|
||||
/** Number of instructions in flight that will writeback. */
|
||||
unsigned wbOutstanding;
|
||||
|
||||
/** Number of instructions in flight that will writeback. */
|
||||
int wbOutstanding;
|
||||
|
||||
/** Writeback width. */
|
||||
unsigned wbWidth;
|
||||
@@ -507,6 +511,8 @@ class DefaultIEW
|
||||
Stats::Scalar<> iewExecutedInsts;
|
||||
/** Stat for total number of executed load instructions. */
|
||||
Stats::Vector<> iewExecLoadInsts;
|
||||
/** Stat for total number of executed store instructions. */
|
||||
// Stats::Scalar<> iewExecStoreInsts;
|
||||
/** Stat for total number of squashed instructions skipped at execute. */
|
||||
Stats::Scalar<> iewExecSquashedInsts;
|
||||
/** Number of executed software prefetches. */
|
||||
|
||||
@@ -162,17 +162,17 @@ DefaultIEW<Impl>::regStats()
|
||||
branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect;
|
||||
|
||||
iewExecutedInsts
|
||||
.name(name() + ".EXEC:insts")
|
||||
.name(name() + ".iewExecutedInsts")
|
||||
.desc("Number of executed instructions");
|
||||
|
||||
iewExecLoadInsts
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".EXEC:loads")
|
||||
.name(name() + ".iewExecLoadInsts")
|
||||
.desc("Number of load instructions executed")
|
||||
.flags(total);
|
||||
|
||||
iewExecSquashedInsts
|
||||
.name(name() + ".EXEC:squashedInsts")
|
||||
.name(name() + ".iewExecSquashedInsts")
|
||||
.desc("Number of squashed instructions skipped in execute");
|
||||
|
||||
iewExecutedSwp
|
||||
@@ -372,6 +372,8 @@ DefaultIEW<Impl>::switchOut()
|
||||
{
|
||||
// Clear any state.
|
||||
switchedOut = true;
|
||||
assert(insts[0].empty());
|
||||
assert(skidBuffer[0].empty());
|
||||
|
||||
instQueue.switchOut();
|
||||
ldstQueue.switchOut();
|
||||
@@ -410,7 +412,6 @@ DefaultIEW<Impl>::takeOverFrom()
|
||||
|
||||
updateLSQNextCycle = false;
|
||||
|
||||
// @todo: Fix hardcoded number
|
||||
for (int i = 0; i < issueToExecQueue.getSize(); ++i) {
|
||||
issueToExecQueue.advance();
|
||||
}
|
||||
@@ -611,9 +612,11 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst)
|
||||
wbNumInst = 0;
|
||||
}
|
||||
|
||||
assert((wbCycle * wbWidth + wbNumInst) < wbMax);
|
||||
assert((wbCycle * wbWidth + wbNumInst) <= wbMax);
|
||||
}
|
||||
|
||||
DPRINTF(IEW, "Current wb cycle: %i, width: %i, numInst: %i\nwbActual:%i\n",
|
||||
wbCycle, wbWidth, wbNumInst, wbCycle * wbWidth + wbNumInst);
|
||||
// Add finished instruction to queue to commit.
|
||||
(*iewQueue)[wbCycle].insts[wbNumInst] = inst;
|
||||
(*iewQueue)[wbCycle].size++;
|
||||
@@ -901,6 +904,22 @@ DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
|
||||
{
|
||||
while (!insts[tid].empty()) {
|
||||
if (insts[tid].front()->isLoad() ||
|
||||
insts[tid].front()->isStore() ) {
|
||||
toRename->iewInfo[tid].dispatchedToLSQ++;
|
||||
}
|
||||
|
||||
toRename->iewInfo[tid].dispatched++;
|
||||
|
||||
insts[tid].pop();
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultIEW<Impl>::wakeCPU()
|
||||
@@ -1273,13 +1292,23 @@ DefaultIEW<Impl>::executeInsts()
|
||||
// event adds the instruction to the queue to commit
|
||||
fault = ldstQueue.executeLoad(inst);
|
||||
} else if (inst->isStore()) {
|
||||
ldstQueue.executeStore(inst);
|
||||
fault = ldstQueue.executeStore(inst);
|
||||
|
||||
// If the store had a fault then it may not have a mem req
|
||||
if (inst->req && !(inst->req->getFlags() & LOCKED)) {
|
||||
if (!inst->isStoreConditional() && fault == NoFault) {
|
||||
inst->setExecuted();
|
||||
|
||||
instToCommit(inst);
|
||||
} else if (fault != NoFault) {
|
||||
// If the instruction faulted, then we need to send it along to commit
|
||||
// without the instruction completing.
|
||||
|
||||
// Send this instruction to commit, also make sure iew stage
|
||||
// realizes there is activity.
|
||||
inst->setExecuted();
|
||||
|
||||
instToCommit(inst);
|
||||
activityThisCycle();
|
||||
}
|
||||
|
||||
// Store conditionals will mark themselves as
|
||||
@@ -1404,7 +1433,7 @@ DefaultIEW<Impl>::writebackInsts()
|
||||
// E.g. Uncached loads have not actually executed when they
|
||||
// are first sent to commit. Instead commit must tell the LSQ
|
||||
// when it's ready to execute the uncached load.
|
||||
if (!inst->isSquashed() && inst->isExecuted()) {
|
||||
if (!inst->isSquashed() && inst->isExecuted() && inst->getFault() == NoFault) {
|
||||
int dependents = instQueue.wakeDependents(inst);
|
||||
|
||||
for (int i = 0; i < inst->numDestRegs(); i++) {
|
||||
|
||||
@@ -479,13 +479,13 @@ class InstructionQueue
|
||||
/** Distribution of number of instructions in the queue.
|
||||
* @todo: Need to create struct to track the entry time for each
|
||||
* instruction. */
|
||||
Stats::VectorDistribution<> queueResDist;
|
||||
// Stats::VectorDistribution<> queueResDist;
|
||||
/** Distribution of the number of instructions issued. */
|
||||
Stats::Distribution<> numIssuedDist;
|
||||
/** Distribution of the cycles it takes to issue an instruction.
|
||||
* @todo: Need to create struct to track the ready time for each
|
||||
* instruction. */
|
||||
Stats::VectorDistribution<> issueDelayDist;
|
||||
// Stats::VectorDistribution<> issueDelayDist;
|
||||
|
||||
/** Number of times an instruction could not be issued because a
|
||||
* FU was busy.
|
||||
|
||||
@@ -230,7 +230,7 @@ InstructionQueue<Impl>::regStats()
|
||||
.name(name() + ".iqSquashedNonSpecRemoved")
|
||||
.desc("Number of squashed non-spec instructions that were removed")
|
||||
.prereq(iqSquashedNonSpecRemoved);
|
||||
|
||||
/*
|
||||
queueResDist
|
||||
.init(Num_OpClasses, 0, 99, 2)
|
||||
.name(name() + ".IQ:residence:")
|
||||
@@ -240,6 +240,7 @@ InstructionQueue<Impl>::regStats()
|
||||
for (int i = 0; i < Num_OpClasses; ++i) {
|
||||
queueResDist.subname(i, opClassStrings[i]);
|
||||
}
|
||||
*/
|
||||
numIssuedDist
|
||||
.init(0,totalWidth,1)
|
||||
.name(name() + ".ISSUE:issued_per_cycle")
|
||||
@@ -268,7 +269,7 @@ InstructionQueue<Impl>::regStats()
|
||||
//
|
||||
// How long did instructions for a particular FU type wait prior to issue
|
||||
//
|
||||
|
||||
/*
|
||||
issueDelayDist
|
||||
.init(Num_OpClasses,0,99,2)
|
||||
.name(name() + ".ISSUE:")
|
||||
@@ -281,7 +282,7 @@ InstructionQueue<Impl>::regStats()
|
||||
subname << opClassStrings[i] << "_delay";
|
||||
issueDelayDist.subname(i, subname.str());
|
||||
}
|
||||
|
||||
*/
|
||||
issueRate
|
||||
.name(name() + ".ISSUE:rate")
|
||||
.desc("Inst issue rate")
|
||||
@@ -385,8 +386,16 @@ template <class Impl>
|
||||
void
|
||||
InstructionQueue<Impl>::switchOut()
|
||||
{
|
||||
/*
|
||||
if (!instList[0].empty() || (numEntries != freeEntries) ||
|
||||
!readyInsts[0].empty() || !nonSpecInsts.empty() || !listOrder.empty()) {
|
||||
dumpInsts();
|
||||
// assert(0);
|
||||
}
|
||||
*/
|
||||
resetState();
|
||||
dependGraph.reset();
|
||||
instsToExecute.clear();
|
||||
switchedOut = true;
|
||||
for (int i = 0; i < numThreads; ++i) {
|
||||
memDepUnit[i].switchOut();
|
||||
@@ -642,9 +651,12 @@ template <class Impl>
|
||||
void
|
||||
InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
|
||||
{
|
||||
DPRINTF(IQ, "Processing FU completion [sn:%lli]\n", inst->seqNum);
|
||||
// The CPU could have been sleeping until this op completed (*extremely*
|
||||
// long latency op). Wake it if it was. This may be overkill.
|
||||
if (isSwitchedOut()) {
|
||||
DPRINTF(IQ, "FU completion not processed, IQ is switched out [sn:%lli]\n",
|
||||
inst->seqNum);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1036,6 +1048,10 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
|
||||
(squashed_inst->isMemRef() &&
|
||||
!squashed_inst->memOpDone)) {
|
||||
|
||||
DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x "
|
||||
"squashed.\n",
|
||||
tid, squashed_inst->seqNum, squashed_inst->readPC());
|
||||
|
||||
// Remove the instruction from the dependency list.
|
||||
if (!squashed_inst->isNonSpeculative() &&
|
||||
!squashed_inst->isStoreConditional() &&
|
||||
@@ -1066,7 +1082,7 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
|
||||
|
||||
++iqSquashedOperandsExamined;
|
||||
}
|
||||
} else {
|
||||
} else if (!squashed_inst->isStoreConditional() || !squashed_inst->isCompleted()) {
|
||||
NonSpecMapIt ns_inst_it =
|
||||
nonSpecInsts.find(squashed_inst->seqNum);
|
||||
assert(ns_inst_it != nonSpecInsts.end());
|
||||
@@ -1093,10 +1109,6 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
|
||||
count[squashed_inst->threadNumber]--;
|
||||
|
||||
++freeEntries;
|
||||
|
||||
DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x "
|
||||
"squashed.\n",
|
||||
tid, squashed_inst->seqNum, squashed_inst->readPC());
|
||||
}
|
||||
|
||||
instList[tid].erase(squash_it--);
|
||||
|
||||
@@ -165,6 +165,16 @@ LSQ<Impl>::regStats()
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LSQ<Impl>::regStats()
|
||||
{
|
||||
//Initialize LSQs
|
||||
for (int tid=0; tid < numThreads; tid++) {
|
||||
thread[tid].regStats();
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LSQ<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
|
||||
|
||||
@@ -407,20 +407,9 @@ class LSQUnit {
|
||||
// Will also need how many read/write ports the Dcache has. Or keep track
|
||||
// of that in stage that is one level up, and only call executeLoad/Store
|
||||
// the appropriate number of times.
|
||||
|
||||
/** Total number of loads forwarded from LSQ stores. */
|
||||
Stats::Scalar<> lsqForwLoads;
|
||||
|
||||
/** Total number of loads ignored due to invalid addresses. */
|
||||
Stats::Scalar<> invAddrLoads;
|
||||
|
||||
/** Total number of squashed loads. */
|
||||
Stats::Scalar<> lsqSquashedLoads;
|
||||
|
||||
/** Total number of responses from the memory system that are
|
||||
* ignored due to the instruction already being squashed. */
|
||||
Stats::Scalar<> lsqIgnoredResponses;
|
||||
|
||||
/** Total number of squashed stores. */
|
||||
Stats::Scalar<> lsqSquashedStores;
|
||||
|
||||
|
||||
@@ -180,6 +180,10 @@ LSQUnit<Impl>::regStats()
|
||||
.name(name() + ".ignoredResponses")
|
||||
.desc("Number of memory responses ignored because the instruction is squashed");
|
||||
|
||||
lsqMemOrderViolation
|
||||
.name(name() + ".memOrderViolation")
|
||||
.desc("Number of memory ordering violations");
|
||||
|
||||
lsqSquashedStores
|
||||
.name(name() + ".squashedStores")
|
||||
.desc("Number of stores squashed");
|
||||
@@ -220,8 +224,10 @@ void
|
||||
LSQUnit<Impl>::switchOut()
|
||||
{
|
||||
switchedOut = true;
|
||||
for (int i = 0; i < loadQueue.size(); ++i)
|
||||
for (int i = 0; i < loadQueue.size(); ++i) {
|
||||
assert(!loadQueue[i]);
|
||||
loadQueue[i] = NULL;
|
||||
}
|
||||
|
||||
assert(storesToWB == 0);
|
||||
}
|
||||
@@ -408,6 +414,11 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
|
||||
if (load_fault != NoFault) {
|
||||
// Send this instruction to commit, also make sure iew stage
|
||||
// realizes there is activity.
|
||||
// Mark it as executed unless it is an uncached load that
|
||||
// needs to hit the head of commit.
|
||||
if (!(inst->req->flags & UNCACHEABLE) || inst->isAtCommit()) {
|
||||
inst->setExecuted();
|
||||
}
|
||||
iewStage->instToCommit(inst);
|
||||
iewStage->activityThisCycle();
|
||||
}
|
||||
@@ -467,6 +478,7 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
|
||||
// A load incorrectly passed this store. Squash and refetch.
|
||||
// For now return a fault to show that it was unsuccessful.
|
||||
memDepViolator = loadQueue[load_idx];
|
||||
++lsqMemOrderViolation;
|
||||
|
||||
return genMachineCheckFault();
|
||||
}
|
||||
|
||||
@@ -109,6 +109,9 @@ template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::switchOut()
|
||||
{
|
||||
assert(instList[0].empty());
|
||||
assert(instsToReplay.empty());
|
||||
assert(memDepHash.empty());
|
||||
// Clear any state.
|
||||
for (int i = 0; i < Impl::MaxThreads; ++i) {
|
||||
instList[i].clear();
|
||||
|
||||
@@ -417,6 +417,8 @@ class DefaultRename
|
||||
/** The maximum skid buffer size. */
|
||||
unsigned skidBufferMax;
|
||||
|
||||
PhysRegIndex maxPhysicalRegs;
|
||||
|
||||
/** Enum to record the source of a structure full stall. Can come from
|
||||
* either ROB, IQ, LSQ, and it is prioritized in that order.
|
||||
*/
|
||||
|
||||
@@ -41,7 +41,8 @@ DefaultRename<Impl>::DefaultRename(Params *params)
|
||||
commitToRenameDelay(params->commitToRenameDelay),
|
||||
renameWidth(params->renameWidth),
|
||||
commitWidth(params->commitWidth),
|
||||
numThreads(params->numberOfThreads)
|
||||
numThreads(params->numberOfThreads),
|
||||
maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs)
|
||||
{
|
||||
_status = Inactive;
|
||||
|
||||
@@ -286,6 +287,11 @@ DefaultRename<Impl>::switchOut()
|
||||
// Put the renamed physical register back on the free list.
|
||||
freeList->addReg(hb_it->newPhysReg);
|
||||
|
||||
// Be sure to mark its register as ready if it's a misc register.
|
||||
if (hb_it->newPhysReg >= maxPhysicalRegs) {
|
||||
scoreboard->setReg(hb_it->newPhysReg);
|
||||
}
|
||||
|
||||
historyBuffer[i].erase(hb_it++);
|
||||
}
|
||||
insts[i].clear();
|
||||
@@ -889,6 +895,11 @@ DefaultRename<Impl>::doSquash(const InstSeqNum &squashed_seq_num, unsigned tid)
|
||||
// Put the renamed physical register back on the free list.
|
||||
freeList->addReg(hb_it->newPhysReg);
|
||||
|
||||
// Be sure to mark its register as ready if it's a misc register.
|
||||
if (hb_it->newPhysReg >= maxPhysicalRegs) {
|
||||
scoreboard->setReg(hb_it->newPhysReg);
|
||||
}
|
||||
|
||||
historyBuffer[tid].erase(hb_it++);
|
||||
|
||||
++renameUndoneMaps;
|
||||
|
||||
@@ -31,8 +31,11 @@
|
||||
#ifndef __CPU_O3_THREAD_STATE_HH__
|
||||
#define __CPU_O3_THREAD_STATE_HH__
|
||||
|
||||
#include "base/callback.hh"
|
||||
#include "base/output.hh"
|
||||
#include "cpu/thread_context.hh"
|
||||
#include "cpu/thread_state.hh"
|
||||
#include "sim/sim_exit.hh"
|
||||
|
||||
class Event;
|
||||
class Process;
|
||||
@@ -75,8 +78,22 @@ struct O3ThreadState : public ThreadState {
|
||||
#if FULL_SYSTEM
|
||||
O3ThreadState(O3CPU *_cpu, int _thread_num)
|
||||
: ThreadState(-1, _thread_num),
|
||||
inSyscall(0), trapPending(0)
|
||||
{ }
|
||||
cpu(_cpu), inSyscall(0), trapPending(0)
|
||||
{
|
||||
if (cpu->params->profile) {
|
||||
profile = new FunctionProfile(cpu->params->system->kernelSymtab);
|
||||
Callback *cb =
|
||||
new MakeCallback<O3ThreadState,
|
||||
&O3ThreadState::dumpFuncProfile>(this);
|
||||
registerExitCallback(cb);
|
||||
}
|
||||
|
||||
// let's fill with a dummy node for now so we don't get a segfault
|
||||
// on the first cycle when there's no node available.
|
||||
static ProfileNode dummyNode;
|
||||
profileNode = &dummyNode;
|
||||
profilePC = 3;
|
||||
}
|
||||
#else
|
||||
O3ThreadState(O3CPU *_cpu, int _thread_num, Process *_process, int _asid,
|
||||
MemObject *mem)
|
||||
@@ -95,6 +112,14 @@ struct O3ThreadState : public ThreadState {
|
||||
/** Handles the syscall. */
|
||||
void syscall(int64_t callnum) { process->syscall(callnum, tc); }
|
||||
#endif
|
||||
|
||||
#if FULL_SYSTEM
|
||||
/**
 * Dumps this thread's function profile to an output stream. The stream is
 * created through simout and is named "profile.<cpu name>.dat".
 */
void dumpFuncProfile()
|
||||
{
|
||||
std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name()));
|
||||
// Write the collected profile data to the newly created stream.
profile->dump(xcProxy, *os);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif // __CPU_O3_THREAD_STATE_HH__
|
||||
|
||||
@@ -62,6 +62,8 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize,
|
||||
for (int i = 0; i < localPredictorSize; ++i)
|
||||
localCtrs[i].setBits(localCtrBits);
|
||||
|
||||
localPredictorMask = floorPow2(localPredictorSize) - 1;
|
||||
|
||||
if (!isPowerOf2(localHistoryTableSize)) {
|
||||
fatal("Invalid local history table size!\n");
|
||||
}
|
||||
@@ -158,7 +160,7 @@ TournamentBP::lookup(Addr &branch_addr, void * &bp_history)
|
||||
//Lookup in the local predictor to get its branch prediction
|
||||
local_history_idx = calcLocHistIdx(branch_addr);
|
||||
local_predictor_idx = localHistoryTable[local_history_idx]
|
||||
& localHistoryMask;
|
||||
& localPredictorMask;
|
||||
local_prediction = localCtrs[local_predictor_idx].read() > threshold;
|
||||
|
||||
//Lookup in the global predictor to get its branch prediction
|
||||
@@ -176,7 +178,8 @@ TournamentBP::lookup(Addr &branch_addr, void * &bp_history)
|
||||
bp_history = (void *)history;
|
||||
|
||||
assert(globalHistory < globalPredictorSize &&
|
||||
local_history_idx < localPredictorSize);
|
||||
local_history_idx < localHistoryTableSize &&
|
||||
local_predictor_idx < localPredictorSize);
|
||||
|
||||
// Commented code is for doing speculative update of counters and
|
||||
// all histories.
|
||||
@@ -234,7 +237,7 @@ TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history)
|
||||
// Get the local predictor's current prediction
|
||||
local_history_idx = calcLocHistIdx(branch_addr);
|
||||
local_predictor_hist = localHistoryTable[local_history_idx];
|
||||
local_predictor_idx = local_predictor_hist & localHistoryMask;
|
||||
local_predictor_idx = local_predictor_hist & localPredictorMask;
|
||||
|
||||
// Update the choice predictor to tell it which one was correct if
|
||||
// there was a prediction.
|
||||
@@ -256,6 +259,7 @@ TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history)
|
||||
}
|
||||
|
||||
assert(globalHistory < globalPredictorSize &&
|
||||
local_history_idx < localHistoryTableSize &&
|
||||
local_predictor_idx < localPredictorSize);
|
||||
|
||||
// Update the counters and local history with the proper
|
||||
|
||||
@@ -159,6 +159,9 @@ class TournamentBP
|
||||
/** Size of the local predictor. */
|
||||
unsigned localPredictorSize;
|
||||
|
||||
/** Mask to get the proper index bits into the predictor. */
|
||||
unsigned localPredictorMask;
|
||||
|
||||
/** Number of bits of the local predictor's counters. */
|
||||
unsigned localCtrBits;
|
||||
|
||||
|
||||
@@ -65,6 +65,8 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker)
|
||||
Param<Counter> max_insts_all_threads;
|
||||
Param<Counter> max_loads_any_thread;
|
||||
Param<Counter> max_loads_all_threads;
|
||||
Param<Counter> stats_reset_inst;
|
||||
Param<Tick> progress_interval;
|
||||
|
||||
#if FULL_SYSTEM
|
||||
SimObjectParam<AlphaITB *> itb;
|
||||
@@ -79,6 +81,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker)
|
||||
|
||||
Param<bool> defer_registration;
|
||||
Param<bool> exitOnError;
|
||||
Param<bool> updateOnError;
|
||||
Param<bool> warnOnlyOnLoadError;
|
||||
Param<bool> function_trace;
|
||||
Param<Tick> function_trace_start;
|
||||
@@ -95,6 +98,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(OzoneChecker)
|
||||
"terminate when any thread reaches this load count"),
|
||||
INIT_PARAM(max_loads_all_threads,
|
||||
"terminate when all threads have reached this load count"),
|
||||
INIT_PARAM(stats_reset_inst,
|
||||
"blah"),
|
||||
INIT_PARAM_DFLT(progress_interval, "CPU Progress Interval", 0),
|
||||
|
||||
#if FULL_SYSTEM
|
||||
INIT_PARAM(itb, "Instruction TLB"),
|
||||
@@ -110,6 +116,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(OzoneChecker)
|
||||
|
||||
INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
|
||||
INIT_PARAM(exitOnError, "exit on error"),
|
||||
INIT_PARAM(updateOnError, "Update the checker with the main CPU's state on error"),
|
||||
INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load "
|
||||
"result errors", false),
|
||||
INIT_PARAM(function_trace, "Enable function trace"),
|
||||
@@ -127,7 +134,9 @@ CREATE_SIM_OBJECT(OzoneChecker)
|
||||
params->max_insts_all_threads = 0;
|
||||
params->max_loads_any_thread = 0;
|
||||
params->max_loads_all_threads = 0;
|
||||
params->stats_reset_inst = 0;
|
||||
params->exitOnError = exitOnError;
|
||||
params->updateOnError = updateOnError;
|
||||
params->warnOnlyOnLoadError = warnOnlyOnLoadError;
|
||||
params->deferRegistration = defer_registration;
|
||||
params->functionTrace = function_trace;
|
||||
@@ -140,6 +149,10 @@ CREATE_SIM_OBJECT(OzoneChecker)
|
||||
temp = max_insts_all_threads;
|
||||
temp = max_loads_any_thread;
|
||||
temp = max_loads_all_threads;
|
||||
temp = stats_reset_inst;
|
||||
Tick temp2 = progress_interval;
|
||||
temp2++;
|
||||
params->progress_interval = 0;
|
||||
|
||||
#if FULL_SYSTEM
|
||||
params->itb = itb;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2005 The Regents of The University of Michigan
|
||||
* Copyright (c) 2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@@ -81,13 +81,13 @@ template <class>
|
||||
class Checker;
|
||||
|
||||
/**
|
||||
* Declaration of Out-of-Order CPU class. Basically it is a SimpleCPU with
|
||||
* simple out-of-order capabilities added to it. It is still a 1 CPI machine
|
||||
* (?), but is capable of handling cache misses. Basically it models having
|
||||
* a ROB/IQ by only allowing a certain amount of instructions to execute while
|
||||
* the cache miss is outstanding.
|
||||
* Light weight out of order CPU model that approximates an out of
|
||||
* order CPU. It is separated into a front end and a back end, with
|
||||
* the template parameter Impl describing the classes used for each.
|
||||
* The goal is to be able to specify through the Impl the class to use
|
||||
* for the front end and back end, with different classes used to
|
||||
* model different levels of detail.
|
||||
*/
|
||||
|
||||
template <class Impl>
|
||||
class OzoneCPU : public BaseCPU
|
||||
{
|
||||
@@ -273,6 +273,7 @@ class OzoneCPU : public BaseCPU
|
||||
typedef OzoneThreadState<Impl> ImplState;
|
||||
|
||||
private:
|
||||
// Committed thread state for the OzoneCPU.
|
||||
OzoneThreadState<Impl> thread;
|
||||
|
||||
public:
|
||||
@@ -310,12 +311,6 @@ class OzoneCPU : public BaseCPU
|
||||
tickEvent.squash();
|
||||
}
|
||||
|
||||
private:
|
||||
Trace::InstRecord *traceData;
|
||||
|
||||
template<typename T>
|
||||
void trace_data(T data);
|
||||
|
||||
public:
|
||||
enum Status {
|
||||
Running,
|
||||
@@ -326,8 +321,6 @@ class OzoneCPU : public BaseCPU
|
||||
Status _status;
|
||||
|
||||
public:
|
||||
bool checkInterrupts;
|
||||
|
||||
void post_interrupt(int int_num, int index);
|
||||
|
||||
void zero_fill_64(Addr addr) {
|
||||
@@ -379,6 +372,7 @@ class OzoneCPU : public BaseCPU
|
||||
FrontEnd *frontEnd;
|
||||
|
||||
BackEnd *backEnd;
|
||||
|
||||
private:
|
||||
Status status() const { return _status; }
|
||||
void setStatus(Status new_status) { _status = new_status; }
|
||||
@@ -410,12 +404,11 @@ class OzoneCPU : public BaseCPU
|
||||
// number of idle cycles
|
||||
Stats::Average<> notIdleFraction;
|
||||
Stats::Formula idleFraction;
|
||||
public:
|
||||
|
||||
public:
|
||||
virtual void serialize(std::ostream &os);
|
||||
virtual void unserialize(Checkpoint *cp, const std::string §ion);
|
||||
|
||||
|
||||
#if FULL_SYSTEM
|
||||
/** Translates instruction request. */
|
||||
Fault translateInstReq(RequestPtr &req, OzoneThreadState<Impl> *thread)
|
||||
@@ -582,12 +575,9 @@ class OzoneCPU : public BaseCPU
|
||||
|
||||
Fault copy(Addr dest);
|
||||
|
||||
InstSeqNum globalSeqNum;
|
||||
|
||||
public:
|
||||
void squashFromTC();
|
||||
|
||||
// @todo: This can be a useful debug function. Implement it.
|
||||
void dumpInsts() { frontEnd->dumpInsts(); }
|
||||
|
||||
#if FULL_SYSTEM
|
||||
@@ -605,7 +595,6 @@ class OzoneCPU : public BaseCPU
|
||||
|
||||
ThreadContext *tcBase() { return tc; }
|
||||
|
||||
bool decoupledFrontEnd;
|
||||
struct CommStruct {
|
||||
InstSeqNum doneSeqNum;
|
||||
InstSeqNum nonSpecSeqNum;
|
||||
@@ -614,8 +603,13 @@ class OzoneCPU : public BaseCPU
|
||||
|
||||
bool stall;
|
||||
};
|
||||
|
||||
InstSeqNum globalSeqNum;
|
||||
|
||||
TimeBuffer<CommStruct> comm;
|
||||
|
||||
bool decoupledFrontEnd;
|
||||
|
||||
bool lockFlag;
|
||||
|
||||
Stats::Scalar<> quiesceCycles;
|
||||
|
||||
@@ -63,6 +63,7 @@ SimObjectParam<System *> system;
|
||||
Param<int> cpu_id;
|
||||
SimObjectParam<AlphaITB *> itb;
|
||||
SimObjectParam<AlphaDTB *> dtb;
|
||||
Param<Tick> profile;
|
||||
#else
|
||||
SimObjectVectorParam<Process *> workload;
|
||||
//SimObjectParam<PageTable *> page_table;
|
||||
@@ -76,16 +77,19 @@ Param<Counter> max_insts_any_thread;
|
||||
Param<Counter> max_insts_all_threads;
|
||||
Param<Counter> max_loads_any_thread;
|
||||
Param<Counter> max_loads_all_threads;
|
||||
Param<Counter> stats_reset_inst;
|
||||
Param<Tick> progress_interval;
|
||||
|
||||
//SimObjectParam<BaseCache *> icache;
|
||||
//SimObjectParam<BaseCache *> dcache;
|
||||
|
||||
Param<unsigned> cachePorts;
|
||||
Param<unsigned> width;
|
||||
Param<unsigned> frontEndLatency;
|
||||
Param<unsigned> frontEndWidth;
|
||||
Param<unsigned> backEndLatency;
|
||||
Param<unsigned> backEndWidth;
|
||||
Param<unsigned> backEndSquashLatency;
|
||||
Param<unsigned> backEndLatency;
|
||||
Param<unsigned> maxInstBufferSize;
|
||||
Param<unsigned> numPhysicalRegs;
|
||||
Param<unsigned> maxOutstandingMemOps;
|
||||
@@ -140,6 +144,7 @@ Param<unsigned> RASSize;
|
||||
|
||||
Param<unsigned> LQEntries;
|
||||
Param<unsigned> SQEntries;
|
||||
Param<bool> lsqLimits;
|
||||
Param<unsigned> LFSTSize;
|
||||
Param<unsigned> SSITSize;
|
||||
|
||||
@@ -181,6 +186,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
|
||||
INIT_PARAM(cpu_id, "processor ID"),
|
||||
INIT_PARAM(itb, "Instruction translation buffer"),
|
||||
INIT_PARAM(dtb, "Data translation buffer"),
|
||||
INIT_PARAM(profile, ""),
|
||||
#else
|
||||
INIT_PARAM(workload, "Processes to run"),
|
||||
// INIT_PARAM(page_table, "Page table"),
|
||||
@@ -204,16 +210,21 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
|
||||
"Terminate when all threads have reached this load"
|
||||
"count",
|
||||
0),
|
||||
INIT_PARAM_DFLT(stats_reset_inst,
|
||||
"blah",
|
||||
0),
|
||||
INIT_PARAM_DFLT(progress_interval, "Progress interval", 0),
|
||||
|
||||
// INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
|
||||
// INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
|
||||
|
||||
INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
|
||||
INIT_PARAM_DFLT(width, "Width", 1),
|
||||
INIT_PARAM_DFLT(frontEndLatency, "Front end latency", 1),
|
||||
INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1),
|
||||
INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1),
|
||||
INIT_PARAM_DFLT(backEndWidth, "Back end width", 1),
|
||||
INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1),
|
||||
INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1),
|
||||
INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16),
|
||||
INIT_PARAM(numPhysicalRegs, "Number of physical registers"),
|
||||
INIT_PARAM_DFLT(maxOutstandingMemOps, "Maximum outstanding memory operations", 4),
|
||||
@@ -274,6 +285,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
|
||||
|
||||
INIT_PARAM(LQEntries, "Number of load queue entries"),
|
||||
INIT_PARAM(SQEntries, "Number of store queue entries"),
|
||||
INIT_PARAM_DFLT(lsqLimits, "LSQ size limits dispatch", true),
|
||||
INIT_PARAM(LFSTSize, "Last fetched store table size"),
|
||||
INIT_PARAM(SSITSize, "Store set ID table size"),
|
||||
|
||||
@@ -336,6 +348,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
|
||||
params->cpu_id = cpu_id;
|
||||
params->itb = itb;
|
||||
params->dtb = dtb;
|
||||
params->profile = profile;
|
||||
#else
|
||||
params->workload = workload;
|
||||
// params->pTable = page_table;
|
||||
@@ -347,6 +360,8 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
|
||||
params->max_insts_all_threads = max_insts_all_threads;
|
||||
params->max_loads_any_thread = max_loads_any_thread;
|
||||
params->max_loads_all_threads = max_loads_all_threads;
|
||||
params->stats_reset_inst = stats_reset_inst;
|
||||
params->progress_interval = progress_interval;
|
||||
|
||||
//
|
||||
// Caches
|
||||
@@ -357,6 +372,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
|
||||
|
||||
params->width = width;
|
||||
params->frontEndWidth = frontEndWidth;
|
||||
params->frontEndLatency = frontEndLatency;
|
||||
params->backEndWidth = backEndWidth;
|
||||
params->backEndSquashLatency = backEndSquashLatency;
|
||||
params->backEndLatency = backEndLatency;
|
||||
@@ -414,6 +430,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
|
||||
|
||||
params->LQEntries = LQEntries;
|
||||
params->SQEntries = SQEntries;
|
||||
params->lsqLimits = lsqLimits;
|
||||
|
||||
params->SSITSize = SSITSize;
|
||||
params->LFSTSize = LFSTSize;
|
||||
|
||||
@@ -50,7 +50,6 @@
|
||||
#include "arch/alpha/types.hh"
|
||||
#include "arch/vtophys.hh"
|
||||
#include "base/callback.hh"
|
||||
//#include "base/remote_gdb.hh"
|
||||
#include "cpu/profile.hh"
|
||||
#include "kern/kernel_stats.hh"
|
||||
#include "sim/faults.hh"
|
||||
@@ -67,15 +66,6 @@
|
||||
|
||||
using namespace TheISA;
|
||||
|
||||
/**
 * Stores the given data in the instruction trace record, but only when a
 * trace record is attached (traceData is non-null); otherwise does nothing.
 */
template <class Impl>
|
||||
template<typename T>
|
||||
void
|
||||
OzoneCPU<Impl>::trace_data(T data) {
|
||||
if (traceData) {
|
||||
traceData->setData(data);
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
OzoneCPU<Impl>::TickEvent::TickEvent(OzoneCPU *c, int w)
|
||||
: Event(&mainEventQueue, CPU_Tick_Pri), cpu(c), width(w)
|
||||
@@ -112,7 +102,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
|
||||
_status = Idle;
|
||||
|
||||
if (p->checker) {
|
||||
#if USE_CHECKER
|
||||
|
||||
BaseCPU *temp_checker = p->checker;
|
||||
checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker);
|
||||
checker->setMemory(mem);
|
||||
@@ -126,6 +116,8 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
|
||||
panic("Checker enabled but not compiled in!");
|
||||
#endif
|
||||
} else {
|
||||
// If checker is not being used, then the xcProxy points
|
||||
// directly to the CPU's ExecContext.
|
||||
checker = NULL;
|
||||
thread.tc = &ozoneTC;
|
||||
tc = &ozoneTC;
|
||||
@@ -138,7 +130,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
|
||||
|
||||
thread.setStatus(ThreadContext::Suspended);
|
||||
#if FULL_SYSTEM
|
||||
/***** All thread state stuff *****/
|
||||
// Setup thread state stuff.
|
||||
thread.cpu = this;
|
||||
thread.setTid(0);
|
||||
|
||||
@@ -187,12 +179,15 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
|
||||
frontEnd->setBackEnd(backEnd);
|
||||
backEnd->setFrontEnd(frontEnd);
|
||||
|
||||
decoupledFrontEnd = p->decoupledFrontEnd;
|
||||
|
||||
globalSeqNum = 1;
|
||||
|
||||
#if FULL_SYSTEM
|
||||
checkInterrupts = false;
|
||||
#endif
|
||||
|
||||
lockFlag = 0;
|
||||
|
||||
// Setup rename table, initializing all values to ready.
|
||||
for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
|
||||
thread.renameTable[i] = new DynInst(this);
|
||||
thread.renameTable[i]->setResultReady();
|
||||
@@ -233,8 +228,6 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
|
||||
thread.setVirtPort(virt_port);
|
||||
#endif
|
||||
|
||||
lockFlag = 0;
|
||||
|
||||
DPRINTF(OzoneCPU, "OzoneCPU: Created Ozone cpu object.\n");
|
||||
}
|
||||
|
||||
@@ -247,6 +240,7 @@ template <class Impl>
|
||||
void
|
||||
OzoneCPU<Impl>::switchOut()
|
||||
{
|
||||
BaseCPU::switchOut(_sampler);
|
||||
switchCount = 0;
|
||||
// Front end needs state from back end, so switch out the back end first.
|
||||
backEnd->switchOut();
|
||||
@@ -257,6 +251,8 @@ template <class Impl>
|
||||
void
|
||||
OzoneCPU<Impl>::signalSwitched()
|
||||
{
|
||||
// Only complete the switchout when both the front end and back
|
||||
// end have signalled they are ready to switch.
|
||||
if (++switchCount == 2) {
|
||||
backEnd->doSwitchOut();
|
||||
frontEnd->doSwitchOut();
|
||||
@@ -266,6 +262,17 @@ OzoneCPU<Impl>::signalSwitched()
|
||||
#endif
|
||||
|
||||
_status = SwitchedOut;
|
||||
#ifndef NDEBUG
|
||||
// Loop through all registers
|
||||
for (int i = 0; i < AlphaISA::TotalNumRegs; ++i) {
|
||||
assert(thread.renameTable[i] == frontEnd->renameTable[i]);
|
||||
|
||||
assert(thread.renameTable[i] == backEnd->renameTable[i]);
|
||||
|
||||
DPRINTF(OzoneCPU, "Checking if register %i matches.\n", i);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (tickEvent.scheduled())
|
||||
tickEvent.squash();
|
||||
}
|
||||
@@ -278,13 +285,25 @@ OzoneCPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
|
||||
{
|
||||
BaseCPU::takeOverFrom(oldCPU);
|
||||
|
||||
thread.trapPending = false;
|
||||
thread.inSyscall = false;
|
||||
|
||||
backEnd->takeOverFrom();
|
||||
frontEnd->takeOverFrom();
|
||||
frontEnd->renameTable.copyFrom(thread.renameTable);
|
||||
backEnd->renameTable.copyFrom(thread.renameTable);
|
||||
assert(!tickEvent.scheduled());
|
||||
|
||||
#ifndef NDEBUG
|
||||
// Check rename table.
|
||||
for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
|
||||
assert(thread.renameTable[i]->isResultReady());
|
||||
}
|
||||
#endif
|
||||
|
||||
// @todo: Fix hardcoded number
|
||||
// Clear out any old information in time buffer.
|
||||
for (int i = 0; i < 6; ++i) {
|
||||
for (int i = 0; i < 15; ++i) {
|
||||
comm.advance();
|
||||
}
|
||||
|
||||
@@ -316,6 +335,10 @@ OzoneCPU<Impl>::activateContext(int thread_num, int delay)
|
||||
notIdleFraction++;
|
||||
scheduleTickEvent(delay);
|
||||
_status = Running;
|
||||
#if FULL_SYSTEM
|
||||
if (thread.quiesceEvent && thread.quiesceEvent->scheduled())
|
||||
thread.quiesceEvent->deschedule();
|
||||
#endif
|
||||
thread.setStatus(ThreadContext::Active);
|
||||
frontEnd->wakeFromQuiesce();
|
||||
}
|
||||
@@ -393,7 +416,7 @@ template <class Impl>
|
||||
void
|
||||
OzoneCPU<Impl>::resetStats()
|
||||
{
|
||||
startNumInst = numInst;
|
||||
// startNumInst = numInst;
|
||||
notIdleFraction = (_status != Idle);
|
||||
}
|
||||
|
||||
@@ -441,6 +464,15 @@ OzoneCPU<Impl>::serialize(std::ostream &os)
|
||||
ozoneTC.serialize(os);
|
||||
nameOut(os, csprintf("%s.tickEvent", name()));
|
||||
tickEvent.serialize(os);
|
||||
|
||||
// Use SimpleThread's ability to checkpoint to make it easier to
|
||||
// write out the registers. Also make this static so it doesn't
|
||||
// get instantiated multiple times (causes a panic in statistics).
|
||||
static CPUExecContext temp;
|
||||
|
||||
nameOut(os, csprintf("%s.xc.0", name()));
|
||||
temp.copyXC(thread.getXCProxy());
|
||||
temp.serialize(os);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
@@ -451,6 +483,15 @@ OzoneCPU<Impl>::unserialize(Checkpoint *cp, const std::string §ion)
|
||||
UNSERIALIZE_ENUM(_status);
|
||||
ozoneTC.unserialize(cp, csprintf("%s.tc", section));
|
||||
tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
|
||||
|
||||
// Use SimpleThread's ability to checkpoint to make it easier to
|
||||
// read in the registers. Also make this static so it doesn't
|
||||
// get instantiated multiple times (causes a panic in statistics).
|
||||
static CPUExecContext temp;
|
||||
|
||||
temp.copyXC(thread.getXCProxy());
|
||||
temp.unserialize(cp, csprintf("%s.xc.0", section));
|
||||
thread.getXCProxy()->copyArchRegs(temp.getProxy());
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
@@ -810,7 +851,9 @@ OzoneCPU<Impl>::OzoneTC::halt()
|
||||
template <class Impl>
|
||||
void
|
||||
OzoneCPU<Impl>::OzoneTC::dumpFuncProfile()
|
||||
{ }
|
||||
{
|
||||
thread->dumpFuncProfile();
|
||||
}
|
||||
#endif
|
||||
|
||||
template <class Impl>
|
||||
@@ -829,6 +872,7 @@ OzoneCPU<Impl>::OzoneTC::takeOverFrom(ThreadContext *old_context)
|
||||
copyArchRegs(old_context);
|
||||
setCpuId(old_context->readCpuId());
|
||||
|
||||
thread->inst = old_context->getInst();
|
||||
#if !FULL_SYSTEM
|
||||
setFuncExeInst(old_context->readFuncExeInst());
|
||||
#else
|
||||
@@ -842,6 +886,7 @@ OzoneCPU<Impl>::OzoneTC::takeOverFrom(ThreadContext *old_context)
|
||||
thread->quiesceEvent->tc = this;
|
||||
}
|
||||
|
||||
// Copy kernel stats pointer from old context.
|
||||
thread->kernelStats = old_context->getKernelStats();
|
||||
// storeCondFailures = 0;
|
||||
cpu->lockFlag = false;
|
||||
@@ -863,7 +908,11 @@ OzoneCPU<Impl>::OzoneTC::regStats(const std::string &name)
|
||||
template <class Impl>
|
||||
void
|
||||
OzoneCPU<Impl>::OzoneTC::serialize(std::ostream &os)
|
||||
{ }
|
||||
{
|
||||
// Once serialization is added, serialize the quiesce event and
|
||||
// kernel stats. Will need to make sure there aren't multiple
|
||||
// things that serialize them.
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
@@ -896,16 +945,14 @@ template <class Impl>
|
||||
void
|
||||
OzoneCPU<Impl>::OzoneTC::profileClear()
|
||||
{
|
||||
if (thread->profile)
|
||||
thread->profile->clear();
|
||||
thread->profileClear();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
OzoneCPU<Impl>::OzoneTC::profileSample()
|
||||
{
|
||||
if (thread->profile)
|
||||
thread->profile->sample(thread->profileNode, thread->profilePC);
|
||||
thread->profileSample();
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -916,7 +963,6 @@ OzoneCPU<Impl>::OzoneTC::getThreadNum()
|
||||
return thread->readTid();
|
||||
}
|
||||
|
||||
// Also somewhat obnoxious. Really only used for the TLB fault.
|
||||
template <class Impl>
|
||||
TheISA::MachInst
|
||||
OzoneCPU<Impl>::OzoneTC::getInst()
|
||||
@@ -934,14 +980,20 @@ OzoneCPU<Impl>::OzoneTC::copyArchRegs(ThreadContext *tc)
|
||||
cpu->frontEnd->setPC(thread->PC);
|
||||
cpu->frontEnd->setNextPC(thread->nextPC);
|
||||
|
||||
for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
|
||||
if (i < TheISA::FP_Base_DepTag) {
|
||||
thread->renameTable[i]->setIntResult(tc->readIntReg(i));
|
||||
} else if (i < (TheISA::FP_Base_DepTag + TheISA::NumFloatRegs)) {
|
||||
int fp_idx = i - TheISA::FP_Base_DepTag;
|
||||
thread->renameTable[i]->setDoubleResult(
|
||||
tc->readFloatReg(fp_idx, 64));
|
||||
}
|
||||
// First loop through the integer registers.
|
||||
for (int i = 0; i < TheISA::NumIntRegs; ++i) {
|
||||
/* DPRINTF(OzoneCPU, "Copying over register %i, had data %lli, "
|
||||
"now has data %lli.\n",
|
||||
i, thread->renameTable[i]->readIntResult(),
|
||||
tc->readIntReg(i));
|
||||
*/
|
||||
thread->renameTable[i]->setIntResult(tc->readIntReg(i));
|
||||
}
|
||||
|
||||
// Then loop through the floating point registers.
|
||||
for (int i = 0; i < TheISA::NumFloatRegs; ++i) {
|
||||
int fp_idx = i + TheISA::FP_Base_DepTag;
|
||||
thread->renameTable[fp_idx]->setIntResult(tc->readFloatRegBits(i));
|
||||
}
|
||||
|
||||
#if !FULL_SYSTEM
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
#include <deque>
|
||||
|
||||
#include "arch/utility.hh"
|
||||
#include "base/timebuf.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "cpu/o3/bpred_unit.hh"
|
||||
#include "cpu/ozone/rename_table.hh"
|
||||
@@ -246,15 +247,21 @@ class FrontEnd
|
||||
void dumpInsts();
|
||||
|
||||
private:
|
||||
TimeBuffer<int> numInstsReady;
|
||||
|
||||
typedef typename std::deque<DynInstPtr> InstBuff;
|
||||
typedef typename InstBuff::iterator InstBuffIt;
|
||||
|
||||
InstBuff feBuffer;
|
||||
|
||||
InstBuff instBuffer;
|
||||
|
||||
int instBufferSize;
|
||||
|
||||
int maxInstBufferSize;
|
||||
|
||||
int latency;
|
||||
|
||||
int width;
|
||||
|
||||
int freeRegs;
|
||||
|
||||
@@ -92,8 +92,10 @@ FrontEnd<Impl>::FrontEnd(Params *params)
|
||||
: branchPred(params),
|
||||
icachePort(this),
|
||||
mem(params->mem),
|
||||
numInstsReady(params->frontEndLatency, 0),
|
||||
instBufferSize(0),
|
||||
maxInstBufferSize(params->maxInstBufferSize),
|
||||
latency(params->frontEndLatency),
|
||||
width(params->frontEndWidth),
|
||||
freeRegs(params->numPhysicalRegs),
|
||||
numPhysRegs(params->numPhysicalRegs),
|
||||
@@ -326,6 +328,18 @@ FrontEnd<Impl>::tick()
|
||||
if (switchedOut)
|
||||
return;
|
||||
|
||||
for (int insts_to_queue = numInstsReady[-latency];
|
||||
!instBuffer.empty() && insts_to_queue;
|
||||
--insts_to_queue)
|
||||
{
|
||||
DPRINTF(FE, "Transferring instruction [sn:%lli] to the feBuffer\n",
|
||||
instBuffer.front()->seqNum);
|
||||
feBuffer.push_back(instBuffer.front());
|
||||
instBuffer.pop_front();
|
||||
}
|
||||
|
||||
numInstsReady.advance();
|
||||
|
||||
// @todo: Maybe I want to just have direct communication...
|
||||
if (fromCommit->doneSeqNum) {
|
||||
branchPred.update(fromCommit->doneSeqNum, 0);
|
||||
@@ -339,8 +353,8 @@ FrontEnd<Impl>::tick()
|
||||
cacheBlkValid = true;
|
||||
|
||||
status = Running;
|
||||
if (barrierInst)
|
||||
status = SerializeBlocked;
|
||||
// if (barrierInst)
|
||||
// status = SerializeBlocked;
|
||||
if (freeRegs <= 0)
|
||||
status = RenameBlocked;
|
||||
checkBE();
|
||||
@@ -414,11 +428,12 @@ FrontEnd<Impl>::tick()
|
||||
// latency
|
||||
instBuffer.push_back(inst);
|
||||
++instBufferSize;
|
||||
numInstsReady[0]++;
|
||||
++num_inst;
|
||||
|
||||
#if FULL_SYSTEM
|
||||
if (inst->isQuiesce()) {
|
||||
warn("%lli: Quiesce instruction encountered, halting fetch!", curTick);
|
||||
// warn("%lli: Quiesce instruction encountered, halting fetch!", curTick);
|
||||
status = QuiescePending;
|
||||
break;
|
||||
}
|
||||
@@ -572,10 +587,10 @@ FrontEnd<Impl>::processBarriers(DynInstPtr &inst)
|
||||
|
||||
// Change status over to SerializeBlocked so that other stages know
|
||||
// what this is blocked on.
|
||||
status = SerializeBlocked;
|
||||
// status = SerializeBlocked;
|
||||
|
||||
barrierInst = inst;
|
||||
return true;
|
||||
// barrierInst = inst;
|
||||
// return true;
|
||||
} else if ((inst->isStoreConditional() || inst->isSerializeAfter())
|
||||
&& !inst->isSerializeHandled()) {
|
||||
DPRINTF(FE, "Serialize after instruction encountered.\n");
|
||||
@@ -620,6 +635,7 @@ FrontEnd<Impl>::handleFault(Fault &fault)
|
||||
instruction->fault = fault;
|
||||
instruction->setCanIssue();
|
||||
instBuffer.push_back(instruction);
|
||||
numInstsReady[0]++;
|
||||
++instBufferSize;
|
||||
}
|
||||
|
||||
@@ -649,6 +665,21 @@ FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
|
||||
freeRegs+= inst->numDestRegs();
|
||||
}
|
||||
|
||||
while (!feBuffer.empty() &&
|
||||
feBuffer.back()->seqNum > squash_num) {
|
||||
DynInstPtr inst = feBuffer.back();
|
||||
|
||||
DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n",
|
||||
inst->seqNum, inst->readPC());
|
||||
|
||||
inst->clearDependents();
|
||||
|
||||
feBuffer.pop_back();
|
||||
--instBufferSize;
|
||||
|
||||
freeRegs+= inst->numDestRegs();
|
||||
}
|
||||
|
||||
// Copy over rename table from the back end.
|
||||
renameTable.copyFrom(backEnd->renameTable);
|
||||
|
||||
@@ -666,12 +697,12 @@ FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
|
||||
DPRINTF(FE, "Squashing outstanding Icache access.\n");
|
||||
memReq = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
if (status == SerializeBlocked) {
|
||||
assert(barrierInst->seqNum > squash_num);
|
||||
barrierInst = NULL;
|
||||
}
|
||||
|
||||
*/
|
||||
// Unless this squash originated from the front end, we're probably
|
||||
// in running mode now.
|
||||
// Actually might want to make this latency dependent.
|
||||
@@ -683,13 +714,22 @@ template <class Impl>
|
||||
typename Impl::DynInstPtr
|
||||
FrontEnd<Impl>::getInst()
|
||||
{
|
||||
if (instBufferSize == 0) {
|
||||
if (feBuffer.empty()) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
DynInstPtr inst = instBuffer.front();
|
||||
DynInstPtr inst = feBuffer.front();
|
||||
|
||||
instBuffer.pop_front();
|
||||
if (inst->isSerializeBefore() || inst->isIprAccess()) {
|
||||
DPRINTF(FE, "Back end is getting a serialize before inst\n");
|
||||
if (!backEnd->robEmpty()) {
|
||||
DPRINTF(FE, "Rob is not empty yet, not returning inst\n");
|
||||
return NULL;
|
||||
}
|
||||
inst->clearSerializeBefore();
|
||||
}
|
||||
|
||||
feBuffer.pop_front();
|
||||
|
||||
--instBufferSize;
|
||||
|
||||
@@ -784,11 +824,11 @@ FrontEnd<Impl>::updateStatus()
|
||||
}
|
||||
|
||||
if (status == BEBlocked && !be_block) {
|
||||
if (barrierInst) {
|
||||
status = SerializeBlocked;
|
||||
} else {
|
||||
// if (barrierInst) {
|
||||
// status = SerializeBlocked;
|
||||
// } else {
|
||||
status = Running;
|
||||
}
|
||||
// }
|
||||
ret_val = true;
|
||||
}
|
||||
return ret_val;
|
||||
@@ -810,6 +850,7 @@ template <class Impl>
|
||||
typename Impl::DynInstPtr
|
||||
FrontEnd<Impl>::getInstFromCacheline()
|
||||
{
|
||||
/*
|
||||
if (status == SerializeComplete) {
|
||||
DynInstPtr inst = barrierInst;
|
||||
status = Running;
|
||||
@@ -817,7 +858,7 @@ FrontEnd<Impl>::getInstFromCacheline()
|
||||
inst->clearSerializeBefore();
|
||||
return inst;
|
||||
}
|
||||
|
||||
*/
|
||||
InstSeqNum inst_seq;
|
||||
MachInst inst;
|
||||
// @todo: Fix this magic number used here to handle word offset (and
|
||||
@@ -932,6 +973,7 @@ FrontEnd<Impl>::doSwitchOut()
|
||||
squash(0, 0);
|
||||
instBuffer.clear();
|
||||
instBufferSize = 0;
|
||||
feBuffer.clear();
|
||||
status = Idle;
|
||||
}
|
||||
|
||||
|
||||
@@ -284,7 +284,7 @@ InorderBackEnd<Impl>::executeInsts()
|
||||
}
|
||||
|
||||
inst->setExecuted();
|
||||
inst->setCompleted();
|
||||
inst->setResultReady();
|
||||
inst->setCanCommit();
|
||||
|
||||
instList.pop_front();
|
||||
|
||||
@@ -850,13 +850,13 @@ template <class Impl>
|
||||
void
|
||||
InstQueue<Impl>::addReadyMemInst(DynInstPtr &ready_inst)
|
||||
{
|
||||
OpClass op_class = ready_inst->opClass();
|
||||
// OpClass op_class = ready_inst->opClass();
|
||||
|
||||
readyInsts.push(ready_inst);
|
||||
|
||||
DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
|
||||
"the ready list, PC %#x opclass:%i [sn:%lli].\n",
|
||||
ready_inst->readPC(), op_class, ready_inst->seqNum);
|
||||
ready_inst->readPC(), ready_inst->opClass(), ready_inst->seqNum);
|
||||
}
|
||||
/*
|
||||
template <class Impl>
|
||||
@@ -1177,11 +1177,11 @@ InstQueue<Impl>::addIfReady(DynInstPtr &inst)
|
||||
return;
|
||||
}
|
||||
|
||||
OpClass op_class = inst->opClass();
|
||||
// OpClass op_class = inst->opClass();
|
||||
|
||||
DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
|
||||
"the ready list, PC %#x opclass:%i [sn:%lli].\n",
|
||||
inst->readPC(), op_class, inst->seqNum);
|
||||
inst->readPC(), inst->opClass(), inst->seqNum);
|
||||
|
||||
readyInsts.push(inst);
|
||||
}
|
||||
|
||||
@@ -80,7 +80,7 @@ class LWBackEnd
|
||||
TimeBuffer<IssueToExec> i2e;
|
||||
typename TimeBuffer<IssueToExec>::wire instsToExecute;
|
||||
TimeBuffer<ExecToCommit> e2c;
|
||||
TimeBuffer<Writeback> numInstsToWB;
|
||||
TimeBuffer<int> numInstsToWB;
|
||||
|
||||
TimeBuffer<CommStruct> *comm;
|
||||
typename TimeBuffer<CommStruct>::wire toIEW;
|
||||
@@ -139,7 +139,7 @@ class LWBackEnd
|
||||
|
||||
Tick lastCommitCycle;
|
||||
|
||||
bool robEmpty() { return instList.empty(); }
|
||||
bool robEmpty() { return numInsts == 0; }
|
||||
|
||||
bool isFull() { return numInsts >= numROBEntries; }
|
||||
bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
|
||||
@@ -194,6 +194,7 @@ class LWBackEnd
|
||||
}
|
||||
|
||||
void instToCommit(DynInstPtr &inst);
|
||||
void readyInstsForCommit();
|
||||
|
||||
void switchOut();
|
||||
void doSwitchOut();
|
||||
@@ -255,12 +256,13 @@ class LWBackEnd
|
||||
|
||||
RenameTable<Impl> renameTable;
|
||||
private:
|
||||
int latency;
|
||||
|
||||
// General back end width. Used if the more specific isn't given.
|
||||
int width;
|
||||
|
||||
// Dispatch width.
|
||||
int dispatchWidth;
|
||||
int numDispatchEntries;
|
||||
int dispatchSize;
|
||||
|
||||
int waitingInsts;
|
||||
@@ -285,6 +287,7 @@ class LWBackEnd
|
||||
|
||||
int numROBEntries;
|
||||
int numInsts;
|
||||
bool lsqLimits;
|
||||
|
||||
std::set<InstSeqNum> waitingMemOps;
|
||||
typedef std::set<InstSeqNum>::iterator MemIt;
|
||||
@@ -295,9 +298,6 @@ class LWBackEnd
|
||||
InstSeqNum squashSeqNum;
|
||||
Addr squashNextPC;
|
||||
|
||||
Fault faultFromFetch;
|
||||
bool fetchHasFault;
|
||||
|
||||
bool switchedOut;
|
||||
bool switchPending;
|
||||
|
||||
@@ -321,8 +321,6 @@ class LWBackEnd
|
||||
std::list<DynInstPtr> replayList;
|
||||
std::list<DynInstPtr> writeback;
|
||||
|
||||
int latency;
|
||||
|
||||
int squashLatency;
|
||||
|
||||
bool exactFullStall;
|
||||
@@ -331,37 +329,39 @@ class LWBackEnd
|
||||
/* Stats::Scalar<> dcacheStallCycles;
|
||||
Counter lastDcacheStall;
|
||||
*/
|
||||
Stats::Vector<> rob_cap_events;
|
||||
Stats::Vector<> rob_cap_inst_count;
|
||||
Stats::Vector<> iq_cap_events;
|
||||
Stats::Vector<> iq_cap_inst_count;
|
||||
Stats::Vector<> robCapEvents;
|
||||
Stats::Vector<> robCapInstCount;
|
||||
Stats::Vector<> iqCapEvents;
|
||||
Stats::Vector<> iqCapInstCount;
|
||||
// total number of instructions executed
|
||||
Stats::Vector<> exe_inst;
|
||||
Stats::Vector<> exe_swp;
|
||||
Stats::Vector<> exe_nop;
|
||||
Stats::Vector<> exe_refs;
|
||||
Stats::Vector<> exe_loads;
|
||||
Stats::Vector<> exe_branches;
|
||||
Stats::Vector<> exeInst;
|
||||
Stats::Vector<> exeSwp;
|
||||
Stats::Vector<> exeNop;
|
||||
Stats::Vector<> exeRefs;
|
||||
Stats::Vector<> exeLoads;
|
||||
Stats::Vector<> exeBranches;
|
||||
|
||||
Stats::Vector<> issued_ops;
|
||||
Stats::Vector<> issuedOps;
|
||||
|
||||
// total number of loads forwaded from LSQ stores
|
||||
Stats::Vector<> lsq_forw_loads;
|
||||
Stats::Vector<> lsqForwLoads;
|
||||
|
||||
// total number of loads ignored due to invalid addresses
|
||||
Stats::Vector<> inv_addr_loads;
|
||||
Stats::Vector<> invAddrLoads;
|
||||
|
||||
// total number of software prefetches ignored due to invalid addresses
|
||||
Stats::Vector<> inv_addr_swpfs;
|
||||
Stats::Vector<> invAddrSwpfs;
|
||||
// ready loads blocked due to memory disambiguation
|
||||
Stats::Vector<> lsq_blocked_loads;
|
||||
Stats::Vector<> lsqBlockedLoads;
|
||||
|
||||
Stats::Scalar<> lsqInversion;
|
||||
|
||||
Stats::Vector<> n_issued_dist;
|
||||
Stats::VectorDistribution<> issue_delay_dist;
|
||||
Stats::Vector<> nIssuedDist;
|
||||
/*
|
||||
Stats::VectorDistribution<> issueDelayDist;
|
||||
|
||||
Stats::VectorDistribution<> queue_res_dist;
|
||||
Stats::VectorDistribution<> queueResDist;
|
||||
*/
|
||||
/*
|
||||
Stats::Vector<> stat_fu_busy;
|
||||
Stats::Vector2d<> stat_fuBusy;
|
||||
@@ -379,37 +379,37 @@ class LWBackEnd
|
||||
Stats::Formula commit_ipb;
|
||||
Stats::Formula lsq_inv_rate;
|
||||
*/
|
||||
Stats::Vector<> writeback_count;
|
||||
Stats::Vector<> producer_inst;
|
||||
Stats::Vector<> consumer_inst;
|
||||
Stats::Vector<> wb_penalized;
|
||||
Stats::Vector<> writebackCount;
|
||||
Stats::Vector<> producerInst;
|
||||
Stats::Vector<> consumerInst;
|
||||
Stats::Vector<> wbPenalized;
|
||||
|
||||
Stats::Formula wb_rate;
|
||||
Stats::Formula wb_fanout;
|
||||
Stats::Formula wb_penalized_rate;
|
||||
Stats::Formula wbRate;
|
||||
Stats::Formula wbFanout;
|
||||
Stats::Formula wbPenalizedRate;
|
||||
|
||||
// total number of instructions committed
|
||||
Stats::Vector<> stat_com_inst;
|
||||
Stats::Vector<> stat_com_swp;
|
||||
Stats::Vector<> stat_com_refs;
|
||||
Stats::Vector<> stat_com_loads;
|
||||
Stats::Vector<> stat_com_membars;
|
||||
Stats::Vector<> stat_com_branches;
|
||||
Stats::Vector<> statComInst;
|
||||
Stats::Vector<> statComSwp;
|
||||
Stats::Vector<> statComRefs;
|
||||
Stats::Vector<> statComLoads;
|
||||
Stats::Vector<> statComMembars;
|
||||
Stats::Vector<> statComBranches;
|
||||
|
||||
Stats::Distribution<> n_committed_dist;
|
||||
Stats::Distribution<> nCommittedDist;
|
||||
|
||||
Stats::Scalar<> commit_eligible_samples;
|
||||
Stats::Vector<> commit_eligible;
|
||||
Stats::Scalar<> commitEligibleSamples;
|
||||
Stats::Vector<> commitEligible;
|
||||
|
||||
Stats::Vector<> squashedInsts;
|
||||
Stats::Vector<> ROBSquashedInsts;
|
||||
|
||||
Stats::Scalar<> ROB_fcount;
|
||||
Stats::Formula ROB_full_rate;
|
||||
Stats::Scalar<> ROBFcount;
|
||||
Stats::Formula ROBFullRate;
|
||||
|
||||
Stats::Vector<> ROB_count; // cumulative ROB occupancy
|
||||
Stats::Formula ROB_occ_rate;
|
||||
Stats::VectorDistribution<> ROB_occ_dist;
|
||||
Stats::Vector<> ROBCount; // cumulative ROB occupancy
|
||||
Stats::Formula ROBOccRate;
|
||||
// Stats::VectorDistribution<> ROBOccDist;
|
||||
public:
|
||||
void dumpInsts();
|
||||
|
||||
|
||||
@@ -141,13 +141,14 @@ LWBackEnd<Impl>::replayMemInst(DynInstPtr &inst)
|
||||
|
||||
template <class Impl>
|
||||
LWBackEnd<Impl>::LWBackEnd(Params *params)
|
||||
: d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
|
||||
: d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(params->backEndLatency, 0),
|
||||
trapSquash(false), tcSquash(false),
|
||||
width(params->backEndWidth), exactFullStall(true)
|
||||
latency(params->backEndLatency),
|
||||
width(params->backEndWidth), lsqLimits(params->lsqLimits),
|
||||
exactFullStall(true)
|
||||
{
|
||||
numROBEntries = params->numROBEntries;
|
||||
numInsts = 0;
|
||||
numDispatchEntries = 32;
|
||||
maxOutstandingMemOps = params->maxOutstandingMemOps;
|
||||
numWaitingMemOps = 0;
|
||||
waitingInsts = 0;
|
||||
@@ -184,78 +185,79 @@ void
|
||||
LWBackEnd<Impl>::regStats()
|
||||
{
|
||||
using namespace Stats;
|
||||
rob_cap_events
|
||||
LSQ.regStats();
|
||||
|
||||
robCapEvents
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".ROB:cap_events")
|
||||
.desc("number of cycles where ROB cap was active")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
rob_cap_inst_count
|
||||
robCapInstCount
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".ROB:cap_inst")
|
||||
.desc("number of instructions held up by ROB cap")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
iq_cap_events
|
||||
iqCapEvents
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() +".IQ:cap_events" )
|
||||
.desc("number of cycles where IQ cap was active")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
iq_cap_inst_count
|
||||
iqCapInstCount
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".IQ:cap_inst")
|
||||
.desc("number of instructions held up by IQ cap")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
|
||||
exe_inst
|
||||
exeInst
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".ISSUE:count")
|
||||
.desc("number of insts issued")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
exe_swp
|
||||
exeSwp
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".ISSUE:swp")
|
||||
.desc("number of swp insts issued")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
exe_nop
|
||||
exeNop
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".ISSUE:nop")
|
||||
.desc("number of nop insts issued")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
exe_refs
|
||||
exeRefs
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".ISSUE:refs")
|
||||
.desc("number of memory reference insts issued")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
exe_loads
|
||||
exeLoads
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".ISSUE:loads")
|
||||
.desc("number of load insts issued")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
exe_branches
|
||||
exeBranches
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".ISSUE:branches")
|
||||
.desc("Number of branches issued")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
issued_ops
|
||||
issuedOps
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".ISSUE:op_count")
|
||||
.desc("number of insts issued")
|
||||
@@ -272,28 +274,28 @@ LWBackEnd<Impl>::regStats()
|
||||
//
|
||||
// Other stats
|
||||
//
|
||||
lsq_forw_loads
|
||||
lsqForwLoads
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".LSQ:forw_loads")
|
||||
.desc("number of loads forwarded via LSQ")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
inv_addr_loads
|
||||
invAddrLoads
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".ISSUE:addr_loads")
|
||||
.desc("number of invalid-address loads")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
inv_addr_swpfs
|
||||
invAddrSwpfs
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".ISSUE:addr_swpfs")
|
||||
.desc("number of invalid-address SW prefetches")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
lsq_blocked_loads
|
||||
lsqBlockedLoads
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".LSQ:blocked_loads")
|
||||
.desc("number of ready loads not issued due to memory disambiguation")
|
||||
@@ -305,51 +307,52 @@ LWBackEnd<Impl>::regStats()
|
||||
.desc("Number of times LSQ instruction issued early")
|
||||
;
|
||||
|
||||
n_issued_dist
|
||||
nIssuedDist
|
||||
.init(issueWidth + 1)
|
||||
.name(name() + ".ISSUE:issued_per_cycle")
|
||||
.desc("Number of insts issued each cycle")
|
||||
.flags(total | pdf | dist)
|
||||
;
|
||||
issue_delay_dist
|
||||
/*
|
||||
issueDelayDist
|
||||
.init(Num_OpClasses,0,99,2)
|
||||
.name(name() + ".ISSUE:")
|
||||
.desc("cycles from operands ready to issue")
|
||||
.flags(pdf | cdf)
|
||||
;
|
||||
|
||||
queue_res_dist
|
||||
queueResDist
|
||||
.init(Num_OpClasses, 0, 99, 2)
|
||||
.name(name() + ".IQ:residence:")
|
||||
.desc("cycles from dispatch to issue")
|
||||
.flags(total | pdf | cdf )
|
||||
;
|
||||
for (int i = 0; i < Num_OpClasses; ++i) {
|
||||
queue_res_dist.subname(i, opClassStrings[i]);
|
||||
queueResDist.subname(i, opClassStrings[i]);
|
||||
}
|
||||
|
||||
writeback_count
|
||||
*/
|
||||
writebackCount
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".WB:count")
|
||||
.desc("cumulative count of insts written-back")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
producer_inst
|
||||
producerInst
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".WB:producers")
|
||||
.desc("num instructions producing a value")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
consumer_inst
|
||||
consumerInst
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".WB:consumers")
|
||||
.desc("num instructions consuming a value")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
wb_penalized
|
||||
wbPenalized
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".WB:penalized")
|
||||
.desc("number of instrctions required to write to 'other' IQ")
|
||||
@@ -357,71 +360,71 @@ LWBackEnd<Impl>::regStats()
|
||||
;
|
||||
|
||||
|
||||
wb_penalized_rate
|
||||
wbPenalizedRate
|
||||
.name(name() + ".WB:penalized_rate")
|
||||
.desc ("fraction of instructions written-back that wrote to 'other' IQ")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
wb_penalized_rate = wb_penalized / writeback_count;
|
||||
wbPenalizedRate = wbPenalized / writebackCount;
|
||||
|
||||
wb_fanout
|
||||
wbFanout
|
||||
.name(name() + ".WB:fanout")
|
||||
.desc("average fanout of values written-back")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
wb_fanout = producer_inst / consumer_inst;
|
||||
wbFanout = producerInst / consumerInst;
|
||||
|
||||
wb_rate
|
||||
wbRate
|
||||
.name(name() + ".WB:rate")
|
||||
.desc("insts written-back per cycle")
|
||||
.flags(total)
|
||||
;
|
||||
wb_rate = writeback_count / cpu->numCycles;
|
||||
wbRate = writebackCount / cpu->numCycles;
|
||||
|
||||
stat_com_inst
|
||||
statComInst
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".COM:count")
|
||||
.desc("Number of instructions committed")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
stat_com_swp
|
||||
statComSwp
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".COM:swp_count")
|
||||
.desc("Number of s/w prefetches committed")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
stat_com_refs
|
||||
statComRefs
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".COM:refs")
|
||||
.desc("Number of memory references committed")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
stat_com_loads
|
||||
statComLoads
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".COM:loads")
|
||||
.desc("Number of loads committed")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
stat_com_membars
|
||||
statComMembars
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".COM:membars")
|
||||
.desc("Number of memory barriers committed")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
stat_com_branches
|
||||
statComBranches
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".COM:branches")
|
||||
.desc("Number of branches committed")
|
||||
.flags(total)
|
||||
;
|
||||
n_committed_dist
|
||||
nCommittedDist
|
||||
.init(0,commitWidth,1)
|
||||
.name(name() + ".COM:committed_per_cycle")
|
||||
.desc("Number of insts commited each cycle")
|
||||
@@ -441,14 +444,14 @@ LWBackEnd<Impl>::regStats()
|
||||
// -> The standard deviation is computed only over cycles where
|
||||
// we reached the BW limit
|
||||
//
|
||||
commit_eligible
|
||||
commitEligible
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".COM:bw_limited")
|
||||
.desc("number of insts not committed due to BW limits")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
commit_eligible_samples
|
||||
commitEligibleSamples
|
||||
.name(name() + ".COM:bw_lim_events")
|
||||
.desc("number cycles where commit BW limit reached")
|
||||
;
|
||||
@@ -465,37 +468,38 @@ LWBackEnd<Impl>::regStats()
|
||||
.desc("Number of instructions removed from inst list when they reached the head of the ROB")
|
||||
;
|
||||
|
||||
ROB_fcount
|
||||
ROBFcount
|
||||
.name(name() + ".ROB:full_count")
|
||||
.desc("number of cycles where ROB was full")
|
||||
;
|
||||
|
||||
ROB_count
|
||||
ROBCount
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".ROB:occupancy")
|
||||
.desc(name() + ".ROB occupancy (cumulative)")
|
||||
.flags(total)
|
||||
;
|
||||
|
||||
ROB_full_rate
|
||||
ROBFullRate
|
||||
.name(name() + ".ROB:full_rate")
|
||||
.desc("ROB full per cycle")
|
||||
;
|
||||
ROB_full_rate = ROB_fcount / cpu->numCycles;
|
||||
ROBFullRate = ROBFcount / cpu->numCycles;
|
||||
|
||||
ROB_occ_rate
|
||||
ROBOccRate
|
||||
.name(name() + ".ROB:occ_rate")
|
||||
.desc("ROB occupancy rate")
|
||||
.flags(total)
|
||||
;
|
||||
ROB_occ_rate = ROB_count / cpu->numCycles;
|
||||
|
||||
ROB_occ_dist
|
||||
ROBOccRate = ROBCount / cpu->numCycles;
|
||||
/*
|
||||
ROBOccDist
|
||||
.init(cpu->number_of_threads,0,numROBEntries,2)
|
||||
.name(name() + ".ROB:occ_dist")
|
||||
.desc("ROB Occupancy per cycle")
|
||||
.flags(total | cdf)
|
||||
;
|
||||
*/
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
@@ -588,18 +592,22 @@ LWBackEnd<Impl>::tick()
|
||||
{
|
||||
DPRINTF(BE, "Ticking back end\n");
|
||||
|
||||
// Read in any done instruction information and update the IQ or LSQ.
|
||||
updateStructures();
|
||||
|
||||
if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) {
|
||||
cpu->signalSwitched();
|
||||
return;
|
||||
}
|
||||
|
||||
ROB_count[0]+= numInsts;
|
||||
readyInstsForCommit();
|
||||
|
||||
numInstsToWB.advance();
|
||||
|
||||
ROBCount[0]+= numInsts;
|
||||
|
||||
wbCycle = 0;
|
||||
|
||||
// Read in any done instruction information and update the IQ or LSQ.
|
||||
updateStructures();
|
||||
|
||||
#if FULL_SYSTEM
|
||||
checkInterrupts();
|
||||
#endif
|
||||
@@ -674,6 +682,10 @@ LWBackEnd<Impl>::dispatchInsts()
|
||||
while (numInsts < numROBEntries &&
|
||||
numWaitingMemOps < maxOutstandingMemOps) {
|
||||
// Get instruction from front of time buffer
|
||||
if (lsqLimits && LSQ.isFull()) {
|
||||
break;
|
||||
}
|
||||
|
||||
DynInstPtr inst = frontEnd->getInst();
|
||||
if (!inst) {
|
||||
break;
|
||||
@@ -732,6 +744,7 @@ LWBackEnd<Impl>::dispatchInsts()
|
||||
inst->setIssued();
|
||||
inst->setExecuted();
|
||||
inst->setCanCommit();
|
||||
numInstsToWB[0]++;
|
||||
} else {
|
||||
DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
|
||||
"exeList.\n",
|
||||
@@ -866,8 +879,17 @@ LWBackEnd<Impl>::executeInsts()
|
||||
if (inst->isLoad()) {
|
||||
LSQ.executeLoad(inst);
|
||||
} else if (inst->isStore()) {
|
||||
LSQ.executeStore(inst);
|
||||
if (inst->req && !(inst->req->getFlags() & LOCKED)) {
|
||||
Fault fault = LSQ.executeStore(inst);
|
||||
|
||||
if (!inst->isStoreConditional() && fault == NoFault) {
|
||||
inst->setExecuted();
|
||||
|
||||
instToCommit(inst);
|
||||
} else if (fault != NoFault) {
|
||||
// If the instruction faulted, then we need to send it along to commit
|
||||
// without the instruction completing.
|
||||
// Send this instruction to commit, also make sure iew stage
|
||||
// realizes there is activity.
|
||||
inst->setExecuted();
|
||||
|
||||
instToCommit(inst);
|
||||
@@ -908,36 +930,54 @@ LWBackEnd<Impl>::executeInsts()
|
||||
}
|
||||
}
|
||||
|
||||
issued_ops[0]+= num_executed;
|
||||
n_issued_dist[num_executed]++;
|
||||
issuedOps[0]+= num_executed;
|
||||
nIssuedDist[num_executed]++;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
|
||||
{
|
||||
|
||||
DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
|
||||
inst->seqNum, inst->readPC());
|
||||
|
||||
if (!inst->isSquashed()) {
|
||||
DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
|
||||
inst->seqNum, inst->readPC());
|
||||
|
||||
inst->setCanCommit();
|
||||
|
||||
if (inst->isExecuted()) {
|
||||
inst->setResultReady();
|
||||
int dependents = wakeDependents(inst);
|
||||
if (dependents) {
|
||||
producer_inst[0]++;
|
||||
consumer_inst[0]+= dependents;
|
||||
producerInst[0]++;
|
||||
consumerInst[0]+= dependents;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
writeback_count[0]++;
|
||||
writeback.push_back(inst);
|
||||
|
||||
numInstsToWB[0]++;
|
||||
|
||||
writebackCount[0]++;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
LWBackEnd<Impl>::readyInstsForCommit()
|
||||
{
|
||||
for (int i = numInstsToWB[-latency];
|
||||
!writeback.empty() && i;
|
||||
--i)
|
||||
{
|
||||
DynInstPtr inst = writeback.front();
|
||||
writeback.pop_front();
|
||||
if (!inst->isSquashed()) {
|
||||
DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
|
||||
inst->seqNum, inst->readPC());
|
||||
|
||||
inst->setCanCommit();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
template <class Impl>
|
||||
void
|
||||
@@ -1010,7 +1050,7 @@ LWBackEnd<Impl>::commitInst(int inst_num)
|
||||
// or store inst. Signal backwards that it should be executed.
|
||||
if (!inst->isExecuted()) {
|
||||
if (inst->isNonSpeculative() ||
|
||||
inst->isStoreConditional() ||
|
||||
(inst->isStoreConditional() && inst->getFault() == NoFault) ||
|
||||
inst->isMemBarrier() ||
|
||||
inst->isWriteBarrier()) {
|
||||
#if !FULL_SYSTEM
|
||||
@@ -1151,6 +1191,20 @@ LWBackEnd<Impl>::commitInst(int inst_num)
|
||||
++freed_regs;
|
||||
}
|
||||
|
||||
#if FULL_SYSTEM
|
||||
if (thread->profile) {
|
||||
// bool usermode =
|
||||
// (xc->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
|
||||
// thread->profilePC = usermode ? 1 : inst->readPC();
|
||||
thread->profilePC = inst->readPC();
|
||||
ProfileNode *node = thread->profile->consume(thread->getXCProxy(),
|
||||
inst->staticInst);
|
||||
|
||||
if (node)
|
||||
thread->profileNode = node;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (inst->traceData) {
|
||||
inst->traceData->setFetchSeq(inst->seqNum);
|
||||
inst->traceData->setCPSeq(thread->numInst);
|
||||
@@ -1158,6 +1212,9 @@ LWBackEnd<Impl>::commitInst(int inst_num)
|
||||
inst->traceData = NULL;
|
||||
}
|
||||
|
||||
if (inst->isCopy())
|
||||
panic("Should not commit any copy instructions!");
|
||||
|
||||
inst->clearDependents();
|
||||
|
||||
frontEnd->addFreeRegs(freed_regs);
|
||||
@@ -1207,9 +1264,9 @@ LWBackEnd<Impl>::commitInsts()
|
||||
while (!instList.empty() && inst_num < commitWidth) {
|
||||
if (instList.back()->isSquashed()) {
|
||||
instList.back()->clearDependents();
|
||||
ROBSquashedInsts[instList.back()->threadNumber]++;
|
||||
instList.pop_back();
|
||||
--numInsts;
|
||||
ROBSquashedInsts[instList.back()->threadNumber]++;
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -1221,7 +1278,7 @@ LWBackEnd<Impl>::commitInsts()
|
||||
break;
|
||||
}
|
||||
}
|
||||
n_committed_dist.sample(inst_num);
|
||||
nCommittedDist.sample(inst_num);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
@@ -1231,10 +1288,10 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
|
||||
LSQ.squash(sn);
|
||||
|
||||
int freed_regs = 0;
|
||||
InstListIt waiting_list_end = waitingList.end();
|
||||
InstListIt insts_end_it = waitingList.end();
|
||||
InstListIt insts_it = waitingList.begin();
|
||||
|
||||
while (insts_it != waiting_list_end && (*insts_it)->seqNum > sn)
|
||||
while (insts_it != insts_end_it && (*insts_it)->seqNum > sn)
|
||||
{
|
||||
if ((*insts_it)->isSquashed()) {
|
||||
++insts_it;
|
||||
@@ -1260,6 +1317,7 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
|
||||
while (!instList.empty() && (*insts_it)->seqNum > sn)
|
||||
{
|
||||
if ((*insts_it)->isSquashed()) {
|
||||
panic("Instruction should not be already squashed and on list!");
|
||||
++insts_it;
|
||||
continue;
|
||||
}
|
||||
@@ -1291,18 +1349,6 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
|
||||
--numInsts;
|
||||
}
|
||||
|
||||
insts_it = waitingList.begin();
|
||||
while (!waitingList.empty() && insts_it != waitingList.end()) {
|
||||
if ((*insts_it)->seqNum < sn) {
|
||||
++insts_it;
|
||||
continue;
|
||||
}
|
||||
assert((*insts_it)->isSquashed());
|
||||
|
||||
waitingList.erase(insts_it++);
|
||||
waitingInsts--;
|
||||
}
|
||||
|
||||
while (memBarrier && memBarrier->seqNum > sn) {
|
||||
DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously "
|
||||
"squashed)\n", memBarrier->seqNum);
|
||||
@@ -1320,6 +1366,18 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
|
||||
}
|
||||
}
|
||||
|
||||
insts_it = replayList.begin();
|
||||
insts_end_it = replayList.end();
|
||||
while (!replayList.empty() && insts_it != insts_end_it) {
|
||||
if ((*insts_it)->seqNum < sn) {
|
||||
++insts_it;
|
||||
continue;
|
||||
}
|
||||
assert((*insts_it)->isSquashed());
|
||||
|
||||
replayList.erase(insts_it++);
|
||||
}
|
||||
|
||||
frontEnd->addFreeRegs(freed_regs);
|
||||
}
|
||||
|
||||
@@ -1390,14 +1448,6 @@ LWBackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
|
||||
frontEnd->squash(inst->seqNum - 1, inst->readPC());
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
LWBackEnd<Impl>::fetchFault(Fault &fault)
|
||||
{
|
||||
faultFromFetch = fault;
|
||||
fetchHasFault = true;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
LWBackEnd<Impl>::switchOut()
|
||||
@@ -1416,17 +1466,25 @@ LWBackEnd<Impl>::doSwitchOut()
|
||||
// yet written back.
|
||||
assert(robEmpty());
|
||||
assert(!LSQ.hasStoresToWB());
|
||||
writeback.clear();
|
||||
for (int i = 0; i < numInstsToWB.getSize() + 1; ++i)
|
||||
numInstsToWB.advance();
|
||||
|
||||
// squash(0);
|
||||
assert(waitingList.empty());
|
||||
assert(instList.empty());
|
||||
assert(replayList.empty());
|
||||
assert(writeback.empty());
|
||||
LSQ.switchOut();
|
||||
|
||||
squash(0);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
LWBackEnd<Impl>::takeOverFrom(ThreadContext *old_tc)
|
||||
{
|
||||
switchedOut = false;
|
||||
assert(!squashPending);
|
||||
squashSeqNum = 0;
|
||||
squashNextPC = 0;
|
||||
tcSquash = false;
|
||||
trapSquash = false;
|
||||
|
||||
@@ -1451,27 +1509,27 @@ LWBackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
|
||||
//
|
||||
#ifdef TARGET_ALPHA
|
||||
if (inst->isDataPrefetch())
|
||||
exe_swp[thread_number]++;
|
||||
exeSwp[thread_number]++;
|
||||
else
|
||||
exe_inst[thread_number]++;
|
||||
exeInst[thread_number]++;
|
||||
#else
|
||||
exe_inst[thread_number]++;
|
||||
exeInst[thread_number]++;
|
||||
#endif
|
||||
|
||||
//
|
||||
// Control operations
|
||||
//
|
||||
if (inst->isControl())
|
||||
exe_branches[thread_number]++;
|
||||
exeBranches[thread_number]++;
|
||||
|
||||
//
|
||||
// Memory operations
|
||||
//
|
||||
if (inst->isMemRef()) {
|
||||
exe_refs[thread_number]++;
|
||||
exeRefs[thread_number]++;
|
||||
|
||||
if (inst->isLoad())
|
||||
exe_loads[thread_number]++;
|
||||
exeLoads[thread_number]++;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1491,33 +1549,33 @@ LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
|
||||
//
|
||||
#ifdef TARGET_ALPHA
|
||||
if (inst->isDataPrefetch()) {
|
||||
stat_com_swp[tid]++;
|
||||
statComSwp[tid]++;
|
||||
} else {
|
||||
stat_com_inst[tid]++;
|
||||
statComInst[tid]++;
|
||||
}
|
||||
#else
|
||||
stat_com_inst[tid]++;
|
||||
statComInst[tid]++;
|
||||
#endif
|
||||
|
||||
//
|
||||
// Control Instructions
|
||||
//
|
||||
if (inst->isControl())
|
||||
stat_com_branches[tid]++;
|
||||
statComBranches[tid]++;
|
||||
|
||||
//
|
||||
// Memory references
|
||||
//
|
||||
if (inst->isMemRef()) {
|
||||
stat_com_refs[tid]++;
|
||||
statComRefs[tid]++;
|
||||
|
||||
if (inst->isLoad()) {
|
||||
stat_com_loads[tid]++;
|
||||
statComLoads[tid]++;
|
||||
}
|
||||
}
|
||||
|
||||
if (inst->isMemBarrier()) {
|
||||
stat_com_membars[tid]++;
|
||||
statComMembars[tid]++;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1569,6 +1627,45 @@ LWBackEnd<Impl>::dumpInsts()
|
||||
++num;
|
||||
}
|
||||
|
||||
inst_list_it = --(writeback.end());
|
||||
|
||||
cprintf("Writeback list size: %i\n", writeback.size());
|
||||
|
||||
while (inst_list_it != writeback.end())
|
||||
{
|
||||
cprintf("Instruction:%i\n",
|
||||
num);
|
||||
if (!(*inst_list_it)->isSquashed()) {
|
||||
if (!(*inst_list_it)->isIssued()) {
|
||||
++valid_num;
|
||||
cprintf("Count:%i\n", valid_num);
|
||||
} else if ((*inst_list_it)->isMemRef() &&
|
||||
!(*inst_list_it)->memOpDone) {
|
||||
// Loads that have not been marked as executed still count
|
||||
// towards the total instructions.
|
||||
++valid_num;
|
||||
cprintf("Count:%i\n", valid_num);
|
||||
}
|
||||
}
|
||||
|
||||
cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
|
||||
"Issued:%i\nSquashed:%i\n",
|
||||
(*inst_list_it)->readPC(),
|
||||
(*inst_list_it)->seqNum,
|
||||
(*inst_list_it)->threadNumber,
|
||||
(*inst_list_it)->isIssued(),
|
||||
(*inst_list_it)->isSquashed());
|
||||
|
||||
if ((*inst_list_it)->isMemRef()) {
|
||||
cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
|
||||
}
|
||||
|
||||
cprintf("\n");
|
||||
|
||||
inst_list_it--;
|
||||
++num;
|
||||
}
|
||||
|
||||
cprintf("Waiting list size: %i\n", waitingList.size());
|
||||
|
||||
inst_list_it = --(waitingList.end());
|
||||
|
||||
@@ -84,6 +84,8 @@ class OzoneLWLSQ {
|
||||
/** Returns the name of the LSQ unit. */
|
||||
std::string name() const;
|
||||
|
||||
void regStats();
|
||||
|
||||
/** Sets the CPU pointer. */
|
||||
void setCPU(OzoneCPU *cpu_ptr);
|
||||
|
||||
@@ -179,7 +181,7 @@ class OzoneLWLSQ {
|
||||
int numLoads() { return loads; }
|
||||
|
||||
/** Returns the number of stores in the SQ. */
|
||||
int numStores() { return stores; }
|
||||
int numStores() { return stores + storesInFlight; }
|
||||
|
||||
/** Returns if either the LQ or SQ is full. */
|
||||
bool isFull() { return lqFull() || sqFull(); }
|
||||
@@ -188,7 +190,7 @@ class OzoneLWLSQ {
|
||||
bool lqFull() { return loads >= (LQEntries - 1); }
|
||||
|
||||
/** Returns if the SQ is full. */
|
||||
bool sqFull() { return stores >= (SQEntries - 1); }
|
||||
bool sqFull() { return (stores + storesInFlight) >= (SQEntries - 1); }
|
||||
|
||||
/** Debugging function to dump instructions in the LSQ. */
|
||||
void dumpInsts();
|
||||
@@ -223,7 +225,9 @@ class OzoneLWLSQ {
|
||||
void storePostSend(Packet *pkt, DynInstPtr &inst);
|
||||
|
||||
/** Completes the store at the specified index. */
|
||||
void completeStore(int store_idx);
|
||||
void completeStore(DynInstPtr &inst);
|
||||
|
||||
void removeStore(int store_idx);
|
||||
|
||||
/** Handles doing the retry. */
|
||||
void recvRetry();
|
||||
@@ -394,6 +398,10 @@ class OzoneLWLSQ {
|
||||
|
||||
int storesToWB;
|
||||
|
||||
public:
|
||||
int storesInFlight;
|
||||
|
||||
private:
|
||||
/// @todo Consider moving to a more advanced model with write vs read ports
|
||||
/** The number of cache ports available each cycle. */
|
||||
int cachePorts;
|
||||
@@ -403,6 +411,9 @@ class OzoneLWLSQ {
|
||||
|
||||
//list<InstSeqNum> mshrSeqNums;
|
||||
|
||||
/** Tota number of memory ordering violations. */
|
||||
Stats::Scalar<> lsqMemOrderViolation;
|
||||
|
||||
//Stats::Scalar<> dcacheStallCycles;
|
||||
Counter lastDcacheStall;
|
||||
|
||||
@@ -525,7 +536,7 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
|
||||
|
||||
store_size = (*sq_it).size;
|
||||
|
||||
if (store_size == 0) {
|
||||
if (store_size == 0 || (*sq_it).committed) {
|
||||
sq_it++;
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -132,7 +132,7 @@ OzoneLWLSQ<Impl>::completeDataAccess(PacketPtr pkt)
|
||||
template <class Impl>
|
||||
OzoneLWLSQ<Impl>::OzoneLWLSQ()
|
||||
: switchedOut(false), dcachePort(this), loads(0), stores(0),
|
||||
storesToWB(0), stalled(false), isStoreBlocked(false),
|
||||
storesToWB(0), storesInFlight(0), stalled(false), isStoreBlocked(false),
|
||||
isLoadBlocked(false), loadBlockedHandled(false)
|
||||
{
|
||||
}
|
||||
@@ -173,6 +173,11 @@ OzoneLWLSQ<Impl>::name() const
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
OzoneLWLSQ<Impl>::regStats()
|
||||
{
|
||||
lsqMemOrderViolation
|
||||
.name(name() + ".memOrderViolation")
|
||||
.desc("Number of memory ordering violations");
|
||||
OzoneLWLSQ<Impl>::setCPU(OzoneCPU *cpu_ptr)
|
||||
{
|
||||
cpu = cpu_ptr;
|
||||
@@ -321,7 +326,7 @@ unsigned
|
||||
OzoneLWLSQ<Impl>::numFreeEntries()
|
||||
{
|
||||
unsigned free_lq_entries = LQEntries - loads;
|
||||
unsigned free_sq_entries = SQEntries - stores;
|
||||
unsigned free_sq_entries = SQEntries - (stores + storesInFlight);
|
||||
|
||||
// Both the LQ and SQ entries have an extra dummy entry to differentiate
|
||||
// empty/full conditions. Subtract 1 from the free entries.
|
||||
@@ -385,6 +390,9 @@ OzoneLWLSQ<Impl>::executeLoad(DynInstPtr &inst)
|
||||
// Actually probably want the oldest faulting load
|
||||
if (load_fault != NoFault) {
|
||||
DPRINTF(OzoneLSQ, "Load [sn:%lli] has a fault\n", inst->seqNum);
|
||||
if (!(inst->req->flags & UNCACHEABLE && !inst->isAtCommit())) {
|
||||
inst->setExecuted();
|
||||
}
|
||||
// Maybe just set it as can commit here, although that might cause
|
||||
// some other problems with sending traps to the ROB too quickly.
|
||||
be->instToCommit(inst);
|
||||
@@ -461,6 +469,7 @@ OzoneLWLSQ<Impl>::executeStore(DynInstPtr &store_inst)
|
||||
// A load incorrectly passed this store. Squash and refetch.
|
||||
// For now return a fault to show that it was unsuccessful.
|
||||
memDepViolator = (*lq_it);
|
||||
++lsqMemOrderViolation;
|
||||
|
||||
return TheISA::genMachineCheckFault();
|
||||
}
|
||||
@@ -553,8 +562,8 @@ OzoneLWLSQ<Impl>::writebackStores()
|
||||
|
||||
if ((*sq_it).size == 0 && !(*sq_it).completed) {
|
||||
sq_it--;
|
||||
completeStore(inst->sqIdx);
|
||||
|
||||
removeStore(inst->sqIdx);
|
||||
completeStore(inst);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -626,6 +635,8 @@ OzoneLWLSQ<Impl>::writebackStores()
|
||||
inst->sqIdx,inst->readPC(),
|
||||
req->paddr, *(req->data),
|
||||
inst->seqNum);
|
||||
DPRINTF(OzoneLSQ, "StoresInFlight: %i\n",
|
||||
storesInFlight + 1);
|
||||
|
||||
if (dcacheInterface) {
|
||||
assert(!req->completionEvent);
|
||||
@@ -687,6 +698,8 @@ OzoneLWLSQ<Impl>::writebackStores()
|
||||
}
|
||||
sq_it--;
|
||||
}
|
||||
++storesInFlight;
|
||||
// removeStore(inst->sqIdx);
|
||||
} else {
|
||||
panic("Must HAVE DCACHE!!!!!\n");
|
||||
}
|
||||
@@ -704,7 +717,7 @@ void
|
||||
OzoneLWLSQ<Impl>::squash(const InstSeqNum &squashed_num)
|
||||
{
|
||||
DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!"
|
||||
"(Loads:%i Stores:%i)\n",squashed_num,loads,stores);
|
||||
"(Loads:%i Stores:%i)\n",squashed_num,loads,stores+storesInFlight);
|
||||
|
||||
|
||||
LQIt lq_it = loadQueue.begin();
|
||||
@@ -881,7 +894,7 @@ OzoneLWLSQ<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
OzoneLWLSQ<Impl>::completeStore(int store_idx)
|
||||
OzoneLWLSQ<Impl>::removeStore(int store_idx)
|
||||
{
|
||||
SQHashIt sq_hash_it = SQItHash.find(store_idx);
|
||||
assert(sq_hash_it != SQItHash.end());
|
||||
@@ -891,8 +904,6 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
|
||||
(*sq_it).completed = true;
|
||||
DynInstPtr inst = (*sq_it).inst;
|
||||
|
||||
--storesToWB;
|
||||
|
||||
if (isStalled() &&
|
||||
inst->seqNum == stallingStoreIsn) {
|
||||
DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
|
||||
@@ -910,6 +921,13 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
|
||||
SQItHash.erase(sq_hash_it);
|
||||
SQIndices.push(inst->sqIdx);
|
||||
storeQueue.erase(sq_it);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
OzoneLWLSQ<Impl>::completeStore(DynInstPtr &inst)
|
||||
{
|
||||
--storesToWB;
|
||||
--stores;
|
||||
|
||||
inst->setCompleted();
|
||||
@@ -935,9 +953,14 @@ OzoneLWLSQ<Impl>::switchOut()
|
||||
switchedOut = true;
|
||||
|
||||
// Clear the queue to free up resources
|
||||
assert(stores == 0);
|
||||
assert(storeQueue.empty());
|
||||
assert(loads == 0);
|
||||
assert(loadQueue.empty());
|
||||
assert(storesInFlight == 0);
|
||||
storeQueue.clear();
|
||||
loadQueue.clear();
|
||||
loads = stores = storesToWB = 0;
|
||||
loads = stores = storesToWB = storesInFlight = 0;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
|
||||
@@ -71,10 +71,11 @@ class SimpleParams : public BaseCPU::Params
|
||||
|
||||
unsigned cachePorts;
|
||||
unsigned width;
|
||||
unsigned frontEndLatency;
|
||||
unsigned frontEndWidth;
|
||||
unsigned backEndLatency;
|
||||
unsigned backEndWidth;
|
||||
unsigned backEndSquashLatency;
|
||||
unsigned backEndLatency;
|
||||
unsigned maxInstBufferSize;
|
||||
unsigned numPhysicalRegs;
|
||||
unsigned maxOutstandingMemOps;
|
||||
@@ -150,6 +151,7 @@ class SimpleParams : public BaseCPU::Params
|
||||
//
|
||||
unsigned LQEntries;
|
||||
unsigned SQEntries;
|
||||
bool lsqLimits;
|
||||
|
||||
//
|
||||
// Memory dependence
|
||||
|
||||
@@ -34,9 +34,12 @@
|
||||
#include "arch/faults.hh"
|
||||
#include "arch/types.hh"
|
||||
#include "arch/regfile.hh"
|
||||
#include "base/callback.hh"
|
||||
#include "base/output.hh"
|
||||
#include "cpu/thread_context.hh"
|
||||
#include "cpu/thread_state.hh"
|
||||
#include "sim/process.hh"
|
||||
#include "sim/sim_exit.hh"
|
||||
|
||||
class Event;
|
||||
//class Process;
|
||||
@@ -65,8 +68,21 @@ struct OzoneThreadState : public ThreadState {
|
||||
#if FULL_SYSTEM
|
||||
OzoneThreadState(CPUType *_cpu, int _thread_num)
|
||||
: ThreadState(-1, _thread_num),
|
||||
intrflag(0), inSyscall(0), trapPending(0)
|
||||
cpu(_cpu), intrflag(0), inSyscall(0), trapPending(0)
|
||||
{
|
||||
if (cpu->params->profile) {
|
||||
profile = new FunctionProfile(cpu->params->system->kernelSymtab);
|
||||
Callback *cb =
|
||||
new MakeCallback<OzoneThreadState,
|
||||
&OzoneThreadState::dumpFuncProfile>(this);
|
||||
registerExitCallback(cb);
|
||||
}
|
||||
|
||||
// let's fill with a dummy node for now so we don't get a segfault
|
||||
// on the first cycle when there's no node available.
|
||||
static ProfileNode dummyNode;
|
||||
profileNode = &dummyNode;
|
||||
profilePC = 3;
|
||||
miscRegFile.clear();
|
||||
}
|
||||
#else
|
||||
@@ -130,6 +146,14 @@ struct OzoneThreadState : public ThreadState {
|
||||
|
||||
void setNextPC(uint64_t val)
|
||||
{ nextPC = val; }
|
||||
|
||||
#if FULL_SYSTEM
|
||||
void dumpFuncProfile()
|
||||
{
|
||||
std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name()));
|
||||
profile->dump(xcProxy, *os);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif // __CPU_OZONE_THREAD_STATE_HH__
|
||||
|
||||
@@ -170,7 +170,7 @@ BaseSimpleCPU::regStats()
|
||||
void
|
||||
BaseSimpleCPU::resetStats()
|
||||
{
|
||||
startNumInst = numInst;
|
||||
// startNumInst = numInst;
|
||||
// notIdleFraction = (_status != Idle);
|
||||
}
|
||||
|
||||
|
||||
@@ -162,6 +162,11 @@ SimpleThread::takeOverFrom(ThreadContext *oldContext)
|
||||
if (quiesceEvent) {
|
||||
quiesceEvent->tc = tc;
|
||||
}
|
||||
|
||||
Kernel::Statistics *stats = oldContext->getKernelStats();
|
||||
if (stats) {
|
||||
kernelStats = stats;
|
||||
}
|
||||
#endif
|
||||
|
||||
storeCondFailures = 0;
|
||||
|
||||
@@ -32,6 +32,7 @@
|
||||
#define __CPU_THREAD_STATE_HH__
|
||||
|
||||
#include "arch/types.hh"
|
||||
#include "cpu/profile.hh"
|
||||
#include "cpu/thread_context.hh"
|
||||
|
||||
#if !FULL_SYSTEM
|
||||
@@ -191,6 +192,21 @@ struct ThreadState {
|
||||
// simulation only; all functional memory accesses should use
|
||||
// one of the FunctionalMemory pointers above.
|
||||
short asid;
|
||||
|
||||
#endif
|
||||
|
||||
#if FULL_SYSTEM
|
||||
void profileClear()
|
||||
{
|
||||
if (profile)
|
||||
profile->clear();
|
||||
}
|
||||
|
||||
void profileSample()
|
||||
{
|
||||
if (profile)
|
||||
profile->sample(profileNode, profilePC);
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Current instruction the thread is committing. Only set and
|
||||
|
||||
@@ -242,6 +242,10 @@ class IdeDisk : public SimObject
|
||||
Stats::Scalar<> dmaWriteFullPages;
|
||||
Stats::Scalar<> dmaWriteBytes;
|
||||
Stats::Scalar<> dmaWriteTxs;
|
||||
Stats::Formula rdBandwidth;
|
||||
Stats::Formula wrBandwidth;
|
||||
Stats::Formula totBandwidth;
|
||||
Stats::Formula totBytes;
|
||||
|
||||
public:
|
||||
/**
|
||||
|
||||
@@ -26,6 +26,9 @@ class BaseCPU(SimObject):
|
||||
"terminate when all threads have reached this load count")
|
||||
max_loads_any_thread = Param.Counter(0,
|
||||
"terminate when any thread reaches this load count")
|
||||
stats_reset_inst = Param.Counter(0,
|
||||
"reset stats once this many instructions are committed")
|
||||
progress_interval = Param.Tick(0, "interval to print out the progress message")
|
||||
|
||||
defer_registration = Param.Bool(False,
|
||||
"defer registration with system (for sampling)")
|
||||
|
||||
@@ -9,6 +9,8 @@ class DerivO3CPU(BaseCPU):
|
||||
activity = Param.Unsigned(0, "Initial count")
|
||||
numThreads = Param.Unsigned(1, "number of HW thread contexts")
|
||||
|
||||
if build_env['FULL_SYSTEM']:
|
||||
profile = Param.Latency('0ns', "trace the kernel stack")
|
||||
if build_env['USE_CHECKER']:
|
||||
if not build_env['FULL_SYSTEM']:
|
||||
checker = Param.BaseCPU(O3Checker(workload=Parent.workload,
|
||||
|
||||
@@ -8,12 +8,15 @@ class DerivOzoneCPU(BaseCPU):
|
||||
numThreads = Param.Unsigned("number of HW thread contexts")
|
||||
|
||||
checker = Param.BaseCPU("Checker CPU")
|
||||
if build_env['FULL_SYSTEM']:
|
||||
profile = Param.Latency('0ns', "trace the kernel stack")
|
||||
|
||||
icache_port = Port("Instruction Port")
|
||||
dcache_port = Port("Data Port")
|
||||
|
||||
width = Param.Unsigned("Width")
|
||||
frontEndWidth = Param.Unsigned("Front end width")
|
||||
frontEndLatency = Param.Unsigned("Front end latency")
|
||||
backEndWidth = Param.Unsigned("Back end width")
|
||||
backEndSquashLatency = Param.Unsigned("Back end squash latency")
|
||||
backEndLatency = Param.Unsigned("Back end latency")
|
||||
@@ -76,6 +79,7 @@ class DerivOzoneCPU(BaseCPU):
|
||||
|
||||
LQEntries = Param.Unsigned("Number of load queue entries")
|
||||
SQEntries = Param.Unsigned("Number of store queue entries")
|
||||
lsqLimits = Param.Bool(True, "LSQ size limits dispatch")
|
||||
LFSTSize = Param.Unsigned("Last fetched store table size")
|
||||
SSITSize = Param.Unsigned("Store set ID table size")
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from m5.SimObject import SimObject
|
||||
from m5.params import *
|
||||
from Serialize import Serialize
|
||||
from Serialize import Statreset
|
||||
from Statistics import Statistics
|
||||
from Trace import Trace
|
||||
from ExeTrace import ExecutionTrace
|
||||
|
||||
@@ -16,6 +16,7 @@ class System(SimObject):
|
||||
boot_osflags = Param.String("a", "boot flags to pass to the kernel")
|
||||
kernel = Param.String("file that contains the kernel code")
|
||||
readfile = Param.String("", "file to read startup script from")
|
||||
symbolfile = Param.String("", "file to get the symbols from")
|
||||
|
||||
class AlphaSystem(System):
|
||||
type = 'AlphaSystem'
|
||||
|
||||
@@ -46,6 +46,7 @@
|
||||
#include "sim/host.hh" // for Tick
|
||||
|
||||
#include "base/fast_alloc.hh"
|
||||
#include "base/misc.hh"
|
||||
#include "base/trace.hh"
|
||||
#include "sim/serialize.hh"
|
||||
|
||||
@@ -135,7 +136,7 @@ class Event : public Serializable, public FastAlloc
|
||||
/// same cycle (after unscheduling the old CPU's tick event).
|
||||
/// The switch needs to come before any tick events to make
|
||||
/// sure we don't tick both CPUs in the same cycle.
|
||||
CPU_Switch_Pri = 31,
|
||||
CPU_Switch_Pri = -31,
|
||||
|
||||
/// Serialization needs to occur before tick events also, so
|
||||
/// that a serialize/unserialize is identical to an on-line
|
||||
@@ -351,7 +352,8 @@ inline void
|
||||
Event::schedule(Tick t)
|
||||
{
|
||||
assert(!scheduled());
|
||||
assert(t >= curTick);
|
||||
// if (t < curTick)
|
||||
// warn("t is less than curTick, ensure you don't want cycles");
|
||||
|
||||
setFlags(Scheduled);
|
||||
#if TRACING_ON
|
||||
|
||||
@@ -148,6 +148,54 @@ namespace AlphaPseudo
|
||||
exitSimLoop(when, "m5_exit instruction encountered");
|
||||
}
|
||||
|
||||
void
|
||||
loadsymbol(ExecContext *xc)
|
||||
{
|
||||
const string &filename = xc->getCpuPtr()->system->params()->symbolfile;
|
||||
if (filename.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::string buffer;
|
||||
ifstream file(filename.c_str());
|
||||
|
||||
if (!file)
|
||||
fatal("file error: Can't open symbol table file %s\n", filename);
|
||||
|
||||
while (!file.eof()) {
|
||||
getline(file, buffer);
|
||||
|
||||
if (buffer.empty())
|
||||
continue;
|
||||
|
||||
int idx = buffer.find(' ');
|
||||
if (idx == string::npos)
|
||||
continue;
|
||||
|
||||
string address = "0x" + buffer.substr(0, idx);
|
||||
eat_white(address);
|
||||
if (address.empty())
|
||||
continue;
|
||||
|
||||
// Skip over letter and space
|
||||
string symbol = buffer.substr(idx + 3);
|
||||
eat_white(symbol);
|
||||
if (symbol.empty())
|
||||
continue;
|
||||
|
||||
Addr addr;
|
||||
if (!to_number(address, addr))
|
||||
continue;
|
||||
|
||||
if (!xc->getSystemPtr()->kernelSymtab->insert(addr, symbol))
|
||||
continue;
|
||||
|
||||
|
||||
DPRINTF(Loader, "Loaded symbol: %s @ %#llx\n", symbol, addr);
|
||||
}
|
||||
file.close();
|
||||
}
|
||||
|
||||
void
|
||||
resetstats(ThreadContext *tc, Tick delay, Tick period)
|
||||
{
|
||||
|
||||
@@ -51,6 +51,7 @@ namespace AlphaPseudo
|
||||
void ivle(ThreadContext *tc);
|
||||
void m5exit(ThreadContext *tc, Tick delay);
|
||||
void m5exit_old(ThreadContext *tc);
|
||||
void loadsymbol(ThreadContext *xc);
|
||||
void resetstats(ThreadContext *tc, Tick delay, Tick period);
|
||||
void dumpstats(ThreadContext *tc, Tick delay, Tick period);
|
||||
void dumpresetstats(ThreadContext *tc, Tick delay, Tick period);
|
||||
|
||||
@@ -52,6 +52,9 @@
|
||||
#include "sim/sim_exit.hh"
|
||||
#include "sim/sim_object.hh"
|
||||
|
||||
// For stat reset hack
|
||||
#include "sim/stat_control.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int Serializable::ckptMaxCount = 0;
|
||||
@@ -404,3 +407,36 @@ Checkpoint::sectionExists(const std::string &section)
|
||||
{
|
||||
return db->sectionExists(section);
|
||||
}
|
||||
|
||||
/** Hacked stat reset event */
|
||||
|
||||
class StatresetParamContext : public ParamContext
|
||||
{
|
||||
public:
|
||||
StatresetParamContext(const string §ion);
|
||||
~StatresetParamContext();
|
||||
void startup();
|
||||
};
|
||||
|
||||
StatresetParamContext statParams("statsreset");
|
||||
|
||||
Param<Tick> reset_cycle(&statParams, "reset_cycle",
|
||||
"Cycle to reset stats on", 0);
|
||||
|
||||
/** Forward the section name to the ParamContext base. */
StatresetParamContext::StatresetParamContext(const string &section)
    : ParamContext(section)
{ }

/** Nothing to release. */
StatresetParamContext::~StatresetParamContext()
{
}
|
||||
|
||||
void
|
||||
StatresetParamContext::startup()
|
||||
{
|
||||
if (reset_cycle > 0) {
|
||||
Stats::SetupEvent(Stats::Reset, curTick + reset_cycle, 0);
|
||||
cprintf("Stats reset event scheduled for %lli\n",
|
||||
curTick + reset_cycle);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -160,13 +160,13 @@ class StatEvent : public Event
|
||||
Tick repeat;
|
||||
|
||||
public:
|
||||
StatEvent(int _flags, Tick _when, Tick _repeat);
|
||||
StatEvent(EventQueue *queue, int _flags, Tick _when, Tick _repeat);
|
||||
virtual void process();
|
||||
virtual const char *description();
|
||||
};
|
||||
|
||||
StatEvent::StatEvent(int _flags, Tick _when, Tick _repeat)
|
||||
: Event(&mainEventQueue, Stat_Event_Pri),
|
||||
StatEvent::StatEvent(EventQueue *queue, int _flags, Tick _when, Tick _repeat)
|
||||
: Event(queue, Stat_Event_Pri),
|
||||
flags(_flags), repeat(_repeat)
|
||||
{
|
||||
setFlags(AutoDelete);
|
||||
@@ -185,8 +185,10 @@ StatEvent::process()
|
||||
if (flags & Stats::Dump)
|
||||
DumpNow();
|
||||
|
||||
if (flags & Stats::Reset)
|
||||
if (flags & Stats::Reset) {
|
||||
cprintf("Resetting stats!\n");
|
||||
reset();
|
||||
}
|
||||
|
||||
if (repeat)
|
||||
schedule(curTick + repeat);
|
||||
@@ -214,9 +216,12 @@ DumpNow()
|
||||
}
|
||||
|
||||
void
|
||||
SetupEvent(int flags, Tick when, Tick repeat)
|
||||
SetupEvent(int flags, Tick when, Tick repeat, EventQueue *queue)
|
||||
{
|
||||
new StatEvent(flags, when, repeat);
|
||||
if (queue == NULL)
|
||||
queue = &mainEventQueue;
|
||||
|
||||
new StatEvent(queue, flags, when, repeat);
|
||||
}
|
||||
|
||||
/* namespace Stats */ }
|
||||
|
||||
@@ -34,6 +34,8 @@
|
||||
#include <fstream>
|
||||
#include <list>
|
||||
|
||||
class EventQueue;
|
||||
|
||||
namespace Stats {
|
||||
|
||||
enum {
|
||||
@@ -45,7 +47,7 @@ class Output;
|
||||
extern std::list<Output *> OutputList;
|
||||
|
||||
void DumpNow();
|
||||
void SetupEvent(int flags, Tick when, Tick repeat = 0);
|
||||
void SetupEvent(int flags, Tick when, Tick repeat = 0, EventQueue *queue = NULL);
|
||||
|
||||
void InitSimStats();
|
||||
|
||||
|
||||
@@ -182,6 +182,7 @@ class System : public SimObject
|
||||
|
||||
std::string kernel_path;
|
||||
std::string readfile;
|
||||
std::string symbolfile;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user