Merge ktlim@zamp:./local/clean/o3-merge/m5

into  zamp.eecs.umich.edu:/z/ktlim2/clean/o3-merge/newmem

configs/boot/micro_memlat.rcS:
configs/boot/micro_tlblat.rcS:
src/arch/alpha/ev5.cc:
src/arch/alpha/isa/decoder.isa:
src/arch/alpha/isa_traits.hh:
src/cpu/base.cc:
src/cpu/base.hh:
src/cpu/base_dyn_inst.hh:
src/cpu/checker/cpu.hh:
src/cpu/checker/cpu_impl.hh:
src/cpu/o3/alpha/cpu_impl.hh:
src/cpu/o3/alpha/params.hh:
src/cpu/o3/checker_builder.cc:
src/cpu/o3/commit_impl.hh:
src/cpu/o3/cpu.cc:
src/cpu/o3/decode_impl.hh:
src/cpu/o3/fetch_impl.hh:
src/cpu/o3/iew.hh:
src/cpu/o3/iew_impl.hh:
src/cpu/o3/inst_queue.hh:
src/cpu/o3/lsq.hh:
src/cpu/o3/lsq_impl.hh:
src/cpu/o3/lsq_unit.hh:
src/cpu/o3/lsq_unit_impl.hh:
src/cpu/o3/regfile.hh:
src/cpu/o3/rename_impl.hh:
src/cpu/o3/thread_state.hh:
src/cpu/ozone/checker_builder.cc:
src/cpu/ozone/cpu.hh:
src/cpu/ozone/cpu_impl.hh:
src/cpu/ozone/front_end.hh:
src/cpu/ozone/front_end_impl.hh:
src/cpu/ozone/lw_back_end.hh:
src/cpu/ozone/lw_back_end_impl.hh:
src/cpu/ozone/lw_lsq.hh:
src/cpu/ozone/lw_lsq_impl.hh:
src/cpu/ozone/thread_state.hh:
src/cpu/simple/base.cc:
src/cpu/simple_thread.cc:
src/cpu/simple_thread.hh:
src/cpu/thread_state.hh:
src/dev/ide_disk.cc:
src/python/m5/objects/O3CPU.py:
src/python/m5/objects/Root.py:
src/python/m5/objects/System.py:
src/sim/pseudo_inst.cc:
src/sim/pseudo_inst.hh:
src/sim/system.hh:
util/m5/m5.c:
    Hand merge.

--HG--
rename : arch/alpha/ev5.cc => src/arch/alpha/ev5.cc
rename : arch/alpha/freebsd/system.cc => src/arch/alpha/freebsd/system.cc
rename : arch/alpha/isa/decoder.isa => src/arch/alpha/isa/decoder.isa
rename : arch/alpha/isa/mem.isa => src/arch/alpha/isa/mem.isa
rename : arch/alpha/isa_traits.hh => src/arch/alpha/isa_traits.hh
rename : arch/alpha/linux/system.cc => src/arch/alpha/linux/system.cc
rename : arch/alpha/system.cc => src/arch/alpha/system.cc
rename : arch/alpha/tru64/system.cc => src/arch/alpha/tru64/system.cc
rename : cpu/base.cc => src/cpu/base.cc
rename : cpu/base.hh => src/cpu/base.hh
rename : cpu/base_dyn_inst.hh => src/cpu/base_dyn_inst.hh
rename : cpu/checker/cpu.hh => src/cpu/checker/cpu.hh
rename : cpu/checker/cpu.cc => src/cpu/checker/cpu_impl.hh
rename : cpu/o3/alpha_cpu_builder.cc => src/cpu/o3/alpha/cpu_builder.cc
rename : cpu/checker/o3_cpu_builder.cc => src/cpu/o3/checker_builder.cc
rename : cpu/o3/commit_impl.hh => src/cpu/o3/commit_impl.hh
rename : cpu/o3/cpu.cc => src/cpu/o3/cpu.cc
rename : cpu/o3/fetch_impl.hh => src/cpu/o3/fetch_impl.hh
rename : cpu/o3/iew.hh => src/cpu/o3/iew.hh
rename : cpu/o3/iew_impl.hh => src/cpu/o3/iew_impl.hh
rename : cpu/o3/inst_queue.hh => src/cpu/o3/inst_queue.hh
rename : cpu/o3/inst_queue_impl.hh => src/cpu/o3/inst_queue_impl.hh
rename : cpu/o3/lsq_impl.hh => src/cpu/o3/lsq_impl.hh
rename : cpu/o3/lsq_unit.hh => src/cpu/o3/lsq_unit.hh
rename : cpu/o3/lsq_unit_impl.hh => src/cpu/o3/lsq_unit_impl.hh
rename : cpu/o3/mem_dep_unit_impl.hh => src/cpu/o3/mem_dep_unit_impl.hh
rename : cpu/o3/rename.hh => src/cpu/o3/rename.hh
rename : cpu/o3/rename_impl.hh => src/cpu/o3/rename_impl.hh
rename : cpu/o3/thread_state.hh => src/cpu/o3/thread_state.hh
rename : cpu/o3/tournament_pred.cc => src/cpu/o3/tournament_pred.cc
rename : cpu/o3/tournament_pred.hh => src/cpu/o3/tournament_pred.hh
rename : cpu/checker/cpu_builder.cc => src/cpu/ozone/checker_builder.cc
rename : cpu/ozone/cpu.hh => src/cpu/ozone/cpu.hh
rename : cpu/ozone/cpu_builder.cc => src/cpu/ozone/cpu_builder.cc
rename : cpu/ozone/cpu_impl.hh => src/cpu/ozone/cpu_impl.hh
rename : cpu/ozone/front_end.hh => src/cpu/ozone/front_end.hh
rename : cpu/ozone/front_end_impl.hh => src/cpu/ozone/front_end_impl.hh
rename : cpu/ozone/inorder_back_end_impl.hh => src/cpu/ozone/inorder_back_end_impl.hh
rename : cpu/ozone/inst_queue_impl.hh => src/cpu/ozone/inst_queue_impl.hh
rename : cpu/ozone/lw_back_end.hh => src/cpu/ozone/lw_back_end.hh
rename : cpu/ozone/lw_back_end_impl.hh => src/cpu/ozone/lw_back_end_impl.hh
rename : cpu/ozone/lw_lsq.hh => src/cpu/ozone/lw_lsq.hh
rename : cpu/ozone/lw_lsq_impl.hh => src/cpu/ozone/lw_lsq_impl.hh
rename : cpu/ozone/simple_params.hh => src/cpu/ozone/simple_params.hh
rename : cpu/ozone/thread_state.hh => src/cpu/ozone/thread_state.hh
rename : cpu/simple/cpu.cc => src/cpu/simple/base.cc
rename : cpu/cpu_exec_context.cc => src/cpu/simple_thread.cc
rename : cpu/thread_state.hh => src/cpu/thread_state.hh
rename : dev/ide_disk.hh => src/dev/ide_disk.hh
rename : python/m5/objects/BaseCPU.py => src/python/m5/objects/BaseCPU.py
rename : python/m5/objects/AlphaFullCPU.py => src/python/m5/objects/O3CPU.py
rename : python/m5/objects/OzoneCPU.py => src/python/m5/objects/OzoneCPU.py
rename : python/m5/objects/Root.py => src/python/m5/objects/Root.py
rename : python/m5/objects/System.py => src/python/m5/objects/System.py
rename : sim/eventq.hh => src/sim/eventq.hh
rename : sim/pseudo_inst.cc => src/sim/pseudo_inst.cc
rename : sim/pseudo_inst.hh => src/sim/pseudo_inst.hh
rename : sim/serialize.cc => src/sim/serialize.cc
rename : sim/stat_control.cc => src/sim/stat_control.cc
rename : sim/stat_control.hh => src/sim/stat_control.hh
rename : sim/system.hh => src/sim/system.hh
extra : convert_revision : 135d90e43f6cea89f9460ba4e23f4b0b85886e7d
This commit is contained in:
Kevin Lim
2006-09-30 23:43:23 -04:00
87 changed files with 8363 additions and 515 deletions

View File

@@ -554,6 +554,7 @@ AlphaISA::MiscRegFile::setIpr(int idx, uint64_t val, ThreadContext *tc)
return NoFault;
}
void
AlphaISA::copyIprs(ThreadContext *src, ThreadContext *dest)
{
@@ -562,6 +563,7 @@ AlphaISA::copyIprs(ThreadContext *src, ThreadContext *dest)
}
}
/**
* Check for special simulator handling of specific PAL calls.
* If return value is false, actual PAL call will be suppressed.

View File

@@ -105,6 +105,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(FreebsdAlphaSystem)
Param<string> boot_osflags;
Param<string> readfile;
Param<string> symbolfile;
Param<unsigned int> init_param;
Param<uint64_t> system_type;
@@ -124,6 +125,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(FreebsdAlphaSystem)
INIT_PARAM_DFLT(boot_osflags, "flags to pass to the kernel during boot",
"a"),
INIT_PARAM_DFLT(readfile, "file to read startup script from", ""),
INIT_PARAM_DFLT(symbolfile, "file to read symbols from", ""),
INIT_PARAM_DFLT(init_param, "numerical value to pass into simulator", 0),
INIT_PARAM_DFLT(system_type, "Type of system we are emulating", 34),
INIT_PARAM_DFLT(system_rev, "Revision of system we are emulating", 1<<10)
@@ -143,6 +145,7 @@ CREATE_SIM_OBJECT(FreebsdAlphaSystem)
p->boot_osflags = boot_osflags;
p->init_param = init_param;
p->readfile = readfile;
p->symbolfile = symbolfile;
p->system_type = system_type;
p->system_rev = system_rev;
return new FreebsdAlphaSystem(p);

View File

@@ -779,10 +779,10 @@ decode OPCODE default Unknown::unknown() {
}}, IsNonSpeculative, IsQuiesce);
0x03: quiesceCycles({{
AlphaPseudo::quiesceCycles(xc->tcBase(), R16);
}}, IsNonSpeculative, IsQuiesce);
}}, IsNonSpeculative, IsQuiesce, IsUnverifiable);
0x04: quiesceTime({{
R0 = AlphaPseudo::quiesceTime(xc->tcBase());
}}, IsNonSpeculative);
}}, IsNonSpeculative, IsUnverifiable);
0x10: ivlb({{
AlphaPseudo::ivlb(xc->tcBase());
}}, No_OpClass, IsNonSpeculative);
@@ -795,6 +795,9 @@ decode OPCODE default Unknown::unknown() {
0x21: m5exit({{
AlphaPseudo::m5exit(xc->tcBase(), R16);
}}, No_OpClass, IsNonSpeculative);
0x31: loadsymbol({{
AlphaPseudo::loadsymbol(xc->tcBase());
}}, No_OpClass, IsNonSpeculative);
0x30: initparam({{ Ra = xc->tcBase()->getCpuPtr()->system->init_param; }});
0x40: resetstats({{
AlphaPseudo::resetstats(xc->tcBase(), R16, R17);

View File

@@ -528,7 +528,7 @@ def template MiscInitiateAcc {{
Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc,
Trace::InstRecord *traceData) const
{
panic("Misc instruction does not support split access method!");
warn("Misc instruction does not support split access method!");
return NoFault;
}
}};
@@ -539,7 +539,7 @@ def template MiscCompleteAcc {{
%(CPU_exec_context)s *xc,
Trace::InstRecord *traceData) const
{
panic("Misc instruction does not support split access method!");
warn("Misc instruction does not support split access method!");
return NoFault;
}

View File

@@ -42,190 +42,10 @@ class StaticInstPtr;
namespace AlphaISA
{
using namespace LittleEndianGuest;
// These enumerate all the registers for dependence tracking.
enum DependenceTags {
// 0..31 are the integer regs 0..31
// 32..63 are the FP regs 0..31, i.e. use (reg + FP_Base_DepTag)
FP_Base_DepTag = 40,
Ctrl_Base_DepTag = 72,
Fpcr_DepTag = 72, // floating point control register
Uniq_DepTag = 73,
Lock_Flag_DepTag = 74,
Lock_Addr_DepTag = 75,
IPR_Base_DepTag = 76
};
StaticInstPtr decodeInst(ExtMachInst);
// Alpha Does NOT have a delay slot
#define ISA_HAS_DELAY_SLOT 0
const Addr PageShift = 13;
const Addr PageBytes = ULL(1) << PageShift;
const Addr PageMask = ~(PageBytes - 1);
const Addr PageOffset = PageBytes - 1;
#if FULL_SYSTEM
////////////////////////////////////////////////////////////////////////
//
// Translation stuff
//
const Addr PteShift = 3;
const Addr NPtePageShift = PageShift - PteShift;
const Addr NPtePage = ULL(1) << NPtePageShift;
const Addr PteMask = NPtePage - 1;
// User Virtual
const Addr USegBase = ULL(0x0);
const Addr USegEnd = ULL(0x000003ffffffffff);
// Kernel Direct Mapped
const Addr K0SegBase = ULL(0xfffffc0000000000);
const Addr K0SegEnd = ULL(0xfffffdffffffffff);
// Kernel Virtual
const Addr K1SegBase = ULL(0xfffffe0000000000);
const Addr K1SegEnd = ULL(0xffffffffffffffff);
// For loading... XXX This maybe could be USegEnd?? --ali
const Addr LoadAddrMask = ULL(0xffffffffff);
////////////////////////////////////////////////////////////////////////
//
// Interrupt levels
//
enum InterruptLevels
{
INTLEVEL_SOFTWARE_MIN = 4,
INTLEVEL_SOFTWARE_MAX = 19,
INTLEVEL_EXTERNAL_MIN = 20,
INTLEVEL_EXTERNAL_MAX = 34,
INTLEVEL_IRQ0 = 20,
INTLEVEL_IRQ1 = 21,
INTINDEX_ETHERNET = 0,
INTINDEX_SCSI = 1,
INTLEVEL_IRQ2 = 22,
INTLEVEL_IRQ3 = 23,
INTLEVEL_SERIAL = 33,
NumInterruptLevels = INTLEVEL_EXTERNAL_MAX
};
// EV5 modes
enum mode_type
{
mode_kernel = 0, // kernel
mode_executive = 1, // executive (unused by unix)
mode_supervisor = 2, // supervisor (unused by unix)
mode_user = 3, // user mode
mode_number // number of modes
};
#endif
#if FULL_SYSTEM
////////////////////////////////////////////////////////////////////////
//
// Internal Processor Registers
//
enum md_ipr_names
{
IPR_ISR = 0x100, // interrupt summary register
IPR_ITB_TAG = 0x101, // ITLB tag register
IPR_ITB_PTE = 0x102, // ITLB page table entry register
IPR_ITB_ASN = 0x103, // ITLB address space register
IPR_ITB_PTE_TEMP = 0x104, // ITLB page table entry temp register
IPR_ITB_IA = 0x105, // ITLB invalidate all register
IPR_ITB_IAP = 0x106, // ITLB invalidate all process register
IPR_ITB_IS = 0x107, // ITLB invalidate select register
IPR_SIRR = 0x108, // software interrupt request register
IPR_ASTRR = 0x109, // asynchronous system trap request register
IPR_ASTER = 0x10a, // asynchronous system trap enable register
IPR_EXC_ADDR = 0x10b, // exception address register
IPR_EXC_SUM = 0x10c, // exception summary register
IPR_EXC_MASK = 0x10d, // exception mask register
IPR_PAL_BASE = 0x10e, // PAL base address register
IPR_ICM = 0x10f, // instruction current mode
IPR_IPLR = 0x110, // interrupt priority level register
IPR_INTID = 0x111, // interrupt ID register
IPR_IFAULT_VA_FORM = 0x112, // formatted faulting virtual addr register
IPR_IVPTBR = 0x113, // virtual page table base register
IPR_HWINT_CLR = 0x115, // H/W interrupt clear register
IPR_SL_XMIT = 0x116, // serial line transmit register
IPR_SL_RCV = 0x117, // serial line receive register
IPR_ICSR = 0x118, // instruction control and status register
IPR_IC_FLUSH = 0x119, // instruction cache flush control
IPR_IC_PERR_STAT = 0x11a, // inst cache parity error status register
IPR_PMCTR = 0x11c, // performance counter register
// PAL temporary registers...
// register meanings gleaned from osfpal.s source code
IPR_PALtemp0 = 0x140, // local scratch
IPR_PALtemp1 = 0x141, // local scratch
IPR_PALtemp2 = 0x142, // entUna
IPR_PALtemp3 = 0x143, // CPU specific impure area pointer
IPR_PALtemp4 = 0x144, // memory management temp
IPR_PALtemp5 = 0x145, // memory management temp
IPR_PALtemp6 = 0x146, // memory management temp
IPR_PALtemp7 = 0x147, // entIF
IPR_PALtemp8 = 0x148, // intmask
IPR_PALtemp9 = 0x149, // entSys
IPR_PALtemp10 = 0x14a, // ??
IPR_PALtemp11 = 0x14b, // entInt
IPR_PALtemp12 = 0x14c, // entArith
IPR_PALtemp13 = 0x14d, // reserved for platform specific PAL
IPR_PALtemp14 = 0x14e, // reserved for platform specific PAL
IPR_PALtemp15 = 0x14f, // reserved for platform specific PAL
IPR_PALtemp16 = 0x150, // scratch / whami<7:0> / mces<4:0>
IPR_PALtemp17 = 0x151, // sysval
IPR_PALtemp18 = 0x152, // usp
IPR_PALtemp19 = 0x153, // ksp
IPR_PALtemp20 = 0x154, // PTBR
IPR_PALtemp21 = 0x155, // entMM
IPR_PALtemp22 = 0x156, // kgp
IPR_PALtemp23 = 0x157, // PCBB
IPR_DTB_ASN = 0x200, // DTLB address space number register
IPR_DTB_CM = 0x201, // DTLB current mode register
IPR_DTB_TAG = 0x202, // DTLB tag register
IPR_DTB_PTE = 0x203, // DTLB page table entry register
IPR_DTB_PTE_TEMP = 0x204, // DTLB page table entry temporary register
IPR_MM_STAT = 0x205, // data MMU fault status register
IPR_VA = 0x206, // fault virtual address register
IPR_VA_FORM = 0x207, // formatted virtual address register
IPR_MVPTBR = 0x208, // MTU virtual page table base register
IPR_DTB_IAP = 0x209, // DTLB invalidate all process register
IPR_DTB_IA = 0x20a, // DTLB invalidate all register
IPR_DTB_IS = 0x20b, // DTLB invalidate single register
IPR_ALT_MODE = 0x20c, // alternate mode register
IPR_CC = 0x20d, // cycle counter register
IPR_CC_CTL = 0x20e, // cycle counter control register
IPR_MCSR = 0x20f, // MTU control register
IPR_DC_FLUSH = 0x210,
IPR_DC_PERR_STAT = 0x212, // Dcache parity error status register
IPR_DC_TEST_CTL = 0x213, // Dcache test tag control register
IPR_DC_TEST_TAG = 0x214, // Dcache test tag register
IPR_DC_TEST_TAG_TEMP = 0x215, // Dcache test tag temporary register
IPR_DC_MODE = 0x216, // Dcache mode register
IPR_MAF_MODE = 0x217, // miss address file mode register
NumInternalProcRegs // number of IPR registers
};
#else
const int NumInternalProcRegs = 0;
#endif
// Constants Related to the number of registers
typedef uint32_t MachInst;
typedef uint64_t ExtMachInst;
typedef uint8_t RegIndex;
const int NumIntArchRegs = 32;
const int NumPALShadowRegs = 8;
@@ -233,15 +53,6 @@ namespace AlphaISA
// @todo: Figure out what this number really should be.
const int NumMiscArchRegs = 32;
const int NumIntRegs = NumIntArchRegs + NumPALShadowRegs;
const int NumFloatRegs = NumFloatArchRegs;
const int NumMiscRegs = NumMiscArchRegs;
const int TotalNumRegs = NumIntRegs + NumFloatRegs +
NumMiscRegs + NumInternalProcRegs;
const int TotalDataRegs = NumIntRegs + NumFloatRegs;
// Static instruction parameters
const int MaxInstSrcRegs = 3;
const int MaxInstDestRegs = 2;
@@ -265,23 +76,270 @@ namespace AlphaISA
const int SyscallPseudoReturnReg = ArgumentReg4;
const int SyscallSuccessReg = 19;
const int LogVMPageSize = 13; // 8K bytes
const int VMPageSize = (1 << LogVMPageSize);
const int BranchPredAddrShiftAmt = 2; // instructions are 4-byte aligned
const int MachineBytes = 8;
const int WordBytes = 4;
const int HalfwordBytes = 2;
const int ByteBytes = 1;
const int NumIntRegs = NumIntArchRegs + NumPALShadowRegs;
const int NumFloatRegs = NumFloatArchRegs;
const int NumMiscRegs = NumMiscArchRegs;
// These enumerate all the registers for dependence tracking.
// Base offsets that place every register class into one flat index space
// used for dependence tracking.
enum DependenceTags {
// 0..39 are the integer regs: NumIntRegs is NumIntArchRegs (32) plus
// NumPALShadowRegs (8), so the FP base sits at 40 rather than 32.
// 40..71 are the FP regs, i.e. use (reg + FP_Base_DepTag)
FP_Base_DepTag = 40,
// Control registers start here; Fpcr_DepTag shares this first slot.
Ctrl_Base_DepTag = 72,
Fpcr_DepTag = 72, // floating point control register
Uniq_DepTag = 73,
Lock_Flag_DepTag = 74,
Lock_Addr_DepTag = 75,
// Presumably internal processor registers are indexed upward from this
// base -- confirm against the users of this tag.
IPR_Base_DepTag = 76
};
typedef uint64_t IntReg;
typedef IntReg IntRegFile[NumIntRegs];
// floating point register file entry type
// One floating point register value with two overlapping views of the
// same storage.
typedef union {
uint64_t q; // integer (raw bits) view
double d; // double-precision floating point view
} FloatReg;
// The floating point register file: NumFloatRegs entries, accessible
// either as integer qwords or as doubles over the same storage.
typedef union {
uint64_t q[NumFloatRegs]; // integer qword view
double d[NumFloatRegs]; // double-precision floating point view
// Zero the whole file. Only the d view is named, but both arrays are
// members of the same union, so q overlays the bytes being zeroed.
void clear()
{ bzero(d, sizeof(d)); }
} FloatRegFile;
extern const Addr PageShift;
extern const Addr PageBytes;
extern const Addr PageMask;
extern const Addr PageOffset;
// redirected register map, really only used for the full system case.
extern const int reg_redir[NumIntRegs];
#if FULL_SYSTEM
typedef uint64_t InternalProcReg;
#include "arch/alpha/isa_fullsys_traits.hh"
#else
const int NumInternalProcRegs = 0;
#endif
// control register file contents
typedef uint64_t MiscReg;
// Control ("miscellaneous") register state for one hardware context:
// FPCR, the process-unique register, and the LL/SC lock flag and address.
// In FULL_SYSTEM builds it also holds the internal processor registers.
class MiscRegFile {
protected:
uint64_t fpcr; // floating point condition codes
uint64_t uniq; // process-unique register
bool lock_flag; // lock flag for LL/SC
Addr lock_addr; // lock address for LL/SC
public:
// Read the misc register selected by misc_reg.
MiscReg readReg(int misc_reg);
//These functions should be removed once the simplescalar cpu model
//has been replaced.
int getInstAsid();
int getDataAsid();
// Read variant that also receives a Fault out-parameter and the
// execution context; presumably for reads with side effects -- confirm
// against the implementation.
MiscReg readRegWithEffect(int misc_reg, Fault &fault, ExecContext *xc);
// Write the misc register selected by misc_reg; returns a Fault.
Fault setReg(int misc_reg, const MiscReg &val);
// Write variant that also receives the execution context.
Fault setRegWithEffect(int misc_reg, const MiscReg &val,
ExecContext *xc);
// Checkpoint save / restore hooks.
void serialize(std::ostream &os);
void unserialize(Checkpoint *cp, const std::string &section);
// Reset fpcr, uniq and the lock state to zero. Note that the ipr array
// (FULL_SYSTEM only) is not touched here.
void clear()
{
fpcr = uniq = 0;
lock_flag = 0;
lock_addr = 0;
}
#if FULL_SYSTEM
protected:
InternalProcReg ipr[NumInternalProcRegs]; // Internal processor regs
private:
// IPR accessors are private; RegFile reaches them through the friend
// declaration below.
MiscReg readIpr(int idx, Fault &fault, ExecContext *xc);
Fault setIpr(int idx, uint64_t val, ExecContext *xc);
void copyIprs(ExecContext *xc);
#endif
friend class RegFile;
};
const int TotalNumRegs = NumIntRegs + NumFloatRegs +
NumMiscRegs + NumInternalProcRegs;
const int TotalDataRegs = NumIntRegs + NumFloatRegs;
// Storage large enough for a value of any one register class: integer,
// floating point, or control. Only one member is meaningful at a time.
typedef union {
IntReg intreg;
FloatReg fpreg;
MiscReg ctrlreg;
} AnyReg;
// Complete architectural register state: the three register files plus
// the program counters (and, in FULL_SYSTEM builds, the interrupt flag).
struct RegFile {
IntRegFile intRegFile; // (signed) integer register file
FloatRegFile floatRegFile; // floating point register file
MiscRegFile miscRegs; // control register file
Addr pc; // program counter
Addr npc; // next-cycle program counter
Addr nnpc; // presumably the PC after npc -- confirm with users
#if FULL_SYSTEM
int intrflag; // interrupt flag
// Address space numbers extracted from the ITB/DTB ASN internal
// processor registers via the EV5 field helpers.
inline int instAsid()
{ return EV5::ITB_ASN_ASN(miscRegs.ipr[IPR_ITB_ASN]); }
inline int dataAsid()
{ return EV5::DTB_ASN_ASN(miscRegs.ipr[IPR_DTB_ASN]); }
#endif // FULL_SYSTEM
// Checkpoint save / restore hooks.
void serialize(std::ostream &os);
void unserialize(Checkpoint *cp, const std::string &section);
// Zero the integer, floating point and misc register files. The
// pc/npc/nnpc fields (and intrflag) are left as they are.
void clear()
{
bzero(intRegFile, sizeof(intRegFile));
floatRegFile.clear();
miscRegs.clear();
}
};
static inline ExtMachInst makeExtMI(MachInst inst, const uint64_t &pc);
StaticInstPtr decodeInst(ExtMachInst);
// Alpha Does NOT have a delay slot
#define ISA_HAS_DELAY_SLOT 0
// return a no-op instruction... used for instruction fetch faults
// Alpha UNOP (ldq_u r31,0(r0))
const ExtMachInst NoopMachInst = 0x2ffe0000;
extern const ExtMachInst NoopMachInst;
// redirected register map, really only used for the full system case.
extern const int reg_redir[NumIntRegs];
enum annotes {
ANNOTE_NONE = 0,
// An impossible number for instruction annotations
ITOUCH_ANNOTE = 0xffffffff,
};
/**
 * Classify an integer register as caller-saved.
 *
 * Reports "not implemented" through panic() before doing anything else;
 * the range test below presumably never runs if panic() does not return.
 *
 * @param reg Integer register index.
 * @return true for registers 1..8, 22..25 and 27.
 */
static inline bool
isCallerSaveIntegerRegister(unsigned int reg)
{
    panic("register classification not implemented");

    const bool in_low_range = (reg >= 1) && (reg <= 8);
    const bool in_high_range = (reg >= 22) && (reg <= 25);
    return in_low_range || in_high_range || (reg == 27);
}
/**
 * Classify an integer register as callee-saved.
 *
 * Reports "not implemented" through panic() first; the range test below
 * presumably never runs if panic() does not return.
 *
 * @param reg Integer register index.
 * @return true for registers 9..15.
 */
static inline bool
isCalleeSaveIntegerRegister(unsigned int reg)
{
    panic("register classification not implemented");

    if (reg < 9)
        return false;
    return reg <= 15;
}
/**
 * Classify a floating point register as caller-saved.
 *
 * Not implemented: calls panic() and otherwise always answers false.
 *
 * @param reg Floating point register index (unused).
 */
static inline bool
isCallerSaveFloatRegister(unsigned int reg)
{
    panic("register classification not implemented");

    const bool caller_saved = false;
    return caller_saved;
}
/**
 * Classify a floating point register as callee-saved.
 *
 * Not implemented: calls panic() and otherwise always answers false.
 *
 * @param reg Floating point register index (unused).
 */
static inline bool
isCalleeSaveFloatRegister(unsigned int reg)
{
    panic("register classification not implemented");

    const bool callee_saved = false;
    return callee_saved;
}
/**
 * Round an address down to a multiple of nbytes.
 *
 * @param addr   Address to align.
 * @param nbytes Alignment in bytes. The mask arithmetic only yields a
 *               true alignment when this is a power of two.
 * @return addr with the low-order bits below the alignment cleared.
 */
static inline Addr alignAddress(const Addr &addr,
                                unsigned int nbytes) {
    // Widen nbytes to Addr *before* complementing. Complementing in
    // unsigned int and then combining with a wider Addr zero-extends the
    // mask, which would also clear the upper bits of the address.
    const Addr mask = ~(static_cast<Addr>(nbytes) - 1);
    return addr & mask;
}
// Instruction address compression hooks
/**
 * Translate a real PC into the address used for fetch.
 *
 * This is an identity mapping: the address is returned unchanged.
 */
static inline Addr
realPCToFetchPC(const Addr &addr)
{
    const Addr fetch_pc = addr;
    return fetch_pc;
}
/**
 * Translate a fetch address back into the real PC.
 *
 * This is an identity mapping: the address is returned unchanged.
 */
static inline Addr
fetchPCToRealPC(const Addr &addr)
{
    const Addr real_pc = addr;
    return real_pc;
}
// the size of "fetched" instructions (not necessarily the size
// of real instructions for PISA)
/** @return The number of bytes occupied by one MachInst. */
static inline size_t
fetchInstSize()
{
    const size_t inst_bytes = sizeof(MachInst);
    return inst_bytes;
}
/**
 * Build an instruction that copies register src into register dest.
 *
 * Not implemented: calls panic() and otherwise yields 0.
 *
 * @param dest Destination register index (unused).
 * @param src  Source register index (unused).
 */
static inline MachInst
makeRegisterCopy(int dest, int src)
{
    panic("makeRegisterCopy not implemented");

    const MachInst placeholder = 0;
    return placeholder;
}
// Machine operations
void saveMachineReg(AnyReg &savereg, const RegFile &reg_file,
int regnum);
void restoreMachineReg(RegFile &regs, const AnyReg &reg,
int regnum);
#if 0
static void serializeSpecialRegs(const Serializable::Proxy &proxy,
const RegFile &regs);
static void unserializeSpecialRegs(const IniFile *db,
const std::string &category,
ConfigNode *node,
RegFile &regs);
#endif
/**
* Function to ensure ISA semantics about zero registers.
* @param xc The execution context.
*/
template <class XC>
void zeroRegisters(XC *xc);
const Addr MaxAddr = (Addr)-1;
#if !FULL_SYSTEM
/**
 * Store a system call's outcome into the integer register file.
 *
 * The Alpha convention reports success or failure in a3 (r19, indexed by
 * SyscallSuccessReg) and places the result itself in the standard return
 * value register (v0, indexed by ReturnValueReg).
 *
 * @param return_value Outcome of the system call.
 * @param regs         Register file that receives the two values.
 */
static inline void setSyscallReturn(SyscallReturn return_value, RegFile *regs)
{
    IntReg status;
    IntReg result;

    if (!return_value.successful()) {
        // Failure: flag is all ones and the error value is negated.
        status = (IntReg) -1;
        result = -return_value.value();
    } else {
        // Success: clear the flag and pass the value through.
        status = 0;
        result = return_value.value();
    }

    regs->intRegFile[SyscallSuccessReg] = status;
    regs->intRegFile[ReturnValueReg] = result;
}
#endif
void copyRegs(ExecContext *src, ExecContext *dest);
void copyMiscRegs(ExecContext *src, ExecContext *dest);
#if FULL_SYSTEM
void copyIprs(ExecContext *src, ExecContext *dest);
#endif
};
#endif // __ARCH_ALPHA_ISA_TRAITS_HH__

View File

@@ -199,6 +199,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(LinuxAlphaSystem)
Param<string> boot_osflags;
Param<string> readfile;
Param<string> symbolfile;
Param<unsigned int> init_param;
Param<uint64_t> system_type;
@@ -218,6 +219,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(LinuxAlphaSystem)
INIT_PARAM_DFLT(boot_osflags, "flags to pass to the kernel during boot",
"a"),
INIT_PARAM_DFLT(readfile, "file to read startup script from", ""),
INIT_PARAM_DFLT(symbolfile, "file to read symbols from", ""),
INIT_PARAM_DFLT(init_param, "numerical value to pass into simulator", 0),
INIT_PARAM_DFLT(system_type, "Type of system we are emulating", 34),
INIT_PARAM_DFLT(system_rev, "Revision of system we are emulating", 1<<10)
@@ -237,6 +239,7 @@ CREATE_SIM_OBJECT(LinuxAlphaSystem)
p->boot_osflags = boot_osflags;
p->init_param = init_param;
p->readfile = readfile;
p->symbolfile = symbolfile;
p->system_type = system_type;
p->system_rev = system_rev;
return new LinuxAlphaSystem(p);

View File

@@ -229,6 +229,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AlphaSystem)
Param<std::string> boot_osflags;
Param<std::string> readfile;
Param<std::string> symbolfile;
Param<unsigned int> init_param;
Param<uint64_t> system_type;
@@ -248,6 +249,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AlphaSystem)
INIT_PARAM_DFLT(boot_osflags, "flags to pass to the kernel during boot",
"a"),
INIT_PARAM_DFLT(readfile, "file to read startup script from", ""),
INIT_PARAM_DFLT(symbolfile, "file to read symbols from", ""),
INIT_PARAM_DFLT(init_param, "numerical value to pass into simulator", 0),
INIT_PARAM_DFLT(system_type, "Type of system we are emulating", 34),
INIT_PARAM_DFLT(system_rev, "Revision of system we are emulating", 1<<10)
@@ -267,6 +269,7 @@ CREATE_SIM_OBJECT(AlphaSystem)
p->boot_osflags = boot_osflags;
p->init_param = init_param;
p->readfile = readfile;
p->symbolfile = symbolfile;
p->system_type = system_type;
p->system_rev = system_rev;
return new AlphaSystem(p);

View File

@@ -103,6 +103,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(Tru64AlphaSystem)
Param<string> boot_osflags;
Param<string> readfile;
Param<string> symbolfile;
Param<unsigned int> init_param;
Param<uint64_t> system_type;
@@ -122,6 +123,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(Tru64AlphaSystem)
INIT_PARAM_DFLT(boot_osflags, "flags to pass to the kernel during boot",
"a"),
INIT_PARAM_DFLT(readfile, "file to read startup script from", ""),
INIT_PARAM_DFLT(symbolfile, "file to read symbols from", ""),
INIT_PARAM_DFLT(init_param, "numerical value to pass into simulator", 0),
INIT_PARAM_DFLT(system_type, "Type of system we are emulating", 12),
INIT_PARAM_DFLT(system_rev, "Revision of system we are emulating", 2<<1)
@@ -141,6 +143,7 @@ CREATE_SIM_OBJECT(Tru64AlphaSystem)
p->boot_osflags = boot_osflags;
p->init_param = init_param;
p->readfile = readfile;
p->symbolfile = symbolfile;
p->system_type = system_type;
p->system_rev = system_rev;

View File

@@ -48,6 +48,9 @@
#include "base/trace.hh"
// Hack
#include "sim/stat_control.hh"
using namespace std;
vector<BaseCPU *> BaseCPU::cpuList;
@@ -57,6 +60,30 @@ vector<BaseCPU *> BaseCPU::cpuList;
// been initialized
int maxThreadsPerCPU = 1;
/**
 * Periodic progress report for one CPU.
 *
 * Reads the CPU's total instruction count, reports how many instructions
 * were counted since the previous firing, remembers the new count, and
 * reschedules this event one interval into the future.
 */
void
CPUProgressEvent::process()
{
    Counter temp = cpu->totalInstructions();
#ifndef NDEBUG
    // Cycles per interval, computed in floating point so an interval that
    // is not a whole multiple of the clock period is not truncated (and an
    // interval shorter than one period does not collapse to zero cycles).
    double cycles = double(interval) / double(cpu->cycles(1));
    double ipc = double(temp - lastNumInst) / cycles;

    // ipc is a double, so it is printed with a floating point conversion.
    DPRINTFN("%s progress event, instructions committed: %lli, IPC: %0.8f\n",
             cpu->name(), temp - lastNumInst, ipc);
    // Presumably keeps ipc referenced when DPRINTFN compiles away, to
    // avoid an unused-variable warning -- confirm before removing.
    ipc = 0.0;
#else
    cprintf("%lli: %s progress event, instructions committed: %lli\n",
            curTick, cpu->name(), temp - lastNumInst);
#endif
    lastNumInst = temp;
    schedule(curTick + interval);
}
/** @return A fixed, human-readable label for this event type. */
const char *
CPUProgressEvent::description()
{
    static const char label[] = "CPU Progress event";
    return label;
}
#if FULL_SYSTEM
BaseCPU::BaseCPU(Params *p)
: MemObject(p->name), clock(p->clock), checkInterrupts(true),
@@ -67,6 +94,7 @@ BaseCPU::BaseCPU(Params *p)
number_of_threads(p->numberOfThreads), system(p->system)
#endif
{
// currentTick = curTick;
DPRINTF(FullCPU, "BaseCPU: Creating object, mem address %#x.\n", this);
// add self to global list of CPUs
@@ -128,6 +156,12 @@ BaseCPU::BaseCPU(Params *p)
p->max_loads_all_threads, *counter);
}
if (p->stats_reset_inst != 0) {
Stats::SetupEvent(Stats::Reset, p->stats_reset_inst, 0, comInstEventQueue[0]);
cprintf("Stats reset event scheduled for %lli insts\n",
p->stats_reset_inst);
}
#if FULL_SYSTEM
memset(interrupts, 0, sizeof(interrupts));
intstatus = 0;
@@ -153,7 +187,6 @@ BaseCPU::BaseCPU(Params *p)
if (params->profile)
profileEvent = new ProfileEvent(this, params->profile);
#endif
}
BaseCPU::Params::Params()
@@ -188,6 +221,11 @@ BaseCPU::startup()
if (!params->deferRegistration && profileEvent)
profileEvent->schedule(curTick);
#endif
if (params->progress_interval) {
new CPUProgressEvent(&mainEventQueue, params->progress_interval,
this);
}
}
@@ -238,7 +276,11 @@ BaseCPU::registerThreadContexts()
void
BaseCPU::switchOut()
{
panic("This CPU doesn't support sampling!");
// panic("This CPU doesn't support sampling!");
#if FULL_SYSTEM
if (profileEvent && profileEvent->scheduled())
profileEvent->deschedule();
#endif
}
void
@@ -261,18 +303,22 @@ BaseCPU::takeOverFrom(BaseCPU *oldCPU)
assert(newTC->getProcessPtr() == oldTC->getProcessPtr());
newTC->getProcessPtr()->replaceThreadContext(newTC, newTC->readCpuId());
#endif
// TheISA::compareXCs(oldXC, newXC);
}
#if FULL_SYSTEM
for (int i = 0; i < TheISA::NumInterruptLevels; ++i)
interrupts[i] = oldCPU->interrupts[i];
intstatus = oldCPU->intstatus;
checkInterrupts = oldCPU->checkInterrupts;
for (int i = 0; i < threadContexts.size(); ++i)
threadContexts[i]->profileClear();
if (profileEvent)
profileEvent->schedule(curTick);
// The Sampler must take care of this!
// if (profileEvent)
// profileEvent->schedule(curTick);
#endif
}

View File

@@ -46,6 +46,23 @@ class ThreadContext;
class System;
class Port;
/**
 * Self-rescheduling event that periodically reports how many instructions
 * a CPU has executed. Constructing the object schedules its first firing.
 */
class CPUProgressEvent : public Event
{
  protected:
    /** Number of ticks between two consecutive firings. */
    Tick interval;

    /** Instruction count seen at the previous firing; starts at zero. */
    Counter lastNumInst;

    /** The CPU whose progress is being reported. */
    BaseCPU *cpu;

  public:
    /**
     * @param q    Event queue this event is placed on.
     * @param ival Reporting period in ticks.
     * @param _cpu CPU to report on.
     */
    CPUProgressEvent(EventQueue *q, Tick ival, BaseCPU *_cpu)
        : Event(q, Event::Stat_Event_Pri),
          interval(ival),
          lastNumInst(0),
          cpu(_cpu)
    {
        // Arm the first report one full interval from the current tick.
        schedule(curTick + interval);
    }

    /** Emit one progress report and reschedule. */
    void process();

    /** Human-readable name of this event. */
    virtual const char *description();
};
class BaseCPU : public MemObject
{
protected:
@@ -53,6 +70,7 @@ class BaseCPU : public MemObject
Tick clock;
public:
// Tick currentTick;
inline Tick frequency() const { return Clock::Frequency / clock; }
inline Tick cycles(int numCycles) const { return clock * numCycles; }
inline Tick curCycle() const { return curTick / clock; }
@@ -120,6 +138,7 @@ class BaseCPU : public MemObject
Counter max_insts_all_threads;
Counter max_loads_any_thread;
Counter max_loads_all_threads;
Counter stats_reset_inst;
Tick clock;
bool functionTrace;
Tick functionTraceStart;
@@ -128,6 +147,7 @@ class BaseCPU : public MemObject
int cpu_id;
Tick profile;
#endif
Tick progress_interval;
BaseCPU *checker;
Params();

View File

@@ -197,7 +197,7 @@ class BaseDynInst : public FastAlloc, public RefCounted
union Result {
uint64_t integer;
float fp;
// float fp;
double dbl;
};
@@ -394,7 +394,7 @@ class BaseDynInst : public FastAlloc, public RefCounted
uint64_t readIntResult() { return instResult.integer; }
/** Returns the result of a floating point instruction. */
float readFloatResult() { return instResult.fp; }
float readFloatResult() { return (float)instResult.dbl; }
/** Returns the result of a floating point (double) instruction. */
double readDoubleResult() { return instResult.dbl; }
@@ -419,7 +419,8 @@ class BaseDynInst : public FastAlloc, public RefCounted
/** Records an fp register being set to a value. */
void setFloatReg(const StaticInst *si, int idx, FloatReg val)
{
instResult.fp = val;
// instResult.fp = val;
instResult.dbl = (double)val;
}
/** Records an fp register being set to an integer value. */

View File

@@ -102,6 +102,7 @@ class CheckerCPU : public BaseCPU
Process *process;
#endif
bool exitOnError;
bool updateOnError;
bool warnOnlyOnLoadError;
};
@@ -148,7 +149,7 @@ class CheckerCPU : public BaseCPU
union Result {
uint64_t integer;
float fp;
// float fp;
double dbl;
};
@@ -269,7 +270,7 @@ class CheckerCPU : public BaseCPU
{
int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
thread->setFloatReg(reg_idx, val);
result.fp = val;
result.dbl = (double)val;
}
void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val,
@@ -318,7 +319,7 @@ class CheckerCPU : public BaseCPU
return thread->setMiscRegWithEffect(misc_reg, val);
}
void recordPCChange(uint64_t val) { changedPC = true; }
void recordPCChange(uint64_t val) { changedPC = true; newPC = val; }
void recordNextPCChange(uint64_t val) { changedNextPC = true; }
bool translateInstReq(Request *req);
@@ -360,6 +361,7 @@ class CheckerCPU : public BaseCPU
uint64_t newPC;
bool changedNextPC;
bool exitOnError;
bool updateOnError;
bool warnOnlyOnLoadError;
InstSeqNum youngestSN;
@@ -376,7 +378,7 @@ class Checker : public CheckerCPU
{
public:
Checker(Params *p)
: CheckerCPU(p)
: CheckerCPU(p), updateThisCycle(false), unverifiedInst(NULL)
{ }
void switchOut();
@@ -393,12 +395,19 @@ class Checker : public CheckerCPU
private:
void handleError(DynInstPtr &inst)
{
if (exitOnError)
if (exitOnError) {
dumpAndExit(inst);
} else if (updateOnError) {
updateThisCycle = true;
}
}
void dumpAndExit(DynInstPtr &inst);
bool updateThisCycle;
DynInstPtr unverifiedInst;
std::list<DynInstPtr> instList;
typedef typename std::list<DynInstPtr>::iterator InstListIt;
void dumpInsts();

View File

@@ -94,6 +94,8 @@ Checker<DynInstPtr>::verify(DynInstPtr &completed_inst)
}
}
unverifiedInst = inst;
// Try to check all instructions that are completed, ending if we
// run out of instructions to check or if an instruction is not
// yet completed.
@@ -171,7 +173,7 @@ Checker<DynInstPtr>::verify(DynInstPtr &completed_inst)
thread->setPC(thread->readNextPC());
thread->setNextPC(thread->readNextPC() + sizeof(MachInst));
return;
break;
} else {
// The instruction is carrying an ITB fault. Handle
// the fault and see if our results match the CPU on
@@ -220,7 +222,8 @@ Checker<DynInstPtr>::verify(DynInstPtr &completed_inst)
thread->funcExeInst++;
fault = curStaticInst->execute(this, NULL);
if (!inst->isUnverifiable())
fault = curStaticInst->execute(this, NULL);
// Checks to make sure instruction results are correct.
validateExecution(inst);
@@ -289,6 +292,7 @@ Checker<DynInstPtr>::verify(DynInstPtr &completed_inst)
break;
}
}
unverifiedInst = NULL;
}
template <class DynInstPtr>
@@ -395,6 +399,23 @@ template <class DynInstPtr>
void
Checker<DynInstPtr>::validateState()
{
if (updateThisCycle) {
warn("%lli: Instruction PC %#x results didn't match up, copying all "
"registers from main CPU", curTick, unverifiedInst->readPC());
// Heavy-weight copying of all registers
cpuXC->copyArchRegs(unverifiedInst->xcBase());
// Also advance the PC. Hopefully no PC-based events happened.
#if THE_ISA != MIPS_ISA
// go to the next instruction
cpuXC->setPC(cpuXC->readNextPC());
cpuXC->setNextPC(cpuXC->readNextPC() + sizeof(MachInst));
#else
// go to the next instruction
cpuXC->setPC(cpuXC->readNextPC());
cpuXC->setNextPC(cpuXC->readNextNPC());
cpuXC->setNextNPC(cpuXC->readNextNPC() + sizeof(MachInst));
#endif
updateThisCycle = false;
}
template <class DynInstPtr>

View File

@@ -56,6 +56,7 @@ SimObjectParam<System *> system;
Param<int> cpu_id;
SimObjectParam<AlphaITB *> itb;
SimObjectParam<AlphaDTB *> dtb;
Param<Tick> profile;
#else
SimObjectVectorParam<Process *> workload;
#endif // FULL_SYSTEM
@@ -68,6 +69,8 @@ Param<Counter> max_insts_any_thread;
Param<Counter> max_insts_all_threads;
Param<Counter> max_loads_any_thread;
Param<Counter> max_loads_all_threads;
Param<Counter> stats_reset_inst;
Param<Tick> progress_interval;
Param<unsigned> cachePorts;
@@ -162,6 +165,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
INIT_PARAM(cpu_id, "processor ID"),
INIT_PARAM(itb, "Instruction translation buffer"),
INIT_PARAM(dtb, "Data translation buffer"),
INIT_PARAM(profile, ""),
#else
INIT_PARAM(workload, "Processes to run"),
#endif // FULL_SYSTEM
@@ -184,6 +188,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
"Terminate when all threads have reached this load"
"count",
0),
INIT_PARAM_DFLT(stats_reset_inst,
"blah",
0),
INIT_PARAM_DFLT(progress_interval, "Progress interval", 0),
INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
@@ -305,6 +313,7 @@ CREATE_SIM_OBJECT(DerivO3CPU)
params->cpu_id = cpu_id;
params->itb = itb;
params->dtb = dtb;
params->profile = profile;
#else
params->workload = workload;
#endif // FULL_SYSTEM
@@ -317,6 +326,8 @@ CREATE_SIM_OBJECT(DerivO3CPU)
params->max_insts_all_threads = max_insts_all_threads;
params->max_loads_any_thread = max_loads_any_thread;
params->max_loads_all_threads = max_loads_all_threads;
params->stats_reset_inst = stats_reset_inst;
params->progress_interval = progress_interval;
//
// Caches

View File

@@ -64,6 +64,8 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker)
Param<Counter> max_insts_all_threads;
Param<Counter> max_loads_any_thread;
Param<Counter> max_loads_all_threads;
Param<Counter> stats_reset_inst;
Param<Tick> progress_interval;
#if FULL_SYSTEM
SimObjectParam<AlphaITB *> itb;
@@ -78,6 +80,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker)
Param<bool> defer_registration;
Param<bool> exitOnError;
Param<bool> updateOnError;
Param<bool> warnOnlyOnLoadError;
Param<bool> function_trace;
Param<Tick> function_trace_start;
@@ -94,6 +97,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker)
"terminate when any thread reaches this load count"),
INIT_PARAM(max_loads_all_threads,
"terminate when all threads have reached this load count"),
INIT_PARAM(stats_reset_inst,
"blah"),
INIT_PARAM_DFLT(progress_interval, "CPU Progress Interval", 0),
#if FULL_SYSTEM
INIT_PARAM(itb, "Instruction TLB"),
@@ -109,6 +115,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker)
INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
INIT_PARAM(exitOnError, "exit on error"),
INIT_PARAM(updateOnError, "Update the checker with the main CPU's state on error"),
INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load "
"result errors", false),
INIT_PARAM(function_trace, "Enable function trace"),
@@ -126,7 +133,9 @@ CREATE_SIM_OBJECT(O3Checker)
params->max_insts_all_threads = 0;
params->max_loads_any_thread = 0;
params->max_loads_all_threads = 0;
params->stats_reset_inst = 0;
params->exitOnError = exitOnError;
params->updateOnError = updateOnError;
params->warnOnlyOnLoadError = warnOnlyOnLoadError;
params->deferRegistration = defer_registration;
params->functionTrace = function_trace;
@@ -139,6 +148,10 @@ CREATE_SIM_OBJECT(O3Checker)
temp = max_insts_all_threads;
temp = max_loads_any_thread;
temp = max_loads_all_threads;
temp = stats_reset_inst;
Tick temp2 = progress_interval;
params->progress_interval = 0;
temp2++;
#if FULL_SYSTEM
params->itb = itb;

View File

@@ -1083,12 +1083,26 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// Generate trap squash event.
generateTrapEvent(tid);
// warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC());
return false;
}
updateComInstStats(head_inst);
#if FULL_SYSTEM
if (thread[tid]->profile) {
// bool usermode =
// (cpu->readMiscReg(AlphaISA::IPR_DTB_CM, tid) & 0x18) != 0;
// thread[tid]->profilePC = usermode ? 1 : head_inst->readPC();
thread[tid]->profilePC = head_inst->readPC();
ProfileNode *node = thread[tid]->profile->consume(thread[tid]->getXCProxy(),
head_inst->staticInst);
if (node)
thread[tid]->profileNode = node;
}
#endif
if (head_inst->traceData) {
head_inst->traceData->setFetchSeq(head_inst->seqNum);
head_inst->traceData->setCPSeq(thread[tid]->numInst);
@@ -1102,6 +1116,9 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
head_inst->renamedDestRegIdx(i));
}
if (head_inst->isCopy())
panic("Should not commit any copy instructions!");
// Finally clear the head ROB entry.
rob->retireHead(tid);

View File

@@ -33,6 +33,7 @@
#include "config/use_checker.hh"
#if FULL_SYSTEM
#include "cpu/quiesce_event.hh"
#include "sim/system.hh"
#else
#include "sim/process.hh"
@@ -793,6 +794,8 @@ template <class Impl>
unsigned int
FullO3CPU<Impl>::drain(Event *drain_event)
{
DPRINTF(O3CPU, "Switching out\n");
BaseCPU::switchOut(_sampler);
drainCount = 0;
fetch.drain();
decode.drain();
@@ -863,6 +866,7 @@ FullO3CPU<Impl>::switchOut()
{
fetch.switchOut();
rename.switchOut();
iew.switchOut();
commit.switchOut();
instList.clear();
while (!removeList.empty()) {
@@ -930,6 +934,45 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
tickEvent.schedule(curTick);
}
template <class Impl>
void
FullO3CPU<Impl>::serialize(std::ostream &os)
{
    // Base-class state is written first, then the tick event under its
    // own "<name>.tickEvent" section header.
    BaseCPU::serialize(os);
    nameOut(os, csprintf("%s.tickEvent", name()));
    tickEvent.serialize(os);

    // Registers are written by copying each thread's state into a scratch
    // CPUExecContext and serializing that. The scratch context is static
    // so it does not get instantiated multiple times (which causes a
    // panic in statistics).
    static CPUExecContext scratch_xc;

    int tid = 0;
    while (tid < thread.size()) {
        // One "<name>.xc.<tid>" section per thread.
        nameOut(os, csprintf("%s.xc.%i", name(), tid));
        scratch_xc.copyXC(thread[tid]->getXCProxy());
        scratch_xc.serialize(os);
        ++tid;
    }
}
template <class Impl>
void
FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
{
    // Restore base-class state, then the tick event from its
    // "<section>.tickEvent" section.
    BaseCPU::unserialize(cp, section);
    tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));

    // Registers are read back through a scratch CPUExecContext. The
    // scratch context is static so it does not get instantiated multiple
    // times (which causes a panic in statistics).
    static CPUExecContext scratch_xc;

    int tid = 0;
    while (tid < thread.size()) {
        // Seed the scratch context from the live thread, overwrite it
        // with the checkpointed "<section>.xc.<tid>" data, then copy the
        // architectural registers back into the thread.
        scratch_xc.copyXC(thread[tid]->getXCProxy());
        scratch_xc.unserialize(cp, csprintf("%s.xc.%i", section, tid));
        thread[tid]->getXCProxy()->copyArchRegs(scratch_xc.getProxy());
        ++tid;
    }
}
template <class Impl>
uint64_t
FullO3CPU<Impl>::readIntReg(int reg_idx)

View File

@@ -442,6 +442,7 @@ DefaultFetch<Impl>::takeOverFrom()
wroteToTimeBuffer = false;
_status = Inactive;
switchedOut = false;
interruptPending = false;
branchPred.takeOverFrom();
}
@@ -563,7 +564,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
unsigned flags = 0;
#endif // FULL_SYSTEM
if (cacheBlocked || (interruptPending && flags == 0)) {
if (cacheBlocked || isSwitchedOut() || (interruptPending && flags == 0)) {
// Hold off fetch from getting new instructions when:
// Cache is blocked, or
// while an interrupt is pending and we're not in PAL mode, or
@@ -1152,8 +1153,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
fetch_PC = next_PC;
if (instruction->isQuiesce()) {
warn("cycle %lli: Quiesce instruction encountered, halting fetch!",
curTick);
// warn("%lli: Quiesce instruction encountered, halting fetch!",
// curTick);
fetchStatus[tid] = QuiescePending;
++numInst;
status_change = true;
@@ -1268,7 +1269,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
fetchStatus[tid] = TrapPending;
status_change = true;
warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]);
// warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
#else // !FULL_SYSTEM
warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]);
#endif // FULL_SYSTEM

View File

@@ -216,6 +216,7 @@ class DefaultIEW
if (++wbOutstanding == wbMax)
ableToIssue = false;
DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
assert(wbOutstanding <= wbMax);
#ifdef DEBUG
wbList.insert(sn);
#endif
@@ -226,6 +227,7 @@ class DefaultIEW
if (wbOutstanding-- == wbMax)
ableToIssue = true;
DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
assert(wbOutstanding >= 0);
#ifdef DEBUG
assert(wbList.find(sn) != wbList.end());
wbList.erase(sn);
@@ -450,7 +452,9 @@ class DefaultIEW
unsigned wbCycle;
/** Number of instructions in flight that will writeback. */
unsigned wbOutstanding;
/** Number of instructions in flight that will writeback. */
int wbOutstanding;
/** Writeback width. */
unsigned wbWidth;
@@ -507,6 +511,8 @@ class DefaultIEW
Stats::Scalar<> iewExecutedInsts;
/** Stat for total number of executed load instructions. */
Stats::Vector<> iewExecLoadInsts;
/** Stat for total number of executed store instructions. */
// Stats::Scalar<> iewExecStoreInsts;
/** Stat for total number of squashed instructions skipped at execute. */
Stats::Scalar<> iewExecSquashedInsts;
/** Number of executed software prefetches. */

View File

@@ -162,17 +162,17 @@ DefaultIEW<Impl>::regStats()
branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect;
iewExecutedInsts
.name(name() + ".EXEC:insts")
.name(name() + ".iewExecutedInsts")
.desc("Number of executed instructions");
iewExecLoadInsts
.init(cpu->number_of_threads)
.name(name() + ".EXEC:loads")
.name(name() + ".iewExecLoadInsts")
.desc("Number of load instructions executed")
.flags(total);
iewExecSquashedInsts
.name(name() + ".EXEC:squashedInsts")
.name(name() + ".iewExecSquashedInsts")
.desc("Number of squashed instructions skipped in execute");
iewExecutedSwp
@@ -372,6 +372,8 @@ DefaultIEW<Impl>::switchOut()
{
// Clear any state.
switchedOut = true;
assert(insts[0].empty());
assert(skidBuffer[0].empty());
instQueue.switchOut();
ldstQueue.switchOut();
@@ -410,7 +412,6 @@ DefaultIEW<Impl>::takeOverFrom()
updateLSQNextCycle = false;
// @todo: Fix hardcoded number
for (int i = 0; i < issueToExecQueue.getSize(); ++i) {
issueToExecQueue.advance();
}
@@ -611,9 +612,11 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst)
wbNumInst = 0;
}
assert((wbCycle * wbWidth + wbNumInst) < wbMax);
assert((wbCycle * wbWidth + wbNumInst) <= wbMax);
}
DPRINTF(IEW, "Current wb cycle: %i, width: %i, numInst: %i\nwbActual:%i\n",
wbCycle, wbWidth, wbNumInst, wbCycle * wbWidth + wbNumInst);
// Add finished instruction to queue to commit.
(*iewQueue)[wbCycle].insts[wbNumInst] = inst;
(*iewQueue)[wbCycle].size++;
@@ -901,6 +904,22 @@ DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
}
}
template <class Impl>
void
DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
{
    // Drain every instruction queued in insts[tid]. Each one is counted
    // in the iewInfo sent to rename as dispatched; loads and stores are
    // additionally counted as dispatched to the LSQ.
    for (; !insts[tid].empty(); insts[tid].pop()) {
        const bool is_load_or_store =
            insts[tid].front()->isLoad() || insts[tid].front()->isStore();

        if (is_load_or_store) {
            toRename->iewInfo[tid].dispatchedToLSQ++;
        }

        toRename->iewInfo[tid].dispatched++;
    }
}
template <class Impl>
void
DefaultIEW<Impl>::wakeCPU()
@@ -1273,13 +1292,23 @@ DefaultIEW<Impl>::executeInsts()
// event adds the instruction to the queue to commit
fault = ldstQueue.executeLoad(inst);
} else if (inst->isStore()) {
ldstQueue.executeStore(inst);
fault = ldstQueue.executeStore(inst);
// If the store had a fault then it may not have a mem req
if (inst->req && !(inst->req->getFlags() & LOCKED)) {
if (!inst->isStoreConditional() && fault == NoFault) {
inst->setExecuted();
instToCommit(inst);
} else if (fault != NoFault) {
// If the instruction faulted, then we need to send it along to commit
// without the instruction completing.
// Send this instruction to commit, also make sure iew stage
// realizes there is activity.
inst->setExecuted();
instToCommit(inst);
activityThisCycle();
}
// Store conditionals will mark themselves as
@@ -1404,7 +1433,7 @@ DefaultIEW<Impl>::writebackInsts()
// E.g. Uncached loads have not actually executed when they
// are first sent to commit. Instead commit must tell the LSQ
// when it's ready to execute the uncached load.
if (!inst->isSquashed() && inst->isExecuted()) {
if (!inst->isSquashed() && inst->isExecuted() && inst->getFault() == NoFault) {
int dependents = instQueue.wakeDependents(inst);
for (int i = 0; i < inst->numDestRegs(); i++) {

View File

@@ -479,13 +479,13 @@ class InstructionQueue
/** Distribution of number of instructions in the queue.
* @todo: Need to create struct to track the entry time for each
* instruction. */
Stats::VectorDistribution<> queueResDist;
// Stats::VectorDistribution<> queueResDist;
/** Distribution of the number of instructions issued. */
Stats::Distribution<> numIssuedDist;
/** Distribution of the cycles it takes to issue an instruction.
* @todo: Need to create struct to track the ready time for each
* instruction. */
Stats::VectorDistribution<> issueDelayDist;
// Stats::VectorDistribution<> issueDelayDist;
/** Number of times an instruction could not be issued because a
* FU was busy.

View File

@@ -230,7 +230,7 @@ InstructionQueue<Impl>::regStats()
.name(name() + ".iqSquashedNonSpecRemoved")
.desc("Number of squashed non-spec instructions that were removed")
.prereq(iqSquashedNonSpecRemoved);
/*
queueResDist
.init(Num_OpClasses, 0, 99, 2)
.name(name() + ".IQ:residence:")
@@ -240,6 +240,7 @@ InstructionQueue<Impl>::regStats()
for (int i = 0; i < Num_OpClasses; ++i) {
queueResDist.subname(i, opClassStrings[i]);
}
*/
numIssuedDist
.init(0,totalWidth,1)
.name(name() + ".ISSUE:issued_per_cycle")
@@ -268,7 +269,7 @@ InstructionQueue<Impl>::regStats()
//
// How long did instructions for a particular FU type wait prior to issue
//
/*
issueDelayDist
.init(Num_OpClasses,0,99,2)
.name(name() + ".ISSUE:")
@@ -281,7 +282,7 @@ InstructionQueue<Impl>::regStats()
subname << opClassStrings[i] << "_delay";
issueDelayDist.subname(i, subname.str());
}
*/
issueRate
.name(name() + ".ISSUE:rate")
.desc("Inst issue rate")
@@ -385,8 +386,16 @@ template <class Impl>
void
InstructionQueue<Impl>::switchOut()
{
/*
if (!instList[0].empty() || (numEntries != freeEntries) ||
!readyInsts[0].empty() || !nonSpecInsts.empty() || !listOrder.empty()) {
dumpInsts();
// assert(0);
}
*/
resetState();
dependGraph.reset();
instsToExecute.clear();
switchedOut = true;
for (int i = 0; i < numThreads; ++i) {
memDepUnit[i].switchOut();
@@ -642,9 +651,12 @@ template <class Impl>
void
InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
{
DPRINTF(IQ, "Processing FU completion [sn:%lli]\n", inst->seqNum);
// The CPU could have been sleeping until this op completed (*extremely*
// long latency op). Wake it if it was. This may be overkill.
if (isSwitchedOut()) {
DPRINTF(IQ, "FU completion not processed, IQ is switched out [sn:%lli]\n",
inst->seqNum);
return;
}
@@ -1036,6 +1048,10 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
(squashed_inst->isMemRef() &&
!squashed_inst->memOpDone)) {
DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x "
"squashed.\n",
tid, squashed_inst->seqNum, squashed_inst->readPC());
// Remove the instruction from the dependency list.
if (!squashed_inst->isNonSpeculative() &&
!squashed_inst->isStoreConditional() &&
@@ -1066,7 +1082,7 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
++iqSquashedOperandsExamined;
}
} else {
} else if (!squashed_inst->isStoreConditional() || !squashed_inst->isCompleted()) {
NonSpecMapIt ns_inst_it =
nonSpecInsts.find(squashed_inst->seqNum);
assert(ns_inst_it != nonSpecInsts.end());
@@ -1093,10 +1109,6 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
count[squashed_inst->threadNumber]--;
++freeEntries;
DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x "
"squashed.\n",
tid, squashed_inst->seqNum, squashed_inst->readPC());
}
instList[tid].erase(squash_it--);

View File

@@ -165,6 +165,16 @@ LSQ<Impl>::regStats()
}
}
template<class Impl>
void
LSQ<Impl>::regStats()
{
    // Forward statistics registration to each entry of thread[], one per
    // hardware thread up to numThreads.
    int tid = 0;
    while (tid < numThreads) {
        thread[tid].regStats();
        ++tid;
    }
}
template<class Impl>
void
LSQ<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)

View File

@@ -407,20 +407,9 @@ class LSQUnit {
// Will also need how many read/write ports the Dcache has. Or keep track
// of that in stage that is one level up, and only call executeLoad/Store
// the appropriate number of times.
/** Total number of loads forwarded from LSQ stores. */
Stats::Scalar<> lsqForwLoads;
/** Total number of loads ignored due to invalid addresses. */
Stats::Scalar<> invAddrLoads;
/** Total number of squashed loads. */
Stats::Scalar<> lsqSquashedLoads;
/** Total number of responses from the memory system that are
* ignored due to the instruction already being squashed. */
Stats::Scalar<> lsqIgnoredResponses;
/** Total number of squashed stores. */
Stats::Scalar<> lsqSquashedStores;

View File

@@ -180,6 +180,10 @@ LSQUnit<Impl>::regStats()
.name(name() + ".ignoredResponses")
.desc("Number of memory responses ignored because the instruction is squashed");
lsqMemOrderViolation
.name(name() + ".memOrderViolation")
.desc("Number of memory ordering violations");
lsqSquashedStores
.name(name() + ".squashedStores")
.desc("Number of stores squashed");
@@ -220,8 +224,10 @@ void
LSQUnit<Impl>::switchOut()
{
switchedOut = true;
for (int i = 0; i < loadQueue.size(); ++i)
for (int i = 0; i < loadQueue.size(); ++i) {
assert(!loadQueue[i]);
loadQueue[i] = NULL;
}
assert(storesToWB == 0);
}
@@ -408,6 +414,11 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
if (load_fault != NoFault) {
// Send this instruction to commit, also make sure iew stage
// realizes there is activity.
// Mark it as executed unless it is an uncached load that
// needs to hit the head of commit.
if (!(inst->req->flags & UNCACHEABLE) || inst->isAtCommit()) {
inst->setExecuted();
}
iewStage->instToCommit(inst);
iewStage->activityThisCycle();
}
@@ -467,6 +478,7 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
// A load incorrectly passed this store. Squash and refetch.
// For now return a fault to show that it was unsuccessful.
memDepViolator = loadQueue[load_idx];
++lsqMemOrderViolation;
return genMachineCheckFault();
}

View File

@@ -109,6 +109,9 @@ template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::switchOut()
{
assert(instList[0].empty());
assert(instsToReplay.empty());
assert(memDepHash.empty());
// Clear any state.
for (int i = 0; i < Impl::MaxThreads; ++i) {
instList[i].clear();

View File

@@ -417,6 +417,8 @@ class DefaultRename
/** The maximum skid buffer size. */
unsigned skidBufferMax;
PhysRegIndex maxPhysicalRegs;
/** Enum to record the source of a structure full stall. Can come from
* the ROB, IQ, or LSQ, and it is prioritized in that order.
*/

View File

@@ -41,7 +41,8 @@ DefaultRename<Impl>::DefaultRename(Params *params)
commitToRenameDelay(params->commitToRenameDelay),
renameWidth(params->renameWidth),
commitWidth(params->commitWidth),
numThreads(params->numberOfThreads)
numThreads(params->numberOfThreads),
maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs)
{
_status = Inactive;
@@ -286,6 +287,11 @@ DefaultRename<Impl>::switchOut()
// Put the renamed physical register back on the free list.
freeList->addReg(hb_it->newPhysReg);
// Be sure to mark its register as ready if it's a misc register.
if (hb_it->newPhysReg >= maxPhysicalRegs) {
scoreboard->setReg(hb_it->newPhysReg);
}
historyBuffer[i].erase(hb_it++);
}
insts[i].clear();
@@ -889,6 +895,11 @@ DefaultRename<Impl>::doSquash(const InstSeqNum &squashed_seq_num, unsigned tid)
// Put the renamed physical register back on the free list.
freeList->addReg(hb_it->newPhysReg);
// Be sure to mark its register as ready if it's a misc register.
if (hb_it->newPhysReg >= maxPhysicalRegs) {
scoreboard->setReg(hb_it->newPhysReg);
}
historyBuffer[tid].erase(hb_it++);
++renameUndoneMaps;

View File

@@ -31,8 +31,11 @@
#ifndef __CPU_O3_THREAD_STATE_HH__
#define __CPU_O3_THREAD_STATE_HH__
#include "base/callback.hh"
#include "base/output.hh"
#include "cpu/thread_context.hh"
#include "cpu/thread_state.hh"
#include "sim/sim_exit.hh"
class Event;
class Process;
@@ -75,8 +78,22 @@ struct O3ThreadState : public ThreadState {
#if FULL_SYSTEM
/**
 * Full-system constructor. Stores the owning CPU and clears the
 * inSyscall and trapPending flags. When the CPU's profile parameter is
 * non-zero, a FunctionProfile is created over the system's kernel
 * symbol table and dumpFuncProfile() is registered as an exit callback.
 *
 * @param _cpu CPU that owns this thread state.
 * @param _thread_num Thread number passed on to ThreadState.
 */
O3ThreadState(O3CPU *_cpu, int _thread_num)
    : ThreadState(-1, _thread_num),
      cpu(_cpu), inSyscall(0), trapPending(0)
{
    if (cpu->params->profile) {
        profile = new FunctionProfile(cpu->params->system->kernelSymtab);
        Callback *cb =
            new MakeCallback<O3ThreadState,
                             &O3ThreadState::dumpFuncProfile>(this);
        registerExitCallback(cb);
    }

    // let's fill with a dummy node for now so we don't get a segfault
    // on the first cycle when there's no node available.
    static ProfileNode dummyNode;
    profileNode = &dummyNode;
    // NOTE(review): 3 is an unexplained placeholder value -- confirm
    // what the profiling code expects here.
    profilePC = 3;
}
#else
O3ThreadState(O3CPU *_cpu, int _thread_num, Process *_process, int _asid,
MemObject *mem)
@@ -95,6 +112,14 @@ struct O3ThreadState : public ThreadState {
/** Handles the syscall. */
void syscall(int64_t callnum) { process->syscall(callnum, tc); }
#endif
#if FULL_SYSTEM
void dumpFuncProfile()
{
    // The profile is dumped to a stream created through simout and named
    // "profile.<cpu name>.dat".
    std::ostream &profile_out =
        *simout.create(csprintf("profile.%s.dat", cpu->name()));
    profile->dump(xcProxy, profile_out);
}
#endif
};
#endif // __CPU_O3_THREAD_STATE_HH__

View File

@@ -62,6 +62,8 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize,
for (int i = 0; i < localPredictorSize; ++i)
localCtrs[i].setBits(localCtrBits);
localPredictorMask = floorPow2(localPredictorSize) - 1;
if (!isPowerOf2(localHistoryTableSize)) {
fatal("Invalid local history table size!\n");
}
@@ -158,7 +160,7 @@ TournamentBP::lookup(Addr &branch_addr, void * &bp_history)
//Lookup in the local predictor to get its branch prediction
local_history_idx = calcLocHistIdx(branch_addr);
local_predictor_idx = localHistoryTable[local_history_idx]
& localHistoryMask;
& localPredictorMask;
local_prediction = localCtrs[local_predictor_idx].read() > threshold;
//Lookup in the global predictor to get its branch prediction
@@ -176,7 +178,8 @@ TournamentBP::lookup(Addr &branch_addr, void * &bp_history)
bp_history = (void *)history;
assert(globalHistory < globalPredictorSize &&
local_history_idx < localPredictorSize);
local_history_idx < localHistoryTableSize &&
local_predictor_idx < localPredictorSize);
// Commented code is for doing speculative update of counters and
// all histories.
@@ -234,7 +237,7 @@ TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history)
// Get the local predictor's current prediction
local_history_idx = calcLocHistIdx(branch_addr);
local_predictor_hist = localHistoryTable[local_history_idx];
local_predictor_idx = local_predictor_hist & localHistoryMask;
local_predictor_idx = local_predictor_hist & localPredictorMask;
// Update the choice predictor to tell it which one was correct if
// there was a prediction.
@@ -256,6 +259,7 @@ TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history)
}
assert(globalHistory < globalPredictorSize &&
local_history_idx < localHistoryTableSize &&
local_predictor_idx < localPredictorSize);
// Update the counters and local history with the proper

View File

@@ -159,6 +159,9 @@ class TournamentBP
/** Size of the local predictor. */
unsigned localPredictorSize;
/** Mask to get the proper index bits into the predictor. */
unsigned localPredictorMask;
/** Number of bits of the local predictor's counters. */
unsigned localCtrBits;

View File

@@ -65,6 +65,8 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker)
Param<Counter> max_insts_all_threads;
Param<Counter> max_loads_any_thread;
Param<Counter> max_loads_all_threads;
Param<Counter> stats_reset_inst;
Param<Tick> progress_interval;
#if FULL_SYSTEM
SimObjectParam<AlphaITB *> itb;
@@ -79,6 +81,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker)
Param<bool> defer_registration;
Param<bool> exitOnError;
Param<bool> updateOnError;
Param<bool> warnOnlyOnLoadError;
Param<bool> function_trace;
Param<Tick> function_trace_start;
@@ -95,6 +98,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(OzoneChecker)
"terminate when any thread reaches this load count"),
INIT_PARAM(max_loads_all_threads,
"terminate when all threads have reached this load count"),
INIT_PARAM(stats_reset_inst,
"blah"),
INIT_PARAM_DFLT(progress_interval, "CPU Progress Interval", 0),
#if FULL_SYSTEM
INIT_PARAM(itb, "Instruction TLB"),
@@ -110,6 +116,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(OzoneChecker)
INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
INIT_PARAM(exitOnError, "exit on error"),
INIT_PARAM(updateOnError, "Update the checker with the main CPU's state on error"),
INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load "
"result errors", false),
INIT_PARAM(function_trace, "Enable function trace"),
@@ -127,7 +134,9 @@ CREATE_SIM_OBJECT(OzoneChecker)
params->max_insts_all_threads = 0;
params->max_loads_any_thread = 0;
params->max_loads_all_threads = 0;
params->stats_reset_inst = 0;
params->exitOnError = exitOnError;
params->updateOnError = updateOnError;
params->warnOnlyOnLoadError = warnOnlyOnLoadError;
params->deferRegistration = defer_registration;
params->functionTrace = function_trace;
@@ -140,6 +149,10 @@ CREATE_SIM_OBJECT(OzoneChecker)
temp = max_insts_all_threads;
temp = max_loads_any_thread;
temp = max_loads_all_threads;
temp = stats_reset_inst;
Tick temp2 = progress_interval;
temp2++;
params->progress_interval = 0;
#if FULL_SYSTEM
params->itb = itb;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2005 The Regents of The University of Michigan
* Copyright (c) 2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -81,13 +81,13 @@ template <class>
class Checker;
/**
* Declaration of Out-of-Order CPU class. Basically it is a SimpleCPU with
* simple out-of-order capabilities added to it. It is still a 1 CPI machine
* (?), but is capable of handling cache misses. Basically it models having
* a ROB/IQ by only allowing a certain amount of instructions to execute while
* the cache miss is outstanding.
* Light weight out of order CPU model that approximates an out of
* order CPU. It is separated into a front end and a back end, with
* the template parameter Impl describing the classes used for each.
* The goal is to be able to specify through the Impl the class to use
* for the front end and back end, with different classes used to
* model different levels of detail.
*/
template <class Impl>
class OzoneCPU : public BaseCPU
{
@@ -273,6 +273,7 @@ class OzoneCPU : public BaseCPU
typedef OzoneThreadState<Impl> ImplState;
private:
// Committed thread state for the OzoneCPU.
OzoneThreadState<Impl> thread;
public:
@@ -310,12 +311,6 @@ class OzoneCPU : public BaseCPU
tickEvent.squash();
}
private:
Trace::InstRecord *traceData;
template<typename T>
void trace_data(T data);
public:
enum Status {
Running,
@@ -326,8 +321,6 @@ class OzoneCPU : public BaseCPU
Status _status;
public:
bool checkInterrupts;
void post_interrupt(int int_num, int index);
void zero_fill_64(Addr addr) {
@@ -379,6 +372,7 @@ class OzoneCPU : public BaseCPU
FrontEnd *frontEnd;
BackEnd *backEnd;
private:
Status status() const { return _status; }
void setStatus(Status new_status) { _status = new_status; }
@@ -410,12 +404,11 @@ class OzoneCPU : public BaseCPU
// number of idle cycles
Stats::Average<> notIdleFraction;
Stats::Formula idleFraction;
public:
public:
virtual void serialize(std::ostream &os);
virtual void unserialize(Checkpoint *cp, const std::string &section);
#if FULL_SYSTEM
/** Translates an instruction request. */
Fault translateInstReq(RequestPtr &req, OzoneThreadState<Impl> *thread)
@@ -582,12 +575,9 @@ class OzoneCPU : public BaseCPU
Fault copy(Addr dest);
InstSeqNum globalSeqNum;
public:
void squashFromTC();
// @todo: This can be a useful debug function. Implement it.
void dumpInsts() { frontEnd->dumpInsts(); }
#if FULL_SYSTEM
@@ -605,7 +595,6 @@ class OzoneCPU : public BaseCPU
ThreadContext *tcBase() { return tc; }
bool decoupledFrontEnd;
struct CommStruct {
InstSeqNum doneSeqNum;
InstSeqNum nonSpecSeqNum;
@@ -614,8 +603,13 @@ class OzoneCPU : public BaseCPU
bool stall;
};
InstSeqNum globalSeqNum;
TimeBuffer<CommStruct> comm;
bool decoupledFrontEnd;
bool lockFlag;
Stats::Scalar<> quiesceCycles;

View File

@@ -63,6 +63,7 @@ SimObjectParam<System *> system;
Param<int> cpu_id;
SimObjectParam<AlphaITB *> itb;
SimObjectParam<AlphaDTB *> dtb;
Param<Tick> profile;
#else
SimObjectVectorParam<Process *> workload;
//SimObjectParam<PageTable *> page_table;
@@ -76,16 +77,19 @@ Param<Counter> max_insts_any_thread;
Param<Counter> max_insts_all_threads;
Param<Counter> max_loads_any_thread;
Param<Counter> max_loads_all_threads;
Param<Counter> stats_reset_inst;
Param<Tick> progress_interval;
//SimObjectParam<BaseCache *> icache;
//SimObjectParam<BaseCache *> dcache;
Param<unsigned> cachePorts;
Param<unsigned> width;
Param<unsigned> frontEndLatency;
Param<unsigned> frontEndWidth;
Param<unsigned> backEndLatency;
Param<unsigned> backEndWidth;
Param<unsigned> backEndSquashLatency;
Param<unsigned> backEndLatency;
Param<unsigned> maxInstBufferSize;
Param<unsigned> numPhysicalRegs;
Param<unsigned> maxOutstandingMemOps;
@@ -140,6 +144,7 @@ Param<unsigned> RASSize;
Param<unsigned> LQEntries;
Param<unsigned> SQEntries;
Param<bool> lsqLimits;
Param<unsigned> LFSTSize;
Param<unsigned> SSITSize;
@@ -181,6 +186,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
INIT_PARAM(cpu_id, "processor ID"),
INIT_PARAM(itb, "Instruction translation buffer"),
INIT_PARAM(dtb, "Data translation buffer"),
INIT_PARAM(profile, ""),
#else
INIT_PARAM(workload, "Processes to run"),
// INIT_PARAM(page_table, "Page table"),
@@ -204,16 +210,21 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
"Terminate when all threads have reached this load"
"count",
0),
INIT_PARAM_DFLT(stats_reset_inst,
"blah",
0),
INIT_PARAM_DFLT(progress_interval, "Progress interval", 0),
// INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
// INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
INIT_PARAM_DFLT(width, "Width", 1),
INIT_PARAM_DFLT(frontEndLatency, "Front end latency", 1),
INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1),
INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1),
INIT_PARAM_DFLT(backEndWidth, "Back end width", 1),
INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1),
INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1),
INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16),
INIT_PARAM(numPhysicalRegs, "Number of physical registers"),
INIT_PARAM_DFLT(maxOutstandingMemOps, "Maximum outstanding memory operations", 4),
@@ -274,6 +285,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
INIT_PARAM(LQEntries, "Number of load queue entries"),
INIT_PARAM(SQEntries, "Number of store queue entries"),
INIT_PARAM_DFLT(lsqLimits, "LSQ size limits dispatch", true),
INIT_PARAM(LFSTSize, "Last fetched store table size"),
INIT_PARAM(SSITSize, "Store set ID table size"),
@@ -336,6 +348,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
params->cpu_id = cpu_id;
params->itb = itb;
params->dtb = dtb;
params->profile = profile;
#else
params->workload = workload;
// params->pTable = page_table;
@@ -347,6 +360,8 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
params->max_insts_all_threads = max_insts_all_threads;
params->max_loads_any_thread = max_loads_any_thread;
params->max_loads_all_threads = max_loads_all_threads;
params->stats_reset_inst = stats_reset_inst;
params->progress_interval = progress_interval;
//
// Caches
@@ -357,6 +372,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
params->width = width;
params->frontEndWidth = frontEndWidth;
params->frontEndLatency = frontEndLatency;
params->backEndWidth = backEndWidth;
params->backEndSquashLatency = backEndSquashLatency;
params->backEndLatency = backEndLatency;
@@ -414,6 +430,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
params->LQEntries = LQEntries;
params->SQEntries = SQEntries;
params->lsqLimits = lsqLimits;
params->SSITSize = SSITSize;
params->LFSTSize = LFSTSize;

View File

@@ -50,7 +50,6 @@
#include "arch/alpha/types.hh"
#include "arch/vtophys.hh"
#include "base/callback.hh"
//#include "base/remote_gdb.hh"
#include "cpu/profile.hh"
#include "kern/kernel_stats.hh"
#include "sim/faults.hh"
@@ -67,15 +66,6 @@
using namespace TheISA;
/**
 * Forwards a data value to the attached trace record, if there is one.
 *
 * @param data Value handed to traceData->setData().
 */
template <class Impl>
template<typename T>
void
OzoneCPU<Impl>::trace_data(T data) {
    // No trace record attached: nothing to do.
    if (!traceData)
        return;

    traceData->setData(data);
}
template <class Impl>
OzoneCPU<Impl>::TickEvent::TickEvent(OzoneCPU *c, int w)
: Event(&mainEventQueue, CPU_Tick_Pri), cpu(c), width(w)
@@ -112,7 +102,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
_status = Idle;
if (p->checker) {
#if USE_CHECKER
BaseCPU *temp_checker = p->checker;
checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker);
checker->setMemory(mem);
@@ -126,6 +116,8 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
panic("Checker enabled but not compiled in!");
#endif
} else {
// If checker is not being used, then the xcProxy points
// directly to the CPU's ExecContext.
checker = NULL;
thread.tc = &ozoneTC;
tc = &ozoneTC;
@@ -138,7 +130,7 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
thread.setStatus(ThreadContext::Suspended);
#if FULL_SYSTEM
/***** All thread state stuff *****/
// Setup thread state stuff.
thread.cpu = this;
thread.setTid(0);
@@ -187,12 +179,15 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
frontEnd->setBackEnd(backEnd);
backEnd->setFrontEnd(frontEnd);
decoupledFrontEnd = p->decoupledFrontEnd;
globalSeqNum = 1;
#if FULL_SYSTEM
checkInterrupts = false;
#endif
lockFlag = 0;
// Setup rename table, initializing all values to ready.
for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
thread.renameTable[i] = new DynInst(this);
thread.renameTable[i]->setResultReady();
@@ -233,8 +228,6 @@ OzoneCPU<Impl>::OzoneCPU(Params *p)
thread.setVirtPort(virt_port);
#endif
lockFlag = 0;
DPRINTF(OzoneCPU, "OzoneCPU: Created Ozone cpu object.\n");
}
@@ -247,6 +240,7 @@ template <class Impl>
void
OzoneCPU<Impl>::switchOut()
{
BaseCPU::switchOut(_sampler);
switchCount = 0;
// Front end needs state from back end, so switch out the back end first.
backEnd->switchOut();
@@ -257,6 +251,8 @@ template <class Impl>
void
OzoneCPU<Impl>::signalSwitched()
{
// Only complete the switchout when both the front end and back
// end have signalled they are ready to switch.
if (++switchCount == 2) {
backEnd->doSwitchOut();
frontEnd->doSwitchOut();
@@ -266,6 +262,17 @@ OzoneCPU<Impl>::signalSwitched()
#endif
_status = SwitchedOut;
#ifndef NDEBUG
// Loop through all registers
for (int i = 0; i < AlphaISA::TotalNumRegs; ++i) {
assert(thread.renameTable[i] == frontEnd->renameTable[i]);
assert(thread.renameTable[i] == backEnd->renameTable[i]);
DPRINTF(OzoneCPU, "Checking if register %i matches.\n", i);
}
#endif
if (tickEvent.scheduled())
tickEvent.squash();
}
@@ -278,13 +285,25 @@ OzoneCPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
{
BaseCPU::takeOverFrom(oldCPU);
thread.trapPending = false;
thread.inSyscall = false;
backEnd->takeOverFrom();
frontEnd->takeOverFrom();
frontEnd->renameTable.copyFrom(thread.renameTable);
backEnd->renameTable.copyFrom(thread.renameTable);
assert(!tickEvent.scheduled());
#ifndef NDEBUG
// Check rename table.
for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
assert(thread.renameTable[i]->isResultReady());
}
#endif
// @todo: Fix hardcoded number
// Clear out any old information in time buffer.
for (int i = 0; i < 6; ++i) {
for (int i = 0; i < 15; ++i) {
comm.advance();
}
@@ -316,6 +335,10 @@ OzoneCPU<Impl>::activateContext(int thread_num, int delay)
notIdleFraction++;
scheduleTickEvent(delay);
_status = Running;
#if FULL_SYSTEM
if (thread.quiesceEvent && thread.quiesceEvent->scheduled())
thread.quiesceEvent->deschedule();
#endif
thread.setStatus(ThreadContext::Active);
frontEnd->wakeFromQuiesce();
}
@@ -393,7 +416,7 @@ template <class Impl>
void
OzoneCPU<Impl>::resetStats()
{
startNumInst = numInst;
// startNumInst = numInst;
notIdleFraction = (_status != Idle);
}
@@ -441,6 +464,15 @@ OzoneCPU<Impl>::serialize(std::ostream &os)
ozoneTC.serialize(os);
nameOut(os, csprintf("%s.tickEvent", name()));
tickEvent.serialize(os);
// Use SimpleThread's ability to checkpoint to make it easier to
// write out the registers. Also make this static so it doesn't
// get instantiated multiple times (causes a panic in statistics).
static CPUExecContext temp;
nameOut(os, csprintf("%s.xc.0", name()));
temp.copyXC(thread.getXCProxy());
temp.serialize(os);
}
template <class Impl>
@@ -451,6 +483,15 @@ OzoneCPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
UNSERIALIZE_ENUM(_status);
ozoneTC.unserialize(cp, csprintf("%s.tc", section));
tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
// Use SimpleThread's ability to checkpoint to make it easier to
// read in the registers. Also make this static so it doesn't
// get instantiated multiple times (causes a panic in statistics).
static CPUExecContext temp;
temp.copyXC(thread.getXCProxy());
temp.unserialize(cp, csprintf("%s.xc.0", section));
thread.getXCProxy()->copyArchRegs(temp.getProxy());
}
template <class Impl>
@@ -810,7 +851,9 @@ OzoneCPU<Impl>::OzoneTC::halt()
template <class Impl>
void
OzoneCPU<Impl>::OzoneTC::dumpFuncProfile()
{ }
{
thread->dumpFuncProfile();
}
#endif
template <class Impl>
@@ -829,6 +872,7 @@ OzoneCPU<Impl>::OzoneTC::takeOverFrom(ThreadContext *old_context)
copyArchRegs(old_context);
setCpuId(old_context->readCpuId());
thread->inst = old_context->getInst();
#if !FULL_SYSTEM
setFuncExeInst(old_context->readFuncExeInst());
#else
@@ -842,6 +886,7 @@ OzoneCPU<Impl>::OzoneTC::takeOverFrom(ThreadContext *old_context)
thread->quiesceEvent->tc = this;
}
// Copy kernel stats pointer from old context.
thread->kernelStats = old_context->getKernelStats();
// storeCondFailures = 0;
cpu->lockFlag = false;
@@ -863,7 +908,11 @@ OzoneCPU<Impl>::OzoneTC::regStats(const std::string &name)
template <class Impl>
void
OzoneCPU<Impl>::OzoneTC::serialize(std::ostream &os)
{ }
{
// Once serialization is added, serialize the quiesce event and
// kernel stats. Will need to make sure there aren't multiple
// things that serialize them.
}
template <class Impl>
void
@@ -896,16 +945,14 @@ template <class Impl>
void
OzoneCPU<Impl>::OzoneTC::profileClear()
{
if (thread->profile)
thread->profile->clear();
thread->profileClear();
}
template <class Impl>
void
OzoneCPU<Impl>::OzoneTC::profileSample()
{
if (thread->profile)
thread->profile->sample(thread->profileNode, thread->profilePC);
thread->profileSample();
}
#endif
@@ -916,7 +963,6 @@ OzoneCPU<Impl>::OzoneTC::getThreadNum()
return thread->readTid();
}
// Also somewhat obnoxious. Really only used for the TLB fault.
template <class Impl>
TheISA::MachInst
OzoneCPU<Impl>::OzoneTC::getInst()
@@ -934,14 +980,20 @@ OzoneCPU<Impl>::OzoneTC::copyArchRegs(ThreadContext *tc)
cpu->frontEnd->setPC(thread->PC);
cpu->frontEnd->setNextPC(thread->nextPC);
for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
if (i < TheISA::FP_Base_DepTag) {
thread->renameTable[i]->setIntResult(tc->readIntReg(i));
} else if (i < (TheISA::FP_Base_DepTag + TheISA::NumFloatRegs)) {
int fp_idx = i - TheISA::FP_Base_DepTag;
thread->renameTable[i]->setDoubleResult(
tc->readFloatReg(fp_idx, 64));
}
// First loop through the integer registers.
for (int i = 0; i < TheISA::NumIntRegs; ++i) {
/* DPRINTF(OzoneCPU, "Copying over register %i, had data %lli, "
"now has data %lli.\n",
i, thread->renameTable[i]->readIntResult(),
tc->readIntReg(i));
*/
thread->renameTable[i]->setIntResult(tc->readIntReg(i));
}
// Then loop through the floating point registers.
for (int i = 0; i < TheISA::NumFloatRegs; ++i) {
int fp_idx = i + TheISA::FP_Base_DepTag;
thread->renameTable[fp_idx]->setIntResult(tc->readFloatRegBits(i));
}
#if !FULL_SYSTEM

View File

@@ -34,6 +34,7 @@
#include <deque>
#include "arch/utility.hh"
#include "base/timebuf.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/bpred_unit.hh"
#include "cpu/ozone/rename_table.hh"
@@ -246,15 +247,21 @@ class FrontEnd
void dumpInsts();
private:
TimeBuffer<int> numInstsReady;
typedef typename std::deque<DynInstPtr> InstBuff;
typedef typename InstBuff::iterator InstBuffIt;
InstBuff feBuffer;
InstBuff instBuffer;
int instBufferSize;
int maxInstBufferSize;
int latency;
int width;
int freeRegs;

View File

@@ -92,8 +92,10 @@ FrontEnd<Impl>::FrontEnd(Params *params)
: branchPred(params),
icachePort(this),
mem(params->mem),
numInstsReady(params->frontEndLatency, 0),
instBufferSize(0),
maxInstBufferSize(params->maxInstBufferSize),
latency(params->frontEndLatency),
width(params->frontEndWidth),
freeRegs(params->numPhysicalRegs),
numPhysRegs(params->numPhysicalRegs),
@@ -326,6 +328,18 @@ FrontEnd<Impl>::tick()
if (switchedOut)
return;
for (int insts_to_queue = numInstsReady[-latency];
!instBuffer.empty() && insts_to_queue;
--insts_to_queue)
{
DPRINTF(FE, "Transferring instruction [sn:%lli] to the feBuffer\n",
instBuffer.front()->seqNum);
feBuffer.push_back(instBuffer.front());
instBuffer.pop_front();
}
numInstsReady.advance();
// @todo: Maybe I want to just have direct communication...
if (fromCommit->doneSeqNum) {
branchPred.update(fromCommit->doneSeqNum, 0);
@@ -339,8 +353,8 @@ FrontEnd<Impl>::tick()
cacheBlkValid = true;
status = Running;
if (barrierInst)
status = SerializeBlocked;
// if (barrierInst)
// status = SerializeBlocked;
if (freeRegs <= 0)
status = RenameBlocked;
checkBE();
@@ -414,11 +428,12 @@ FrontEnd<Impl>::tick()
// latency
instBuffer.push_back(inst);
++instBufferSize;
numInstsReady[0]++;
++num_inst;
#if FULL_SYSTEM
if (inst->isQuiesce()) {
warn("%lli: Quiesce instruction encountered, halting fetch!", curTick);
// warn("%lli: Quiesce instruction encountered, halting fetch!", curTick);
status = QuiescePending;
break;
}
@@ -572,10 +587,10 @@ FrontEnd<Impl>::processBarriers(DynInstPtr &inst)
// Change status over to SerializeBlocked so that other stages know
// what this is blocked on.
status = SerializeBlocked;
// status = SerializeBlocked;
barrierInst = inst;
return true;
// barrierInst = inst;
// return true;
} else if ((inst->isStoreConditional() || inst->isSerializeAfter())
&& !inst->isSerializeHandled()) {
DPRINTF(FE, "Serialize after instruction encountered.\n");
@@ -620,6 +635,7 @@ FrontEnd<Impl>::handleFault(Fault &fault)
instruction->fault = fault;
instruction->setCanIssue();
instBuffer.push_back(instruction);
numInstsReady[0]++;
++instBufferSize;
}
@@ -649,6 +665,21 @@ FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
freeRegs+= inst->numDestRegs();
}
while (!feBuffer.empty() &&
feBuffer.back()->seqNum > squash_num) {
DynInstPtr inst = feBuffer.back();
DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n",
inst->seqNum, inst->readPC());
inst->clearDependents();
feBuffer.pop_back();
--instBufferSize;
freeRegs+= inst->numDestRegs();
}
// Copy over rename table from the back end.
renameTable.copyFrom(backEnd->renameTable);
@@ -666,12 +697,12 @@ FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
DPRINTF(FE, "Squashing outstanding Icache access.\n");
memReq = NULL;
}
/*
if (status == SerializeBlocked) {
assert(barrierInst->seqNum > squash_num);
barrierInst = NULL;
}
*/
// Unless this squash originated from the front end, we're probably
// in running mode now.
// Actually might want to make this latency dependent.
@@ -683,13 +714,22 @@ template <class Impl>
typename Impl::DynInstPtr
FrontEnd<Impl>::getInst()
{
if (instBufferSize == 0) {
if (feBuffer.empty()) {
return NULL;
}
DynInstPtr inst = instBuffer.front();
DynInstPtr inst = feBuffer.front();
instBuffer.pop_front();
if (inst->isSerializeBefore() || inst->isIprAccess()) {
DPRINTF(FE, "Back end is getting a serialize before inst\n");
if (!backEnd->robEmpty()) {
DPRINTF(FE, "Rob is not empty yet, not returning inst\n");
return NULL;
}
inst->clearSerializeBefore();
}
feBuffer.pop_front();
--instBufferSize;
@@ -784,11 +824,11 @@ FrontEnd<Impl>::updateStatus()
}
if (status == BEBlocked && !be_block) {
if (barrierInst) {
status = SerializeBlocked;
} else {
// if (barrierInst) {
// status = SerializeBlocked;
// } else {
status = Running;
}
// }
ret_val = true;
}
return ret_val;
@@ -810,6 +850,7 @@ template <class Impl>
typename Impl::DynInstPtr
FrontEnd<Impl>::getInstFromCacheline()
{
/*
if (status == SerializeComplete) {
DynInstPtr inst = barrierInst;
status = Running;
@@ -817,7 +858,7 @@ FrontEnd<Impl>::getInstFromCacheline()
inst->clearSerializeBefore();
return inst;
}
*/
InstSeqNum inst_seq;
MachInst inst;
// @todo: Fix this magic number used here to handle word offset (and
@@ -932,6 +973,7 @@ FrontEnd<Impl>::doSwitchOut()
squash(0, 0);
instBuffer.clear();
instBufferSize = 0;
feBuffer.clear();
status = Idle;
}

View File

@@ -284,7 +284,7 @@ InorderBackEnd<Impl>::executeInsts()
}
inst->setExecuted();
inst->setCompleted();
inst->setResultReady();
inst->setCanCommit();
instList.pop_front();

View File

@@ -850,13 +850,13 @@ template <class Impl>
void
InstQueue<Impl>::addReadyMemInst(DynInstPtr &ready_inst)
{
OpClass op_class = ready_inst->opClass();
// OpClass op_class = ready_inst->opClass();
readyInsts.push(ready_inst);
DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
"the ready list, PC %#x opclass:%i [sn:%lli].\n",
ready_inst->readPC(), op_class, ready_inst->seqNum);
ready_inst->readPC(), ready_inst->opClass(), ready_inst->seqNum);
}
/*
template <class Impl>
@@ -1177,11 +1177,11 @@ InstQueue<Impl>::addIfReady(DynInstPtr &inst)
return;
}
OpClass op_class = inst->opClass();
// OpClass op_class = inst->opClass();
DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
"the ready list, PC %#x opclass:%i [sn:%lli].\n",
inst->readPC(), op_class, inst->seqNum);
inst->readPC(), inst->opClass(), inst->seqNum);
readyInsts.push(inst);
}

View File

@@ -80,7 +80,7 @@ class LWBackEnd
TimeBuffer<IssueToExec> i2e;
typename TimeBuffer<IssueToExec>::wire instsToExecute;
TimeBuffer<ExecToCommit> e2c;
TimeBuffer<Writeback> numInstsToWB;
TimeBuffer<int> numInstsToWB;
TimeBuffer<CommStruct> *comm;
typename TimeBuffer<CommStruct>::wire toIEW;
@@ -139,7 +139,7 @@ class LWBackEnd
Tick lastCommitCycle;
bool robEmpty() { return instList.empty(); }
bool robEmpty() { return numInsts == 0; }
bool isFull() { return numInsts >= numROBEntries; }
bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
@@ -194,6 +194,7 @@ class LWBackEnd
}
void instToCommit(DynInstPtr &inst);
void readyInstsForCommit();
void switchOut();
void doSwitchOut();
@@ -255,12 +256,13 @@ class LWBackEnd
RenameTable<Impl> renameTable;
private:
int latency;
// General back end width. Used if the more specific isn't given.
int width;
// Dispatch width.
int dispatchWidth;
int numDispatchEntries;
int dispatchSize;
int waitingInsts;
@@ -285,6 +287,7 @@ class LWBackEnd
int numROBEntries;
int numInsts;
bool lsqLimits;
std::set<InstSeqNum> waitingMemOps;
typedef std::set<InstSeqNum>::iterator MemIt;
@@ -295,9 +298,6 @@ class LWBackEnd
InstSeqNum squashSeqNum;
Addr squashNextPC;
Fault faultFromFetch;
bool fetchHasFault;
bool switchedOut;
bool switchPending;
@@ -321,8 +321,6 @@ class LWBackEnd
std::list<DynInstPtr> replayList;
std::list<DynInstPtr> writeback;
int latency;
int squashLatency;
bool exactFullStall;
@@ -331,37 +329,39 @@ class LWBackEnd
/* Stats::Scalar<> dcacheStallCycles;
Counter lastDcacheStall;
*/
Stats::Vector<> rob_cap_events;
Stats::Vector<> rob_cap_inst_count;
Stats::Vector<> iq_cap_events;
Stats::Vector<> iq_cap_inst_count;
Stats::Vector<> robCapEvents;
Stats::Vector<> robCapInstCount;
Stats::Vector<> iqCapEvents;
Stats::Vector<> iqCapInstCount;
// total number of instructions executed
Stats::Vector<> exe_inst;
Stats::Vector<> exe_swp;
Stats::Vector<> exe_nop;
Stats::Vector<> exe_refs;
Stats::Vector<> exe_loads;
Stats::Vector<> exe_branches;
Stats::Vector<> exeInst;
Stats::Vector<> exeSwp;
Stats::Vector<> exeNop;
Stats::Vector<> exeRefs;
Stats::Vector<> exeLoads;
Stats::Vector<> exeBranches;
Stats::Vector<> issued_ops;
Stats::Vector<> issuedOps;
// total number of loads forwarded from LSQ stores
Stats::Vector<> lsq_forw_loads;
Stats::Vector<> lsqForwLoads;
// total number of loads ignored due to invalid addresses
Stats::Vector<> inv_addr_loads;
Stats::Vector<> invAddrLoads;
// total number of software prefetches ignored due to invalid addresses
Stats::Vector<> inv_addr_swpfs;
Stats::Vector<> invAddrSwpfs;
// ready loads blocked due to memory disambiguation
Stats::Vector<> lsq_blocked_loads;
Stats::Vector<> lsqBlockedLoads;
Stats::Scalar<> lsqInversion;
Stats::Vector<> n_issued_dist;
Stats::VectorDistribution<> issue_delay_dist;
Stats::Vector<> nIssuedDist;
/*
Stats::VectorDistribution<> issueDelayDist;
Stats::VectorDistribution<> queue_res_dist;
Stats::VectorDistribution<> queueResDist;
*/
/*
Stats::Vector<> stat_fu_busy;
Stats::Vector2d<> stat_fuBusy;
@@ -379,37 +379,37 @@ class LWBackEnd
Stats::Formula commit_ipb;
Stats::Formula lsq_inv_rate;
*/
Stats::Vector<> writeback_count;
Stats::Vector<> producer_inst;
Stats::Vector<> consumer_inst;
Stats::Vector<> wb_penalized;
Stats::Vector<> writebackCount;
Stats::Vector<> producerInst;
Stats::Vector<> consumerInst;
Stats::Vector<> wbPenalized;
Stats::Formula wb_rate;
Stats::Formula wb_fanout;
Stats::Formula wb_penalized_rate;
Stats::Formula wbRate;
Stats::Formula wbFanout;
Stats::Formula wbPenalizedRate;
// total number of instructions committed
Stats::Vector<> stat_com_inst;
Stats::Vector<> stat_com_swp;
Stats::Vector<> stat_com_refs;
Stats::Vector<> stat_com_loads;
Stats::Vector<> stat_com_membars;
Stats::Vector<> stat_com_branches;
Stats::Vector<> statComInst;
Stats::Vector<> statComSwp;
Stats::Vector<> statComRefs;
Stats::Vector<> statComLoads;
Stats::Vector<> statComMembars;
Stats::Vector<> statComBranches;
Stats::Distribution<> n_committed_dist;
Stats::Distribution<> nCommittedDist;
Stats::Scalar<> commit_eligible_samples;
Stats::Vector<> commit_eligible;
Stats::Scalar<> commitEligibleSamples;
Stats::Vector<> commitEligible;
Stats::Vector<> squashedInsts;
Stats::Vector<> ROBSquashedInsts;
Stats::Scalar<> ROB_fcount;
Stats::Formula ROB_full_rate;
Stats::Scalar<> ROBFcount;
Stats::Formula ROBFullRate;
Stats::Vector<> ROB_count; // cumulative ROB occupancy
Stats::Formula ROB_occ_rate;
Stats::VectorDistribution<> ROB_occ_dist;
Stats::Vector<> ROBCount; // cumulative ROB occupancy
Stats::Formula ROBOccRate;
// Stats::VectorDistribution<> ROBOccDist;
public:
void dumpInsts();

View File

@@ -141,13 +141,14 @@ LWBackEnd<Impl>::replayMemInst(DynInstPtr &inst)
template <class Impl>
LWBackEnd<Impl>::LWBackEnd(Params *params)
: d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
: d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(params->backEndLatency, 0),
trapSquash(false), tcSquash(false),
width(params->backEndWidth), exactFullStall(true)
latency(params->backEndLatency),
width(params->backEndWidth), lsqLimits(params->lsqLimits),
exactFullStall(true)
{
numROBEntries = params->numROBEntries;
numInsts = 0;
numDispatchEntries = 32;
maxOutstandingMemOps = params->maxOutstandingMemOps;
numWaitingMemOps = 0;
waitingInsts = 0;
@@ -184,78 +185,79 @@ void
LWBackEnd<Impl>::regStats()
{
using namespace Stats;
rob_cap_events
LSQ.regStats();
robCapEvents
.init(cpu->number_of_threads)
.name(name() + ".ROB:cap_events")
.desc("number of cycles where ROB cap was active")
.flags(total)
;
rob_cap_inst_count
robCapInstCount
.init(cpu->number_of_threads)
.name(name() + ".ROB:cap_inst")
.desc("number of instructions held up by ROB cap")
.flags(total)
;
iq_cap_events
iqCapEvents
.init(cpu->number_of_threads)
.name(name() +".IQ:cap_events" )
.desc("number of cycles where IQ cap was active")
.flags(total)
;
iq_cap_inst_count
iqCapInstCount
.init(cpu->number_of_threads)
.name(name() + ".IQ:cap_inst")
.desc("number of instructions held up by IQ cap")
.flags(total)
;
exe_inst
exeInst
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:count")
.desc("number of insts issued")
.flags(total)
;
exe_swp
exeSwp
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:swp")
.desc("number of swp insts issued")
.flags(total)
;
exe_nop
exeNop
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:nop")
.desc("number of nop insts issued")
.flags(total)
;
exe_refs
exeRefs
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:refs")
.desc("number of memory reference insts issued")
.flags(total)
;
exe_loads
exeLoads
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:loads")
.desc("number of load insts issued")
.flags(total)
;
exe_branches
exeBranches
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:branches")
.desc("Number of branches issued")
.flags(total)
;
issued_ops
issuedOps
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:op_count")
.desc("number of insts issued")
@@ -272,28 +274,28 @@ LWBackEnd<Impl>::regStats()
//
// Other stats
//
lsq_forw_loads
lsqForwLoads
.init(cpu->number_of_threads)
.name(name() + ".LSQ:forw_loads")
.desc("number of loads forwarded via LSQ")
.flags(total)
;
inv_addr_loads
invAddrLoads
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:addr_loads")
.desc("number of invalid-address loads")
.flags(total)
;
inv_addr_swpfs
invAddrSwpfs
.init(cpu->number_of_threads)
.name(name() + ".ISSUE:addr_swpfs")
.desc("number of invalid-address SW prefetches")
.flags(total)
;
lsq_blocked_loads
lsqBlockedLoads
.init(cpu->number_of_threads)
.name(name() + ".LSQ:blocked_loads")
.desc("number of ready loads not issued due to memory disambiguation")
@@ -305,51 +307,52 @@ LWBackEnd<Impl>::regStats()
.desc("Number of times LSQ instruction issued early")
;
n_issued_dist
nIssuedDist
.init(issueWidth + 1)
.name(name() + ".ISSUE:issued_per_cycle")
.desc("Number of insts issued each cycle")
.flags(total | pdf | dist)
;
issue_delay_dist
/*
issueDelayDist
.init(Num_OpClasses,0,99,2)
.name(name() + ".ISSUE:")
.desc("cycles from operands ready to issue")
.flags(pdf | cdf)
;
queue_res_dist
queueResDist
.init(Num_OpClasses, 0, 99, 2)
.name(name() + ".IQ:residence:")
.desc("cycles from dispatch to issue")
.flags(total | pdf | cdf )
;
for (int i = 0; i < Num_OpClasses; ++i) {
queue_res_dist.subname(i, opClassStrings[i]);
queueResDist.subname(i, opClassStrings[i]);
}
writeback_count
*/
writebackCount
.init(cpu->number_of_threads)
.name(name() + ".WB:count")
.desc("cumulative count of insts written-back")
.flags(total)
;
producer_inst
producerInst
.init(cpu->number_of_threads)
.name(name() + ".WB:producers")
.desc("num instructions producing a value")
.flags(total)
;
consumer_inst
consumerInst
.init(cpu->number_of_threads)
.name(name() + ".WB:consumers")
.desc("num instructions consuming a value")
.flags(total)
;
wb_penalized
wbPenalized
.init(cpu->number_of_threads)
.name(name() + ".WB:penalized")
.desc("number of instrctions required to write to 'other' IQ")
@@ -357,71 +360,71 @@ LWBackEnd<Impl>::regStats()
;
wb_penalized_rate
wbPenalizedRate
.name(name() + ".WB:penalized_rate")
.desc ("fraction of instructions written-back that wrote to 'other' IQ")
.flags(total)
;
wb_penalized_rate = wb_penalized / writeback_count;
wbPenalizedRate = wbPenalized / writebackCount;
wb_fanout
wbFanout
.name(name() + ".WB:fanout")
.desc("average fanout of values written-back")
.flags(total)
;
wb_fanout = producer_inst / consumer_inst;
wbFanout = producerInst / consumerInst;
wb_rate
wbRate
.name(name() + ".WB:rate")
.desc("insts written-back per cycle")
.flags(total)
;
wb_rate = writeback_count / cpu->numCycles;
wbRate = writebackCount / cpu->numCycles;
stat_com_inst
statComInst
.init(cpu->number_of_threads)
.name(name() + ".COM:count")
.desc("Number of instructions committed")
.flags(total)
;
stat_com_swp
statComSwp
.init(cpu->number_of_threads)
.name(name() + ".COM:swp_count")
.desc("Number of s/w prefetches committed")
.flags(total)
;
stat_com_refs
statComRefs
.init(cpu->number_of_threads)
.name(name() + ".COM:refs")
.desc("Number of memory references committed")
.flags(total)
;
stat_com_loads
statComLoads
.init(cpu->number_of_threads)
.name(name() + ".COM:loads")
.desc("Number of loads committed")
.flags(total)
;
stat_com_membars
statComMembars
.init(cpu->number_of_threads)
.name(name() + ".COM:membars")
.desc("Number of memory barriers committed")
.flags(total)
;
stat_com_branches
statComBranches
.init(cpu->number_of_threads)
.name(name() + ".COM:branches")
.desc("Number of branches committed")
.flags(total)
;
n_committed_dist
nCommittedDist
.init(0,commitWidth,1)
.name(name() + ".COM:committed_per_cycle")
.desc("Number of insts commited each cycle")
@@ -441,14 +444,14 @@ LWBackEnd<Impl>::regStats()
// -> The standard deviation is computed only over cycles where
// we reached the BW limit
//
commit_eligible
commitEligible
.init(cpu->number_of_threads)
.name(name() + ".COM:bw_limited")
.desc("number of insts not committed due to BW limits")
.flags(total)
;
commit_eligible_samples
commitEligibleSamples
.name(name() + ".COM:bw_lim_events")
.desc("number cycles where commit BW limit reached")
;
@@ -465,37 +468,38 @@ LWBackEnd<Impl>::regStats()
.desc("Number of instructions removed from inst list when they reached the head of the ROB")
;
ROB_fcount
ROBFcount
.name(name() + ".ROB:full_count")
.desc("number of cycles where ROB was full")
;
ROB_count
ROBCount
.init(cpu->number_of_threads)
.name(name() + ".ROB:occupancy")
.desc(name() + ".ROB occupancy (cumulative)")
.flags(total)
;
ROB_full_rate
ROBFullRate
.name(name() + ".ROB:full_rate")
.desc("ROB full per cycle")
;
ROB_full_rate = ROB_fcount / cpu->numCycles;
ROBFullRate = ROBFcount / cpu->numCycles;
ROB_occ_rate
ROBOccRate
.name(name() + ".ROB:occ_rate")
.desc("ROB occupancy rate")
.flags(total)
;
ROB_occ_rate = ROB_count / cpu->numCycles;
ROB_occ_dist
ROBOccRate = ROBCount / cpu->numCycles;
/*
ROBOccDist
.init(cpu->number_of_threads,0,numROBEntries,2)
.name(name() + ".ROB:occ_dist")
.desc("ROB Occupancy per cycle")
.flags(total | cdf)
;
*/
}
template <class Impl>
@@ -588,18 +592,22 @@ LWBackEnd<Impl>::tick()
{
DPRINTF(BE, "Ticking back end\n");
// Read in any done instruction information and update the IQ or LSQ.
updateStructures();
if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) {
cpu->signalSwitched();
return;
}
ROB_count[0]+= numInsts;
readyInstsForCommit();
numInstsToWB.advance();
ROBCount[0]+= numInsts;
wbCycle = 0;
// Read in any done instruction information and update the IQ or LSQ.
updateStructures();
#if FULL_SYSTEM
checkInterrupts();
#endif
@@ -674,6 +682,10 @@ LWBackEnd<Impl>::dispatchInsts()
while (numInsts < numROBEntries &&
numWaitingMemOps < maxOutstandingMemOps) {
// Get instruction from front of time buffer
if (lsqLimits && LSQ.isFull()) {
break;
}
DynInstPtr inst = frontEnd->getInst();
if (!inst) {
break;
@@ -732,6 +744,7 @@ LWBackEnd<Impl>::dispatchInsts()
inst->setIssued();
inst->setExecuted();
inst->setCanCommit();
numInstsToWB[0]++;
} else {
DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
"exeList.\n",
@@ -866,8 +879,17 @@ LWBackEnd<Impl>::executeInsts()
if (inst->isLoad()) {
LSQ.executeLoad(inst);
} else if (inst->isStore()) {
LSQ.executeStore(inst);
if (inst->req && !(inst->req->getFlags() & LOCKED)) {
Fault fault = LSQ.executeStore(inst);
if (!inst->isStoreConditional() && fault == NoFault) {
inst->setExecuted();
instToCommit(inst);
} else if (fault != NoFault) {
// If the instruction faulted, then we need to send it along to commit
// without the instruction completing.
// Send this instruction to commit, also make sure iew stage
// realizes there is activity.
inst->setExecuted();
instToCommit(inst);
@@ -908,36 +930,54 @@ LWBackEnd<Impl>::executeInsts()
}
}
issued_ops[0]+= num_executed;
n_issued_dist[num_executed]++;
issuedOps[0]+= num_executed;
nIssuedDist[num_executed]++;
}
template<class Impl>
void
LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
{
DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
inst->seqNum, inst->readPC());
if (!inst->isSquashed()) {
DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
inst->seqNum, inst->readPC());
inst->setCanCommit();
if (inst->isExecuted()) {
inst->setResultReady();
int dependents = wakeDependents(inst);
if (dependents) {
producer_inst[0]++;
consumer_inst[0]+= dependents;
producerInst[0]++;
consumerInst[0]+= dependents;
}
}
}
writeback_count[0]++;
writeback.push_back(inst);
numInstsToWB[0]++;
writebackCount[0]++;
}
/**
 * Drains instructions from the front of the writeback list and marks
 * the non-squashed ones as able to commit.
 *
 * The number of instructions drained is bounded by the count read from
 * numInstsToWB at offset -latency and by the current size of the
 * writeback list.
 * NOTE(review): presumably that count is the number of instructions
 * queued for writeback `latency` cycles earlier -- confirm against
 * TimeBuffer semantics.
 */
template <class Impl>
void
LWBackEnd<Impl>::readyInstsForCommit()
{
    int remaining = numInstsToWB[-latency];

    while (remaining && !writeback.empty()) {
        DynInstPtr wb_inst = writeback.front();
        writeback.pop_front();
        --remaining;

        // Squashed instructions are dropped from the list without being
        // marked as committable.
        if (wb_inst->isSquashed())
            continue;

        DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
                wb_inst->seqNum, wb_inst->readPC());

        wb_inst->setCanCommit();
    }
}
#if 0
template <class Impl>
void
@@ -1010,7 +1050,7 @@ LWBackEnd<Impl>::commitInst(int inst_num)
// or store inst. Signal backwards that it should be executed.
if (!inst->isExecuted()) {
if (inst->isNonSpeculative() ||
inst->isStoreConditional() ||
(inst->isStoreConditional() && inst->getFault() == NoFault) ||
inst->isMemBarrier() ||
inst->isWriteBarrier()) {
#if !FULL_SYSTEM
@@ -1151,6 +1191,20 @@ LWBackEnd<Impl>::commitInst(int inst_num)
++freed_regs;
}
#if FULL_SYSTEM
if (thread->profile) {
// bool usermode =
// (xc->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
// thread->profilePC = usermode ? 1 : inst->readPC();
thread->profilePC = inst->readPC();
ProfileNode *node = thread->profile->consume(thread->getXCProxy(),
inst->staticInst);
if (node)
thread->profileNode = node;
}
#endif
if (inst->traceData) {
inst->traceData->setFetchSeq(inst->seqNum);
inst->traceData->setCPSeq(thread->numInst);
@@ -1158,6 +1212,9 @@ LWBackEnd<Impl>::commitInst(int inst_num)
inst->traceData = NULL;
}
if (inst->isCopy())
panic("Should not commit any copy instructions!");
inst->clearDependents();
frontEnd->addFreeRegs(freed_regs);
@@ -1207,9 +1264,9 @@ LWBackEnd<Impl>::commitInsts()
while (!instList.empty() && inst_num < commitWidth) {
if (instList.back()->isSquashed()) {
instList.back()->clearDependents();
ROBSquashedInsts[instList.back()->threadNumber]++;
instList.pop_back();
--numInsts;
ROBSquashedInsts[instList.back()->threadNumber]++;
continue;
}
@@ -1221,7 +1278,7 @@ LWBackEnd<Impl>::commitInsts()
break;
}
}
n_committed_dist.sample(inst_num);
nCommittedDist.sample(inst_num);
}
template <class Impl>
@@ -1231,10 +1288,10 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
LSQ.squash(sn);
int freed_regs = 0;
InstListIt waiting_list_end = waitingList.end();
InstListIt insts_end_it = waitingList.end();
InstListIt insts_it = waitingList.begin();
while (insts_it != waiting_list_end && (*insts_it)->seqNum > sn)
while (insts_it != insts_end_it && (*insts_it)->seqNum > sn)
{
if ((*insts_it)->isSquashed()) {
++insts_it;
@@ -1260,6 +1317,7 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
while (!instList.empty() && (*insts_it)->seqNum > sn)
{
if ((*insts_it)->isSquashed()) {
panic("Instruction should not be already squashed and on list!");
++insts_it;
continue;
}
@@ -1291,18 +1349,6 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
--numInsts;
}
insts_it = waitingList.begin();
while (!waitingList.empty() && insts_it != waitingList.end()) {
if ((*insts_it)->seqNum < sn) {
++insts_it;
continue;
}
assert((*insts_it)->isSquashed());
waitingList.erase(insts_it++);
waitingInsts--;
}
while (memBarrier && memBarrier->seqNum > sn) {
DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously "
"squashed)\n", memBarrier->seqNum);
@@ -1320,6 +1366,18 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
}
}
insts_it = replayList.begin();
insts_end_it = replayList.end();
while (!replayList.empty() && insts_it != insts_end_it) {
if ((*insts_it)->seqNum < sn) {
++insts_it;
continue;
}
assert((*insts_it)->isSquashed());
replayList.erase(insts_it++);
}
frontEnd->addFreeRegs(freed_regs);
}
@@ -1390,14 +1448,6 @@ LWBackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
frontEnd->squash(inst->seqNum - 1, inst->readPC());
}
/**
 * Stores the supplied fault in faultFromFetch and raises the
 * fetchHasFault flag.
 *
 * @param fault The fault to remember.
 */
template <class Impl>
void
LWBackEnd<Impl>::fetchFault(Fault &fault)
{
    // The two updates are independent of each other.
    fetchHasFault = true;
    faultFromFetch = fault;
}
template <class Impl>
void
LWBackEnd<Impl>::switchOut()
@@ -1416,17 +1466,25 @@ LWBackEnd<Impl>::doSwitchOut()
// yet written back.
assert(robEmpty());
assert(!LSQ.hasStoresToWB());
writeback.clear();
for (int i = 0; i < numInstsToWB.getSize() + 1; ++i)
numInstsToWB.advance();
// squash(0);
assert(waitingList.empty());
assert(instList.empty());
assert(replayList.empty());
assert(writeback.empty());
LSQ.switchOut();
squash(0);
}
template <class Impl>
void
LWBackEnd<Impl>::takeOverFrom(ThreadContext *old_tc)
{
switchedOut = false;
assert(!squashPending);
squashSeqNum = 0;
squashNextPC = 0;
tcSquash = false;
trapSquash = false;
@@ -1451,27 +1509,27 @@ LWBackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
//
#ifdef TARGET_ALPHA
if (inst->isDataPrefetch())
exe_swp[thread_number]++;
exeSwp[thread_number]++;
else
exe_inst[thread_number]++;
exeInst[thread_number]++;
#else
exe_inst[thread_number]++;
exeInst[thread_number]++;
#endif
//
// Control operations
//
if (inst->isControl())
exe_branches[thread_number]++;
exeBranches[thread_number]++;
//
// Memory operations
//
if (inst->isMemRef()) {
exe_refs[thread_number]++;
exeRefs[thread_number]++;
if (inst->isLoad())
exe_loads[thread_number]++;
exeLoads[thread_number]++;
}
}
@@ -1491,33 +1549,33 @@ LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
//
#ifdef TARGET_ALPHA
if (inst->isDataPrefetch()) {
stat_com_swp[tid]++;
statComSwp[tid]++;
} else {
stat_com_inst[tid]++;
statComInst[tid]++;
}
#else
stat_com_inst[tid]++;
statComInst[tid]++;
#endif
//
// Control Instructions
//
if (inst->isControl())
stat_com_branches[tid]++;
statComBranches[tid]++;
//
// Memory references
//
if (inst->isMemRef()) {
stat_com_refs[tid]++;
statComRefs[tid]++;
if (inst->isLoad()) {
stat_com_loads[tid]++;
statComLoads[tid]++;
}
}
if (inst->isMemBarrier()) {
stat_com_membars[tid]++;
statComMembars[tid]++;
}
}
@@ -1569,6 +1627,45 @@ LWBackEnd<Impl>::dumpInsts()
++num;
}
inst_list_it = --(writeback.end());
cprintf("Writeback list size: %i\n", writeback.size());
while (inst_list_it != writeback.end())
{
cprintf("Instruction:%i\n",
num);
if (!(*inst_list_it)->isSquashed()) {
if (!(*inst_list_it)->isIssued()) {
++valid_num;
cprintf("Count:%i\n", valid_num);
} else if ((*inst_list_it)->isMemRef() &&
!(*inst_list_it)->memOpDone) {
// Loads that have not been marked as executed still count
// towards the total instructions.
++valid_num;
cprintf("Count:%i\n", valid_num);
}
}
cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
"Issued:%i\nSquashed:%i\n",
(*inst_list_it)->readPC(),
(*inst_list_it)->seqNum,
(*inst_list_it)->threadNumber,
(*inst_list_it)->isIssued(),
(*inst_list_it)->isSquashed());
if ((*inst_list_it)->isMemRef()) {
cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
}
cprintf("\n");
inst_list_it--;
++num;
}
cprintf("Waiting list size: %i\n", waitingList.size());
inst_list_it = --(waitingList.end());

View File

@@ -84,6 +84,8 @@ class OzoneLWLSQ {
/** Returns the name of the LSQ unit. */
std::string name() const;
void regStats();
/** Sets the CPU pointer. */
void setCPU(OzoneCPU *cpu_ptr);
@@ -179,7 +181,7 @@ class OzoneLWLSQ {
int numLoads() { return loads; }
/** Returns the number of stores in the SQ. */
int numStores() { return stores; }
int numStores() { return stores + storesInFlight; }
/** Returns if either the LQ or SQ is full. */
bool isFull() { return lqFull() || sqFull(); }
@@ -188,7 +190,7 @@ class OzoneLWLSQ {
bool lqFull() { return loads >= (LQEntries - 1); }
/** Returns if the SQ is full. */
bool sqFull() { return stores >= (SQEntries - 1); }
bool sqFull() { return (stores + storesInFlight) >= (SQEntries - 1); }
/** Debugging function to dump instructions in the LSQ. */
void dumpInsts();
@@ -223,7 +225,9 @@ class OzoneLWLSQ {
void storePostSend(Packet *pkt, DynInstPtr &inst);
/** Completes the store at the specified index. */
void completeStore(int store_idx);
void completeStore(DynInstPtr &inst);
void removeStore(int store_idx);
/** Handles doing the retry. */
void recvRetry();
@@ -394,6 +398,10 @@ class OzoneLWLSQ {
int storesToWB;
public:
int storesInFlight;
private:
/// @todo Consider moving to a more advanced model with write vs read ports
/** The number of cache ports available each cycle. */
int cachePorts;
@@ -403,6 +411,9 @@ class OzoneLWLSQ {
//list<InstSeqNum> mshrSeqNums;
/** Total number of memory ordering violations. */
Stats::Scalar<> lsqMemOrderViolation;
//Stats::Scalar<> dcacheStallCycles;
Counter lastDcacheStall;
@@ -525,7 +536,7 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
store_size = (*sq_it).size;
if (store_size == 0) {
if (store_size == 0 || (*sq_it).committed) {
sq_it++;
continue;
}

View File

@@ -132,7 +132,7 @@ OzoneLWLSQ<Impl>::completeDataAccess(PacketPtr pkt)
template <class Impl>
OzoneLWLSQ<Impl>::OzoneLWLSQ()
: switchedOut(false), dcachePort(this), loads(0), stores(0),
storesToWB(0), stalled(false), isStoreBlocked(false),
storesToWB(0), storesInFlight(0), stalled(false), isStoreBlocked(false),
isLoadBlocked(false), loadBlockedHandled(false)
{
}
@@ -173,6 +173,11 @@ OzoneLWLSQ<Impl>::name() const
template<class Impl>
void
OzoneLWLSQ<Impl>::regStats()
{
lsqMemOrderViolation
.name(name() + ".memOrderViolation")
.desc("Number of memory ordering violations");
OzoneLWLSQ<Impl>::setCPU(OzoneCPU *cpu_ptr)
{
cpu = cpu_ptr;
@@ -321,7 +326,7 @@ unsigned
OzoneLWLSQ<Impl>::numFreeEntries()
{
unsigned free_lq_entries = LQEntries - loads;
unsigned free_sq_entries = SQEntries - stores;
unsigned free_sq_entries = SQEntries - (stores + storesInFlight);
// Both the LQ and SQ entries have an extra dummy entry to differentiate
// empty/full conditions. Subtract 1 from the free entries.
@@ -385,6 +390,9 @@ OzoneLWLSQ<Impl>::executeLoad(DynInstPtr &inst)
// Actually probably want the oldest faulting load
if (load_fault != NoFault) {
DPRINTF(OzoneLSQ, "Load [sn:%lli] has a fault\n", inst->seqNum);
if (!(inst->req->flags & UNCACHEABLE && !inst->isAtCommit())) {
inst->setExecuted();
}
// Maybe just set it as can commit here, although that might cause
// some other problems with sending traps to the ROB too quickly.
be->instToCommit(inst);
@@ -461,6 +469,7 @@ OzoneLWLSQ<Impl>::executeStore(DynInstPtr &store_inst)
// A load incorrectly passed this store. Squash and refetch.
// For now return a fault to show that it was unsuccessful.
memDepViolator = (*lq_it);
++lsqMemOrderViolation;
return TheISA::genMachineCheckFault();
}
@@ -553,8 +562,8 @@ OzoneLWLSQ<Impl>::writebackStores()
if ((*sq_it).size == 0 && !(*sq_it).completed) {
sq_it--;
completeStore(inst->sqIdx);
removeStore(inst->sqIdx);
completeStore(inst);
continue;
}
@@ -626,6 +635,8 @@ OzoneLWLSQ<Impl>::writebackStores()
inst->sqIdx,inst->readPC(),
req->paddr, *(req->data),
inst->seqNum);
DPRINTF(OzoneLSQ, "StoresInFlight: %i\n",
storesInFlight + 1);
if (dcacheInterface) {
assert(!req->completionEvent);
@@ -687,6 +698,8 @@ OzoneLWLSQ<Impl>::writebackStores()
}
sq_it--;
}
++storesInFlight;
// removeStore(inst->sqIdx);
} else {
panic("Must HAVE DCACHE!!!!!\n");
}
@@ -704,7 +717,7 @@ void
OzoneLWLSQ<Impl>::squash(const InstSeqNum &squashed_num)
{
DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!"
"(Loads:%i Stores:%i)\n",squashed_num,loads,stores);
"(Loads:%i Stores:%i)\n",squashed_num,loads,stores+storesInFlight);
LQIt lq_it = loadQueue.begin();
@@ -881,7 +894,7 @@ OzoneLWLSQ<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
template <class Impl>
void
OzoneLWLSQ<Impl>::completeStore(int store_idx)
OzoneLWLSQ<Impl>::removeStore(int store_idx)
{
SQHashIt sq_hash_it = SQItHash.find(store_idx);
assert(sq_hash_it != SQItHash.end());
@@ -891,8 +904,6 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
(*sq_it).completed = true;
DynInstPtr inst = (*sq_it).inst;
--storesToWB;
if (isStalled() &&
inst->seqNum == stallingStoreIsn) {
DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
@@ -910,6 +921,13 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
SQItHash.erase(sq_hash_it);
SQIndices.push(inst->sqIdx);
storeQueue.erase(sq_it);
}
template <class Impl>
void
OzoneLWLSQ<Impl>::completeStore(DynInstPtr &inst)
{
--storesToWB;
--stores;
inst->setCompleted();
@@ -935,9 +953,14 @@ OzoneLWLSQ<Impl>::switchOut()
switchedOut = true;
// Clear the queue to free up resources
assert(stores == 0);
assert(storeQueue.empty());
assert(loads == 0);
assert(loadQueue.empty());
assert(storesInFlight == 0);
storeQueue.clear();
loadQueue.clear();
loads = stores = storesToWB = 0;
loads = stores = storesToWB = storesInFlight = 0;
}
template <class Impl>

View File

@@ -71,10 +71,11 @@ class SimpleParams : public BaseCPU::Params
unsigned cachePorts;
unsigned width;
unsigned frontEndLatency;
unsigned frontEndWidth;
unsigned backEndLatency;
unsigned backEndWidth;
unsigned backEndSquashLatency;
unsigned backEndLatency;
unsigned maxInstBufferSize;
unsigned numPhysicalRegs;
unsigned maxOutstandingMemOps;
@@ -150,6 +151,7 @@ class SimpleParams : public BaseCPU::Params
//
unsigned LQEntries;
unsigned SQEntries;
bool lsqLimits;
//
// Memory dependence

View File

@@ -34,9 +34,12 @@
#include "arch/faults.hh"
#include "arch/types.hh"
#include "arch/regfile.hh"
#include "base/callback.hh"
#include "base/output.hh"
#include "cpu/thread_context.hh"
#include "cpu/thread_state.hh"
#include "sim/process.hh"
#include "sim/sim_exit.hh"
class Event;
//class Process;
@@ -65,8 +68,21 @@ struct OzoneThreadState : public ThreadState {
#if FULL_SYSTEM
OzoneThreadState(CPUType *_cpu, int _thread_num)
: ThreadState(-1, _thread_num),
intrflag(0), inSyscall(0), trapPending(0)
cpu(_cpu), intrflag(0), inSyscall(0), trapPending(0)
{
if (cpu->params->profile) {
profile = new FunctionProfile(cpu->params->system->kernelSymtab);
Callback *cb =
new MakeCallback<OzoneThreadState,
&OzoneThreadState::dumpFuncProfile>(this);
registerExitCallback(cb);
}
// let's fill with a dummy node for now so we don't get a segfault
// on the first cycle when there's no node available.
static ProfileNode dummyNode;
profileNode = &dummyNode;
profilePC = 3;
miscRegFile.clear();
}
#else
@@ -130,6 +146,14 @@ struct OzoneThreadState : public ThreadState {
void setNextPC(uint64_t val)
{ nextPC = val; }
#if FULL_SYSTEM
/**
 * Dumps the function profile to a stream that simout creates under the
 * name "profile.<cpu name>.dat".
 */
void dumpFuncProfile()
{
    // simout hands back a pointer to the newly created stream; bind it
    // once and pass it straight to the profile dump.
    std::ostream &profile_out =
        *simout.create(csprintf("profile.%s.dat", cpu->name()));
    profile->dump(xcProxy, profile_out);
}
#endif
};
#endif // __CPU_OZONE_THREAD_STATE_HH__

View File

@@ -170,7 +170,7 @@ BaseSimpleCPU::regStats()
void
BaseSimpleCPU::resetStats()
{
startNumInst = numInst;
// startNumInst = numInst;
// notIdleFraction = (_status != Idle);
}

View File

@@ -162,6 +162,11 @@ SimpleThread::takeOverFrom(ThreadContext *oldContext)
if (quiesceEvent) {
quiesceEvent->tc = tc;
}
Kernel::Statistics *stats = oldContext->getKernelStats();
if (stats) {
kernelStats = stats;
}
#endif
storeCondFailures = 0;

View File

@@ -32,6 +32,7 @@
#define __CPU_THREAD_STATE_HH__
#include "arch/types.hh"
#include "cpu/profile.hh"
#include "cpu/thread_context.hh"
#if !FULL_SYSTEM
@@ -191,6 +192,21 @@ struct ThreadState {
// simulation only; all functional memory accesses should use
// one of the FunctionalMemory pointers above.
short asid;
#endif
#if FULL_SYSTEM
/** Clears the function profile; does nothing when no profile is set. */
void profileClear()
{
    if (!profile)
        return;

    profile->clear();
}
/**
 * Passes profileNode and profilePC to the profile's sample() call; does
 * nothing when no profile is set.
 */
void profileSample()
{
    if (!profile)
        return;

    profile->sample(profileNode, profilePC);
}
#endif
/** Current instruction the thread is committing. Only set and

View File

@@ -242,6 +242,10 @@ class IdeDisk : public SimObject
Stats::Scalar<> dmaWriteFullPages;
Stats::Scalar<> dmaWriteBytes;
Stats::Scalar<> dmaWriteTxs;
Stats::Formula rdBandwidth;
Stats::Formula wrBandwidth;
Stats::Formula totBandwidth;
Stats::Formula totBytes;
public:
/**

View File

@@ -26,6 +26,9 @@ class BaseCPU(SimObject):
"terminate when all threads have reached this load count")
max_loads_any_thread = Param.Counter(0,
"terminate when any thread reaches this load count")
stats_reset_inst = Param.Counter(0,
"reset stats once this many instructions are committed")
progress_interval = Param.Tick(0, "interval to print out the progress message")
defer_registration = Param.Bool(False,
"defer registration with system (for sampling)")

View File

@@ -9,6 +9,8 @@ class DerivO3CPU(BaseCPU):
activity = Param.Unsigned(0, "Initial count")
numThreads = Param.Unsigned(1, "number of HW thread contexts")
if build_env['FULL_SYSTEM']:
profile = Param.Latency('0ns', "trace the kernel stack")
if build_env['USE_CHECKER']:
if not build_env['FULL_SYSTEM']:
checker = Param.BaseCPU(O3Checker(workload=Parent.workload,

View File

@@ -8,12 +8,15 @@ class DerivOzoneCPU(BaseCPU):
numThreads = Param.Unsigned("number of HW thread contexts")
checker = Param.BaseCPU("Checker CPU")
if build_env['FULL_SYSTEM']:
profile = Param.Latency('0ns', "trace the kernel stack")
icache_port = Port("Instruction Port")
dcache_port = Port("Data Port")
width = Param.Unsigned("Width")
frontEndWidth = Param.Unsigned("Front end width")
frontEndLatency = Param.Unsigned("Front end latency")
backEndWidth = Param.Unsigned("Back end width")
backEndSquashLatency = Param.Unsigned("Back end squash latency")
backEndLatency = Param.Unsigned("Back end latency")
@@ -76,6 +79,7 @@ class DerivOzoneCPU(BaseCPU):
LQEntries = Param.Unsigned("Number of load queue entries")
SQEntries = Param.Unsigned("Number of store queue entries")
lsqLimits = Param.Bool(True, "LSQ size limits dispatch")
LFSTSize = Param.Unsigned("Last fetched store table size")
SSITSize = Param.Unsigned("Store set ID table size")

View File

@@ -1,6 +1,7 @@
from m5.SimObject import SimObject
from m5.params import *
from Serialize import Serialize
from Serialize import Statreset
from Statistics import Statistics
from Trace import Trace
from ExeTrace import ExecutionTrace

View File

@@ -16,6 +16,7 @@ class System(SimObject):
boot_osflags = Param.String("a", "boot flags to pass to the kernel")
kernel = Param.String("file that contains the kernel code")
readfile = Param.String("", "file to read startup script from")
symbolfile = Param.String("", "file to get the symbols from")
class AlphaSystem(System):
type = 'AlphaSystem'

View File

@@ -46,6 +46,7 @@
#include "sim/host.hh" // for Tick
#include "base/fast_alloc.hh"
#include "base/misc.hh"
#include "base/trace.hh"
#include "sim/serialize.hh"
@@ -135,7 +136,7 @@ class Event : public Serializable, public FastAlloc
/// same cycle (after unscheduling the old CPU's tick event).
/// The switch needs to come before any tick events to make
/// sure we don't tick both CPUs in the same cycle.
CPU_Switch_Pri = 31,
CPU_Switch_Pri = -31,
/// Serialization needs to occur before tick events also, so
/// that a serialize/unserialize is identical to an on-line
@@ -351,7 +352,8 @@ inline void
Event::schedule(Tick t)
{
assert(!scheduled());
assert(t >= curTick);
// if (t < curTick)
// warn("t is less than curTick, ensure you don't want cycles");
setFlags(Scheduled);
#if TRACING_ON

View File

@@ -148,6 +148,54 @@ namespace AlphaPseudo
exitSimLoop(when, "m5_exit instruction encountered");
}
/**
 * Reads the system's configured symbol file and inserts every parsable
 * entry into the kernel symbol table.
 *
 * Each line is split at its first space: the text before it is taken as a
 * hexadecimal address (a "0x" prefix is added before conversion), and the
 * symbol name is taken from three characters past that space onwards
 * (skipping, per the original comment, a letter and a space -- presumably
 * an nm-style "<addr> <type> <name>" listing; confirm against the files
 * actually used). Lines that are empty, too short, unparsable, or rejected
 * by the symbol table are silently skipped.
 *
 * Returns immediately when no symbol file is configured; calls fatal()
 * when the configured file cannot be opened.
 *
 * NOTE(review): the AlphaPseudo declaration of loadsymbol visible elsewhere
 * in this change takes a ThreadContext *, while this definition still takes
 * an ExecContext *. The signature is left untouched here; confirm which
 * type is intended.
 */
void
loadsymbol(ExecContext *xc)
{
    const std::string &filename =
        xc->getCpuPtr()->system->params()->symbolfile;
    if (filename.empty()) {
        return;
    }

    std::ifstream file(filename.c_str());
    if (!file)
        fatal("file error: Can't open symbol table file %s\n", filename);

    std::string buffer;
    // Loop on the result of getline() rather than on eof(): a failed read
    // is then never parsed, and a stream error cannot spin forever.
    while (std::getline(file, buffer)) {
        if (buffer.empty())
            continue;

        // Keep the position in the string's own size type so that the
        // comparison against npos is exact on every platform.
        const std::string::size_type idx = buffer.find(' ');
        if (idx == std::string::npos)
            continue;

        std::string address = "0x" + buffer.substr(0, idx);
        eat_white(address);
        if (address.empty())
            continue;

        // Skip over letter and space. A line too short to hold a symbol
        // name is ignored instead of letting substr() throw.
        if (idx + 3 >= buffer.size())
            continue;

        std::string symbol = buffer.substr(idx + 3);
        eat_white(symbol);
        if (symbol.empty())
            continue;

        Addr addr;
        if (!to_number(address, addr))
            continue;

        if (!xc->getSystemPtr()->kernelSymtab->insert(addr, symbol))
            continue;

        DPRINTF(Loader, "Loaded symbol: %s @ %#llx\n", symbol, addr);
    }
    // The ifstream closes the file when it goes out of scope.
}
void
resetstats(ThreadContext *tc, Tick delay, Tick period)
{

View File

@@ -51,6 +51,7 @@ namespace AlphaPseudo
void ivle(ThreadContext *tc);
void m5exit(ThreadContext *tc, Tick delay);
void m5exit_old(ThreadContext *tc);
void loadsymbol(ThreadContext *xc);
void resetstats(ThreadContext *tc, Tick delay, Tick period);
void dumpstats(ThreadContext *tc, Tick delay, Tick period);
void dumpresetstats(ThreadContext *tc, Tick delay, Tick period);

View File

@@ -52,6 +52,9 @@
#include "sim/sim_exit.hh"
#include "sim/sim_object.hh"
// For stat reset hack
#include "sim/stat_control.hh"
using namespace std;
int Serializable::ckptMaxCount = 0;
@@ -404,3 +407,36 @@ Checkpoint::sectionExists(const std::string &section)
{
return db->sectionExists(section);
}
/**
 * Hacked stat reset event.
 *
 * A ParamContext subclass whose startup() hook (defined further down in
 * this file) schedules a Stats::Reset event when the reset_cycle parameter
 * is greater than zero.
 */
class StatresetParamContext : public ParamContext
{
public:
// Forwards the section name to the ParamContext base class.
StatresetParamContext(const string &section);
~StatresetParamContext();
// Schedules the stats reset event, if one was requested.
void startup();
};
// The context instance for the "statsreset" section.
StatresetParamContext statParams("statsreset");
// Tick offset, registered with statParams, that startup() adds to curTick
// to choose when stats are reset. startup() schedules nothing unless the
// value is greater than zero; the trailing 0 is presumably the default --
// confirm against the Param constructor.
Param<Tick> reset_cycle(&statParams, "reset_cycle",
"Cycle to reset stats on", 0);
// Constructor: passes the section name on to ParamContext and does no
// other work.
StatresetParamContext::StatresetParamContext(const string &section)
: ParamContext(section)
{ }
// Destructor: intentionally empty.
StatresetParamContext::~StatresetParamContext()
{
}
void
StatresetParamContext::startup()
{
if (reset_cycle > 0) {
Stats::SetupEvent(Stats::Reset, curTick + reset_cycle, 0);
cprintf("Stats reset event scheduled for %lli\n",
curTick + reset_cycle);
}
}

View File

@@ -160,13 +160,13 @@ class StatEvent : public Event
Tick repeat;
public:
StatEvent(int _flags, Tick _when, Tick _repeat);
StatEvent(EventQueue *queue, int _flags, Tick _when, Tick _repeat);
virtual void process();
virtual const char *description();
};
StatEvent::StatEvent(int _flags, Tick _when, Tick _repeat)
: Event(&mainEventQueue, Stat_Event_Pri),
StatEvent::StatEvent(EventQueue *queue, int _flags, Tick _when, Tick _repeat)
: Event(queue, Stat_Event_Pri),
flags(_flags), repeat(_repeat)
{
setFlags(AutoDelete);
@@ -185,8 +185,10 @@ StatEvent::process()
if (flags & Stats::Dump)
DumpNow();
if (flags & Stats::Reset)
if (flags & Stats::Reset) {
cprintf("Resetting stats!\n");
reset();
}
if (repeat)
schedule(curTick + repeat);
@@ -214,9 +216,12 @@ DumpNow()
}
void
SetupEvent(int flags, Tick when, Tick repeat)
SetupEvent(int flags, Tick when, Tick repeat, EventQueue *queue)
{
new StatEvent(flags, when, repeat);
if (queue == NULL)
queue = &mainEventQueue;
new StatEvent(queue, flags, when, repeat);
}
/* namespace Stats */ }

View File

@@ -34,6 +34,8 @@
#include <fstream>
#include <list>
class EventQueue;
namespace Stats {
enum {
@@ -45,7 +47,7 @@ class Output;
extern std::list<Output *> OutputList;
void DumpNow();
void SetupEvent(int flags, Tick when, Tick repeat = 0);
void SetupEvent(int flags, Tick when, Tick repeat = 0, EventQueue *queue = NULL);
void InitSimStats();

View File

@@ -182,6 +182,7 @@ class System : public SimObject
std::string kernel_path;
std::string readfile;
std::string symbolfile;
#endif
};