From 6a0c5b9fad3fa437fbea968f2ddeaad31ea51129 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 11 Jun 2006 14:38:14 -0400 Subject: [PATCH 001/152] Edit Fetch DPRINT in simple CPU src/arch/mips/isa/formats/mt.isa: change copyright to 2006 src/cpu/simple/base.cc: Only DPRINT NNPC if we are not using ALPHA src/cpu/static_inst.hh: Take Out MIPS Specific functions ... --HG-- extra : convert_revision : 7a69e80cd1564fa3b778b9dade0e9fe3cef94e64 --- src/arch/mips/isa/formats/mt.isa | 7 ++++--- src/cpu/simple/base.cc | 6 ++++++ src/cpu/static_inst.hh | 15 +++++---------- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/arch/mips/isa/formats/mt.isa b/src/arch/mips/isa/formats/mt.isa index 521b011239..96435f8c9a 100644 --- a/src/arch/mips/isa/formats/mt.isa +++ b/src/arch/mips/isa/formats/mt.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -35,14 +35,15 @@ output header {{ /** - * Base class for integer operations. + * Base class for MIPS MT ASE operations. */ class MT : public MipsStaticInst { protected: /// Constructor - MT(const char *mnem, MachInst _machInst, OpClass __opClass) : MipsStaticInst(mnem, _machInst, __opClass) + MT(const char *mnem, MachInst _machInst, OpClass __opClass) : + MipsStaticInst(mnem, _machInst, __opClass) { } diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index c1ecf39676..b854dfab24 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * Authors: Steve Reinhardt + * Korey Sewell */ #include "arch/utility.hh" @@ -358,8 +359,13 @@ Fault BaseSimpleCPU::setupFetchRequest(Request *req) { // set up memory request for instruction fetch +#if THE_ISA == ALPHA_ISA + DPRINTF(Fetch,"Fetch: PC:%08p NPC:%08p",thread->readPC(), + thread->readNextPC()); +#else DPRINTF(Fetch,"Fetch: PC:%08p NPC:%08p NNPC:%08p\n",thread->readPC(), thread->readNextPC(),thread->readNextNPC()); +#endif req->setVirt(0, thread->readPC() & ~3, sizeof(MachInst), (FULL_SYSTEM && (thread->readPC() & 1)) ? PHYSICAL : 0, diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index bea52f510c..a980786343 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -34,6 +34,7 @@ #include #include +#include "base/bitfield.hh" #include "base/hashmap.hh" #include "base/misc.hh" #include "base/refcnt.hh" @@ -411,16 +412,10 @@ class StaticInst : public StaticInstBase //This is defined as inline below. static StaticInstPtr decode(ExtMachInst mach_inst); - //MIPS Decoder Debug Functions - int getOpcode() { return (machInst & 0xFC000000) >> 26 ; }//31..26 - int getRs() { return (machInst & 0x03E00000) >> 21; } //25...21 - int getRt() { return (machInst & 0x001F0000) >> 16; } //20...16 - int getRd() { return (machInst & 0x0000F800) >> 11; } //15...11 - int getImm() { return (machInst & 0x0000FFFF); } //15...0 - int getFunction(){ return (machInst & 0x0000003F); }//5...0 - int getBranch(){ return (machInst & 0x0000FFFF); }//15...0 - int getJump(){ return (machInst & 0x03FFFFFF); }//5...0 - int getHint(){ return (machInst & 0x000007C0) >> 6; } //10...6 + /// Return opcode of machine instruction + uint32_t getOpcode() { return bits(machInst, 31, 26);} + + /// Return name of machine instruction std::string getName() { return mnemonic; } }; From 804a7efa3ca8720b05a8f833f6545ce4375320cf Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 11 Jun 2006 15:38:40 -0400 Subject: [PATCH 002/152] next round of MIPS ISA changes 
src/arch/mips/isa/decoder.isa: div,divu,ext,seb,seh, fp conditonal moves, fp indexed memory... src/arch/mips/isa/formats/mem.isa: MemoryNoDisp class .. use sext<> function instead of doing it manually src/arch/mips/regfile/float_regfile.hh: use bits function --HG-- extra : convert_revision : cbbda9499185b91bdb2a6198fe1b961be04f9265 --- src/arch/mips/isa/decoder.isa | 46 +++++++++++++++----------- src/arch/mips/isa/formats/mem.isa | 36 +++++++++++++++----- src/arch/mips/regfile/float_regfile.hh | 14 ++++---- 3 files changed, 61 insertions(+), 35 deletions(-) diff --git a/src/arch/mips/isa/decoder.isa b/src/arch/mips/isa/decoder.isa index a64f74c4fc..14aa0cc77c 100644 --- a/src/arch/mips/isa/decoder.isa +++ b/src/arch/mips/isa/decoder.isa @@ -158,14 +158,16 @@ decode OPCODE_HI default Unknown::unknown() { } format HiLoMiscOp { - 0x2: div({{ - HI = Rs.sd % Rt.sd; - LO = Rs.sd / Rt.sd; - }}); - 0x3: divu({{ - HI = Rs.ud % Rt.ud; - LO = Rs.ud / Rt.ud; - }}); + 0x2: div({{ if (Rt.sd != 0) { + HI = Rs.sd % Rt.sd; + LO = Rs.sd / Rt.sd; + } + }}); + 0x3: divu({{ if (Rt.ud != 0) { + HI = Rs.ud % Rt.ud; + LO = Rs.ud / Rt.ud; + } + }}); } } @@ -333,7 +335,7 @@ decode OPCODE_HI default Unknown::unknown() { 0x0: decode RS_HI { 0x0: decode RS_LO { format CP1Control { - 0x0: mfc1 ({{ Rt.uw = Fs.uw<31:0>; }}); + 0x0: mfc1 ({{ Rt.uw = Fs.uw; }}); 0x2: cfc1({{ switch (FS) @@ -438,9 +440,10 @@ decode OPCODE_HI default Unknown::unknown() { 0x3: div_s({{ Fd.sf = Fs.sf / Ft.sf;}}); 0x4: sqrt_s({{ Fd.sf = sqrt(Fs.sf);}}); 0x5: abs_s({{ Fd.sf = fabs(Fs.sf);}}); - 0x6: mov_s({{ Fd.sf = Fs.sf;}}); 0x7: neg_s({{ Fd.sf = -Fs.sf;}}); } + + 0x6: BasicOp::mov_s({{ Fd.sf = Fs.sf;}}); } 0x1: decode FUNCTION_LO { @@ -549,9 +552,10 @@ decode OPCODE_HI default Unknown::unknown() { 0x3: div_d({{ Fd.df = Fs.df / Ft.df; }}); 0x4: sqrt_d({{ Fd.df = sqrt(Fs.df); }}); 0x5: abs_d({{ Fd.df = fabs(Fs.df); }}); - 0x6: mov_d({{ Fd.df = Fs.df; }}); 0x7: neg_d({{ Fd.df = -1 * Fs.df; }}); } + + 0x6: 
BasicOp::mov_d({{ Fd.df = Fs.df; }}); } 0x1: decode FUNCTION_LO { @@ -853,17 +857,19 @@ decode OPCODE_HI default Unknown::unknown() { 0x3: decode FUNCTION_HI { 0x0: decode FUNCTION_LO { format LoadIndexedMemory { - 0x0: lwxc1({{ Ft.uw = Mem.uw;}}); - 0x1: ldxc1({{ Ft.ud = Mem.ud;}}); - 0x5: luxc1({{ Ft.uw = Mem.ud;}}); + 0x0: lwxc1({{ Fd.uw = Mem.uw;}}); + 0x1: ldxc1({{ Fd.ud = Mem.ud;}}); + 0x5: luxc1({{ Fd.uw = Mem.ud;}}, + {{ EA = (Rs + Rt) & ~7; }}); } } 0x1: decode FUNCTION_LO { format StoreIndexedMemory { - 0x0: swxc1({{ Mem.uw = Ft.uw;}}); - 0x1: sdxc1({{ Mem.ud = Ft.ud;}}); - 0x5: suxc1({{ Mem.ud = Ft.ud;}}); + 0x0: swxc1({{ Mem.uw = Fs.uw;}}); + 0x1: sdxc1({{ Mem.ud = Fs.ud;}}); + 0x5: suxc1({{ Mem.ud = Fs.ud;}}, + {{ EA = (Rs + Rt) & ~7; }}); } 0x7: Prefetch::prefx({{ EA = Rs + Rt; }}); @@ -991,7 +997,7 @@ decode OPCODE_HI default Unknown::unknown() { 0x7: decode FUNCTION_HI { 0x0: decode FUNCTION_LO { format BasicOp { - 0x1: ext({{ Rt.uw = bits(Rs.uw, MSB+LSB, LSB); }}); + 0x0: ext({{ Rt.uw = bits(Rs.uw, MSB+LSB, LSB); }}); 0x4: ins({{ Rt.uw = bits(Rt.uw, 31, MSB+1) << (MSB+1) | bits(Rs.uw, MSB-LSB, 0) << LSB | bits(Rt.uw, LSB-1, 0); @@ -1014,8 +1020,8 @@ decode OPCODE_HI default Unknown::unknown() { Rt.uw<7:0> << 8 | Rt.uw<15:8>; }}); - 0x10: seb({{ Rd.sw = Rt.sw<7:0>}}); - 0x18: seh({{ Rd.sw = Rt.sw<15:0>}}); + 0x10: seb({{ Rd.sw = Rt.sb; }}); + 0x18: seh({{ Rd.sw = Rt.sh; }}); } } diff --git a/src/arch/mips/isa/formats/mem.isa b/src/arch/mips/isa/formats/mem.isa index f522470567..cf6d3de747 100644 --- a/src/arch/mips/isa/formats/mem.isa +++ b/src/arch/mips/isa/formats/mem.isa @@ -58,14 +58,8 @@ output header {{ StaticInstPtr _memAccPtr = nullStaticInstPtr) : MipsStaticInst(mnem, _machInst, __opClass), memAccessFlags(0), eaCompPtr(_eaCompPtr), memAccPtr(_memAccPtr), - disp(OFFSET) + disp(sext<16>(OFFSET)) { - //If Bit 15 is 1 then Sign Extend - int32_t temp = disp & 0x00008000; - - if (temp > 0) { - disp |= 0xFFFF0000; - } } std::string @@ -77,6 +71,24 
@@ output header {{ const StaticInstPtr &memAccInst() const { return memAccPtr; } }; + /** + * Base class for a few miscellaneous memory-format insts + * that don't interpret the disp field + */ + class MemoryNoDisp : public Memory + { + protected: + /// Constructor + MemoryNoDisp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + StaticInstPtr _eaCompPtr = nullStaticInstPtr, + StaticInstPtr _memAccPtr = nullStaticInstPtr) + : Memory(mnem, _machInst, __opClass, _eaCompPtr, _memAccPtr) + { + } + + std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const; + }; }}; @@ -84,10 +96,18 @@ output decoder {{ std::string Memory::generateDisassembly(Addr pc, const SymbolTable *symtab) const { - return csprintf("%-10s %c%d,%d(r%d)", mnemonic, + return csprintf("%-10s %c%d, %d(r%d)", mnemonic, flags[IsFloating] ? 'f' : 'r', RT, disp, RS); } + std::string + MemoryNoDisp::generateDisassembly(Addr pc, const SymbolTable *symtab) const + { + return csprintf("%-10s %c%d, r%d(r%d)", mnemonic, + flags[IsFloating] ? 'f' : 'r', + flags[IsFloating] ? FD : RD, + RS, RT); + } }}; def template LoadStoreDeclare {{ diff --git a/src/arch/mips/regfile/float_regfile.hh b/src/arch/mips/regfile/float_regfile.hh index d1a60298ad..e260f681c4 100644 --- a/src/arch/mips/regfile/float_regfile.hh +++ b/src/arch/mips/regfile/float_regfile.hh @@ -24,8 +24,6 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * Authors: Korey Sewell */ #ifndef __ARCH_MIPS_FLOAT_REGFILE_HH__ @@ -34,13 +32,14 @@ #include "arch/mips/types.hh" #include "arch/mips/constants.hh" #include "base/misc.hh" +#include "base/bitfield.hh" #include "config/full_system.hh" #include "sim/byteswap.hh" #include "sim/faults.hh" #include "sim/host.hh" class Checkpoint; -class ThreadContext; +class ExecContext; class Regfile; namespace MipsISA @@ -103,6 +102,7 @@ namespace MipsISA Fault setReg(int floatReg, const FloatReg &val, int width) { + using namespace std; switch(width) { case SingleWidth: @@ -117,8 +117,8 @@ namespace MipsISA { const void *double_ptr = &val; FloatReg64 temp_double = *(FloatReg64 *) double_ptr; - regs[floatReg + 1] = temp_double >> 32; - regs[floatReg] = 0x0000FFFF & temp_double; + regs[floatReg + 1] = bits(temp_double, 63, 32); + regs[floatReg] = bits(temp_double, 31, 0); break; } @@ -140,8 +140,8 @@ namespace MipsISA break; case DoubleWidth: - regs[floatReg + 1] = val >> 32; - regs[floatReg] = val; + regs[floatReg + 1] = bits(val, 63, 32); + regs[floatReg] = bits(val, 31, 0); break; default: From 6b5a8390c7812895b186c6712c98a0f3790aa169 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 11 Jun 2006 16:25:31 -0400 Subject: [PATCH 003/152] Remove MIPS tests --HG-- extra : convert_revision : 03cd1c26b49422feb3ea1de5947063c7048dfa67 From 13d46e2075444fb4d9b36c766a7641a637f83dcf Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 11 Jun 2006 19:04:22 -0400 Subject: [PATCH 004/152] Remove Full System Files src/arch/mips/SConscript: remove full-system src/arch/mips/stacktrace.hh: alpha to mips --HG-- extra : convert_revision : 5aa7ba6a4af4884e911708608dfc5a552f70654d --- src/arch/mips/SConscript | 3 +-- src/arch/mips/stacktrace.hh | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/arch/mips/SConscript b/src/arch/mips/SConscript index 6295a6c119..8353bcde7a 100644 --- a/src/arch/mips/SConscript +++ b/src/arch/mips/SConscript @@ -52,8 +52,7 @@ 
base_sources = Split(''' # Full-system sources full_system_sources = Split(''' - memory.cc - mips34k.cc + #Insert Full-System Files Here ''') # Syscall emulation (non-full-system) sources diff --git a/src/arch/mips/stacktrace.hh b/src/arch/mips/stacktrace.hh index 38767cef72..ff35aaeb06 100644 --- a/src/arch/mips/stacktrace.hh +++ b/src/arch/mips/stacktrace.hh @@ -28,8 +28,8 @@ * Authors: Korey Sewell */ -#ifndef __ARCH_ALPHA_STACKTRACE_HH__ -#define __ARCH_ALPHA_STACKTRACE_HH__ +#ifndef __ARCH_MIPS_STACKTRACE_HH__ +#define __ARCH_MIPS_STACKTRACE_HH__ #include "base/trace.hh" #include "cpu/static_inst.hh" @@ -118,4 +118,4 @@ StackTrace::trace(ThreadContext *tc, StaticInstPtr inst) return true; } -#endif // __ARCH_ALPHA_STACKTRACE_HH__ +#endif // __ARCH_MIPS_STACKTRACE_HH__ From babb22eea4be07cbc725eea129c6a6557fd56af2 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Mon, 12 Jun 2006 06:19:13 -0400 Subject: [PATCH 005/152] Fix python binary name in arch/SConscript. Also delete no-longer-needed SPARC test.py files (should have gone with my last changeset where LiveProcess became ISA-independent). src/arch/SConscript: Mistakenly committed hard-coded python binary name. Should use same python to run isa_parser that was used to run scons. --HG-- extra : convert_revision : a3acd5496f3b930d57bb59ae164b8a4a8065bbf8 --- src/arch/SConscript | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/arch/SConscript b/src/arch/SConscript index c90694a68b..ff460dafd5 100644 --- a/src/arch/SConscript +++ b/src/arch/SConscript @@ -28,7 +28,7 @@ # # Authors: Steve Reinhardt -import os.path +import os.path, sys # Import build environment variable from SConstruct. Import('env') @@ -134,7 +134,8 @@ def isa_desc_emitter(target, source, env): return (isa_desc_gen_files, [isa_parser, cpu_models_file] + source) # Pieces are in place, so create the builder. 
-isa_desc_builder = Builder(action='python2.4 $SOURCES $TARGET.dir $CPU_MODELS', +python = sys.executable # use same Python binary used to run scons +isa_desc_builder = Builder(action=python + ' $SOURCES $TARGET.dir $CPU_MODELS', emitter = isa_desc_emitter) env.Append(BUILDERS = { 'ISADesc' : isa_desc_builder }) From 3e1537cf8b6a9fdbffa3975fde9f3d7f12c719f9 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Mon, 12 Jun 2006 16:42:56 -0400 Subject: [PATCH 006/152] Removed syscall function from thread_context.hh. ThreadContext is the interface for external, non-CPU objects to access the thread, so they probably shouldn't be able to call syscall(). The case it was being used for was already handled by the ISA code. src/arch/sparc/faults.cc: src/cpu/thread_context.hh: Fix for merge problems. --HG-- extra : convert_revision : 05a7a2d6e45099fcf36d113da2e52450d892a72c --- src/arch/sparc/faults.cc | 2 +- src/cpu/thread_context.hh | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/arch/sparc/faults.cc b/src/arch/sparc/faults.cc index 57b4d4d863..2af242bd8a 100644 --- a/src/arch/sparc/faults.cc +++ b/src/arch/sparc/faults.cc @@ -249,7 +249,7 @@ void SparcFault::invoke(ThreadContext * tc) void TrapInstruction::invoke(ThreadContext * tc) { - tc->syscall(syscall_num); + // Should be handled in ISA. } #endif diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh index 48c8fa28dd..3c79e11168 100644 --- a/src/cpu/thread_context.hh +++ b/src/cpu/thread_context.hh @@ -245,8 +245,6 @@ class ThreadContext virtual void setSyscallReturn(SyscallReturn return_value) = 0; - virtual void syscall(int64_t callnum) = 0; - // Same with st cond failures. 
virtual Counter readFuncExeInst() = 0; #endif @@ -432,8 +430,6 @@ class ProxyThreadContext : public ThreadContext void setSyscallReturn(SyscallReturn return_value) { actualTC->setSyscallReturn(return_value); } - void syscall(int64_t callnum) { actualTC->syscall(callnum); } - Counter readFuncExeInst() { return actualTC->readFuncExeInst(); } #endif From 841d0b9d40446160fdc1e073e16f9bd7b6501911 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Mon, 12 Jun 2006 17:53:57 -0400 Subject: [PATCH 007/152] Merge fixes to make full system compile and run. src/arch/alpha/linux/system.cc: src/cpu/o3/alpha_cpu_impl.hh: src/sim/system.cc: Merge fixes. --HG-- extra : convert_revision : aa3326c0ebf54da9ab1dbd2d9877da41ca487082 --- src/arch/alpha/linux/system.cc | 3 --- src/cpu/o3/alpha_cpu_impl.hh | 1 + src/sim/system.cc | 2 -- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/arch/alpha/linux/system.cc b/src/arch/alpha/linux/system.cc index bb35f046dc..9fe63c390d 100644 --- a/src/arch/alpha/linux/system.cc +++ b/src/arch/alpha/linux/system.cc @@ -150,9 +150,6 @@ LinuxAlphaSystem::~LinuxAlphaSystem() delete debugPrintkEvent; delete idleStartEvent; delete printThreadEvent; - delete intStartEvent; - delete intEndEvent; - delete intEndEvent2; } diff --git a/src/cpu/o3/alpha_cpu_impl.hh b/src/cpu/o3/alpha_cpu_impl.hh index 98290e57ff..bfd05d2606 100644 --- a/src/cpu/o3/alpha_cpu_impl.hh +++ b/src/cpu/o3/alpha_cpu_impl.hh @@ -46,6 +46,7 @@ #include "arch/isa_traits.hh" #include "cpu/quiesce_event.hh" #include "kern/kernel_stats.hh" +#include "sim/sim_exit.hh" #include "sim/system.hh" #endif diff --git a/src/sim/system.cc b/src/sim/system.cc index b3c7870fdd..91bba85fe7 100644 --- a/src/sim/system.cc +++ b/src/sim/system.cc @@ -119,8 +119,6 @@ System::System(Params *p) DPRINTF(Loader, "Kernel end = %#x\n", kernelEnd); DPRINTF(Loader, "Kernel entry = %#x\n", kernelEntry); DPRINTF(Loader, "Kernel loaded...\n"); - - kernelBinning = new Kernel::Binning(this); #endif // 
FULL_SYSTEM // increment the number of running systms From 5639fb2c431fffa11392f34f90619fac806aa277 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Mon, 12 Jun 2006 18:58:29 -0400 Subject: [PATCH 008/152] Fix memory leak. src/arch/alpha/ev5.cc: Fix memory leak. The faults are refcounted, but that only works if you're actually assigning them to a RefCountingPtr. --HG-- extra : convert_revision : 9a57963eb5d5d86c16023bfedb0fb5ccdbe7efaa --- src/arch/alpha/ev5.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/arch/alpha/ev5.cc b/src/arch/alpha/ev5.cc index c419762b75..247c5f56e6 100644 --- a/src/arch/alpha/ev5.cc +++ b/src/arch/alpha/ev5.cc @@ -59,8 +59,12 @@ AlphaISA::initCPU(ThreadContext *tc, int cpuId) tc->setIntReg(16, cpuId); tc->setIntReg(0, cpuId); - tc->setPC(tc->readMiscReg(IPR_PAL_BASE) + (new ResetFault)->vect()); + AlphaFault *reset = new ResetFault; + + tc->setPC(tc->readMiscReg(IPR_PAL_BASE) + reset->vect()); tc->setNextPC(tc->readPC() + sizeof(MachInst)); + + delete reset; } //////////////////////////////////////////////////////////////////////// From fbf3a82c5c358d75acd9a97ad1dcef9796b83c96 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Mon, 12 Jun 2006 18:59:24 -0400 Subject: [PATCH 009/152] Fix compile error. 
--HG-- extra : convert_revision : 334f5033f5a3a303bfaec3a3acfbd36f205efe86 --- src/arch/mips/isa/formats/branch.isa | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/arch/mips/isa/formats/branch.isa b/src/arch/mips/isa/formats/branch.isa index 8cfa37a201..ea5af22c04 100644 --- a/src/arch/mips/isa/formats/branch.isa +++ b/src/arch/mips/isa/formats/branch.isa @@ -111,7 +111,7 @@ output header {{ { } - Addr branchTarget(ExecContext *xc) const; + Addr branchTarget(ThreadContext *tc) const; std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; @@ -132,10 +132,10 @@ output decoder {{ } Addr - Jump::branchTarget(ExecContext *xc) const + Jump::branchTarget(ThreadContext *tc) const { - Addr NPC = xc->readPC() + 4; - uint64_t Rb = xc->readIntReg(_srcRegIdx[0]); + Addr NPC = tc->readPC() + 4; + uint64_t Rb = tc->readIntReg(_srcRegIdx[0]); return (Rb & ~3) | (NPC & 1); } From 4acb283496c7667bf0161f45e578c702d2cf8dbc Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Mon, 12 Jun 2006 19:04:42 -0400 Subject: [PATCH 010/152] Clean up/shift some code around. src/cpu/base_dyn_inst.cc: Clean up some code and update. src/cpu/base_dyn_inst.hh: Clean up some code and update with more descriptive function names. src/cpu/o3/alpha_cpu_builder.cc: src/cpu/o3/alpha_params.hh: src/cpu/o3/commit.hh: Remove unused parameters. src/cpu/o3/commit_impl.hh: Remove unused parameters, also set squashCounter directly to the counted number of squashes. src/cpu/o3/fetch_impl.hh: Update for function name changes. src/cpu/o3/iew.hh: src/cpu/o3/iew_impl.hh: Remove unused parameter, move some code into a function. 
--HG-- extra : convert_revision : 45abd77ad43dde2e93c2e53c4738c90ba8352a1d --- src/cpu/base_dyn_inst.cc | 68 +++------------------------------ src/cpu/base_dyn_inst.hh | 38 ++++++------------ src/cpu/o3/alpha_cpu_builder.cc | 15 -------- src/cpu/o3/alpha_params.hh | 5 --- src/cpu/o3/commit.hh | 5 --- src/cpu/o3/commit_impl.hh | 35 ++--------------- src/cpu/o3/fetch_impl.hh | 10 ++--- src/cpu/o3/iew.hh | 8 ++-- src/cpu/o3/iew_impl.hh | 32 ++++++++++------ 9 files changed, 49 insertions(+), 167 deletions(-) diff --git a/src/cpu/base_dyn_inst.cc b/src/cpu/base_dyn_inst.cc index 30fa10a6b6..e3829297d8 100644 --- a/src/cpu/base_dyn_inst.cc +++ b/src/cpu/base_dyn_inst.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -103,6 +103,8 @@ BaseDynInst::initVars() readyRegs = 0; + instResult.integer = 0; + // May want to turn this into a bit vector or something. completed = false; resultReady = false; @@ -242,31 +244,7 @@ template void BaseDynInst::writeHint(Addr addr, int size, unsigned flags) { - // Need to create a MemReq here so we can do a translation. This - // will casue a TLB miss trap if necessary... not sure whether - // that's the best thing to do or not. We don't really need the - // MemReq otherwise, since wh64 has no functional effect. -/* - MemReqPtr req = new MemReq(addr, thread->getXCProxy(), size, flags); - req->asid = asid; - - fault = cpu->translateDataWriteReq(req); - - if (fault == NoFault && !(req->flags & UNCACHEABLE)) { - // Record key MemReq parameters so we can generate another one - // just like it for the timing access without calling translate() - // again (which might mess up the TLB). - effAddr = req->vaddr; - physEffAddr = req->paddr; - memReqFlags = req->flags; - } else { - // ignore faults & accesses to uncacheable space... 
treat as no-op - effAddr = physEffAddr = MemReq::inval_addr; - } - - storeSize = size; - storeData = 0; -*/ + // Not currently supported. } /** @@ -276,22 +254,7 @@ template Fault BaseDynInst::copySrcTranslate(Addr src) { -/* - MemReqPtr req = new MemReq(src, thread->getXCProxy(), 64); - req->asid = asid; - - // translate to physical address - Fault fault = cpu->translateDataReadReq(req); - - if (fault == NoFault) { - thread->copySrcAddr = src; - thread->copySrcPhysAddr = req->paddr; - } else { - thread->copySrcAddr = 0; - thread->copySrcPhysAddr = 0; - } - return fault; -*/ + // Not currently supported. return NoFault; } @@ -302,26 +265,7 @@ template Fault BaseDynInst::copy(Addr dest) { -/* - uint8_t data[64]; - FunctionalMemory *mem = thread->mem; - assert(thread->copySrcPhysAddr); - MemReqPtr req = new MemReq(dest, thread->getXCProxy(), 64); - req->asid = asid; - - // translate to physical address - Fault fault = cpu->translateDataWriteReq(req); - - if (fault == NoFault) { - Addr dest_addr = req->paddr; - // Need to read straight from memory since we have more than 8 bytes. - req->paddr = thread->copySrcPhysAddr; - mem->read(req, data); - req->paddr = dest_addr; - mem->write(req, data); - } - return fault; -*/ + // Not currently supported. return NoFault; } diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 948ee058ad..fc9bf8b947 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -44,12 +44,6 @@ #include "cpu/static_inst.hh" #include "mem/packet.hh" #include "sim/system.hh" -/* -#include "encumbered/cpu/full/bpred_update.hh" -#include "encumbered/cpu/full/spec_memory.hh" -#include "encumbered/cpu/full/spec_state.hh" -#include "encumbered/mem/functional/main.hh" -*/ /** * @file @@ -202,10 +196,9 @@ class BaseDynInst : public FastAlloc, public RefCounted Fault fault; /** The memory request. */ -// MemReqPtr req; Request *req; -// Packet pkt; + /** Pointer to the data for the memory access. */ uint8_t *memData; /** The effective virtual address (lds & stores only). */ @@ -288,21 +281,6 @@ class BaseDynInst : public FastAlloc, public RefCounted void initVars(); public: - /** - * @todo: Make this function work; currently it is a dummy function. - * @param fault Last fault. - * @param cmd Last command. - * @param addr Virtual address of access. - * @param p Memory accessed. - * @param nbytes Access size. - */ -// void -// trace_mem(Fault fault, -// MemCmd cmd, -// Addr addr, -// void *p, -// int nbytes); - /** Dumps out contents of this BaseDynInst. */ void dump(); @@ -439,11 +417,13 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Returns the result of a floating point (double) instruction. */ double readDoubleResult() { return instResult.dbl; } + /** Records an integer register being set to a value. */ void setIntReg(const StaticInst *si, int idx, uint64_t val) { instResult.integer = val; } + /** Records an fp register being set to a value. */ void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width) { if (width == 32) @@ -454,16 +434,19 @@ class BaseDynInst : public FastAlloc, public RefCounted panic("Unsupported width!"); } + /** Records an fp register being set to a value. */ void setFloatReg(const StaticInst *si, int idx, FloatReg val) { instResult.fp = val; } + /** Records an fp register being set to an integer value. 
*/ void setFloatRegBits(const StaticInst *si, int idx, uint64_t val, int width) { instResult.integer = val; } + /** Records an fp register being set to an integer value. */ void setFloatRegBits(const StaticInst *si, int idx, uint64_t val) { instResult.integer = val; @@ -590,14 +573,15 @@ class BaseDynInst : public FastAlloc, public RefCounted void setNextPC(uint64_t val) { nextPC = val; -// instResult.integer = val; } + /** Sets the ASID. */ void setASID(short addr_space_id) { asid = addr_space_id; } - void setThread(unsigned tid) { threadNumber = tid; } + /** Sets the thread id. */ + void setTid(unsigned tid) { threadNumber = tid; } - void setState(ImplState *state) { thread = state; } + void setThreadState(ImplState *state) { thread = state; } /** Returns the thread context. */ diff --git a/src/cpu/o3/alpha_cpu_builder.cc b/src/cpu/o3/alpha_cpu_builder.cc index 828977ccb7..a6fbe34d7b 100644 --- a/src/cpu/o3/alpha_cpu_builder.cc +++ b/src/cpu/o3/alpha_cpu_builder.cc @@ -92,11 +92,6 @@ Param commitToIEWDelay; Param renameToIEWDelay; Param issueToExecuteDelay; Param issueWidth; -Param executeWidth; -Param executeIntWidth; -Param executeFloatWidth; -Param executeBranchWidth; -Param executeMemoryWidth; SimObjectParam fuPool; Param iewToCommitDelay; @@ -213,11 +208,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" "to the IEW stage)"), INIT_PARAM(issueWidth, "Issue width"), - INIT_PARAM(executeWidth, "Execute width"), - INIT_PARAM(executeIntWidth, "Integer execute width"), - INIT_PARAM(executeFloatWidth, "Floating point execute width"), - INIT_PARAM(executeBranchWidth, "Branch execute width"), - INIT_PARAM(executeMemoryWidth, "Memory execute width"), INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL), INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " @@ -344,11 +334,6 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->renameToIEWDelay = renameToIEWDelay; params->issueToExecuteDelay = 
issueToExecuteDelay; params->issueWidth = issueWidth; - params->executeWidth = executeWidth; - params->executeIntWidth = executeIntWidth; - params->executeFloatWidth = executeFloatWidth; - params->executeBranchWidth = executeBranchWidth; - params->executeMemoryWidth = executeMemoryWidth; params->fuPool = fuPool; params->iewToCommitDelay = iewToCommitDelay; diff --git a/src/cpu/o3/alpha_params.hh b/src/cpu/o3/alpha_params.hh index f3cf368875..2ece7fb7fc 100644 --- a/src/cpu/o3/alpha_params.hh +++ b/src/cpu/o3/alpha_params.hh @@ -105,11 +105,6 @@ class AlphaSimpleParams : public BaseFullCPU::Params unsigned renameToIEWDelay; unsigned issueToExecuteDelay; unsigned issueWidth; - unsigned executeWidth; - unsigned executeIntWidth; - unsigned executeFloatWidth; - unsigned executeBranchWidth; - unsigned executeMemoryWidth; FUPool *fuPool; // diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index b7404c4887..0b31cb9c85 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -365,11 +365,6 @@ class DefaultCommit */ unsigned renameWidth; - /** IEW width, in instructions. Used so ROB knows how many - * instructions to get from the IEW instruction queue. - */ - unsigned iewWidth; - /** Commit width, in instructions. */ unsigned commitWidth; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index ceb2918e06..8384dbeadc 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -72,7 +72,6 @@ DefaultCommit::DefaultCommit(Params *params) renameToROBDelay(params->renameToROBDelay), fetchToCommitDelay(params->commitToFetchDelay), renameWidth(params->renameWidth), - iewWidth(params->executeWidth), commitWidth(params->commitWidth), numThreads(params->numberOfThreads), switchPending(false), @@ -434,7 +433,7 @@ DefaultCommit::setNextStatus() } } - assert(squashes == squashCounter); + squashCounter = squashes; // If commit is currently squashing, then it will have activity for the // next cycle. Set its next status as active. 
@@ -539,8 +538,6 @@ DefaultCommit::squashFromTrap(unsigned tid) commitStatus[tid] = ROBSquashing; cpu->activityThisCycle(); - - ++squashCounter; } template @@ -558,8 +555,6 @@ DefaultCommit::squashFromTC(unsigned tid) cpu->activityThisCycle(); tcSquash[tid] = false; - - ++squashCounter; } template @@ -585,10 +580,12 @@ DefaultCommit::tick() if (rob->isDoneSquashing(tid)) { commitStatus[tid] = Running; - --squashCounter; } else { DPRINTF(Commit,"[tid:%u]: Still Squashing, cannot commit any" "insts this cycle.\n", tid); + rob->doSquash(tid); + toIEW->commitInfo[tid].robSquashing = true; + wroteToTimeBuffer = true; } } } @@ -694,29 +691,7 @@ DefaultCommit::commit() while (threads != (*activeThreads).end()) { unsigned tid = *threads++; -/* - if (fromFetch->fetchFault && commitStatus[0] != TrapPending) { - // Record the fault. Wait until it's empty in the ROB. - // Then handle the trap. Ignore it if there's already a - // trap pending as fetch will be redirected. - fetchFault = fromFetch->fetchFault; - fetchFaultTick = curTick + fetchTrapLatency; - commitStatus[0] = FetchTrapPending; - DPRINTF(Commit, "Fault from fetch recorded. Will trap if the " - "ROB empties without squashing the fault.\n"); - fetchTrapWait = 0; - } - // Fetch may tell commit to clear the trap if it's been squashed. - if (fromFetch->clearFetchFault) { - DPRINTF(Commit, "Received clear fetch fault signal\n"); - fetchTrapWait = 0; - if (commitStatus[0] == FetchTrapPending) { - DPRINTF(Commit, "Clearing fault from fetch\n"); - commitStatus[0] = Running; - } - } -*/ // Not sure which one takes priority. I think if we have // both, that's a bad sign. if (trapSquash[tid] == true) { @@ -744,8 +719,6 @@ DefaultCommit::commit() commitStatus[tid] = ROBSquashing; - ++squashCounter; - // If we want to include the squashing instruction in the squash, // then use one older sequence number. 
InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid]; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index c0a2a5d094..af2aadf09e 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -817,7 +817,7 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) // Check ROB squash signals from commit. if (fromCommit->commitInfo[tid].robSquashing) { - DPRINTF(Fetch, "[tid:%u]: ROB is still squashing Thread %u.\n", tid); + DPRINTF(Fetch, "[tid:%u]: ROB is still squashing.\n", tid); // Continue to squash. fetchStatus[tid] = Squashing; @@ -984,11 +984,11 @@ DefaultFetch::fetch(bool &status_change) DynInstPtr instruction = new DynInst(ext_inst, fetch_PC, next_PC, inst_seq, cpu); - instruction->setThread(tid); + instruction->setTid(tid); instruction->setASID(tid); - instruction->setState(cpu->thread[tid]); + instruction->setThreadState(cpu->thread[tid]); DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created " "[sn:%lli]\n", @@ -1065,11 +1065,11 @@ DefaultFetch::fetch(bool &status_change) next_PC, inst_seq, cpu); instruction->setPredTarg(next_PC + instSize); - instruction->setThread(tid); + instruction->setTid(tid); instruction->setASID(tid); - instruction->setState(cpu->thread[tid]); + instruction->setThreadState(cpu->thread[tid]); instruction->traceData = NULL; diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 2e61af5fcf..455de7c3fd 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -261,6 +261,9 @@ class DefaultIEW /** Processes inputs and changes state accordingly. */ void checkSignalsAndUpdate(unsigned tid); + /** Removes instructions from rename from a thread's instruction list. */ + void emptyRenameInsts(unsigned tid); + /** Sorts instructions coming from rename into lists separated by thread. */ void sortInsts(); @@ -390,11 +393,6 @@ class DefaultIEW /** Width of issue, in instructions. */ unsigned issueWidth; - /** Width of execute, in instructions. Might make more sense to break - * down into FP vs int. 
- */ - unsigned executeWidth; - /** Index into queue of instructions being written back. */ unsigned wbNumInst; diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 3929f2e194..0649f10ecb 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -52,7 +52,6 @@ DefaultIEW::DefaultIEW(Params *params) issueToExecuteDelay(params->issueToExecuteDelay), issueReadWidth(params->issueWidth), issueWidth(params->issueWidth), - executeWidth(params->executeWidth), numThreads(params->numberOfThreads), switchedOut(false) { @@ -456,16 +455,7 @@ DefaultIEW::squash(unsigned tid) skidBuffer[tid].pop(); } - while (!insts[tid].empty()) { - if (insts[tid].front()->isLoad() || - insts[tid].front()->isStore() ) { - toRename->iewInfo[tid].dispatchedToLSQ++; - } - - toRename->iewInfo[tid].dispatched++; - - insts[tid].pop(); - } + emptyRenameInsts(tid); } template @@ -799,10 +789,12 @@ DefaultIEW::checkSignalsAndUpdate(unsigned tid) } if (fromCommit->commitInfo[tid].robSquashing) { - DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n"); + DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n", tid); dispatchStatus[tid] = Squashing; + emptyRenameInsts(tid); + wroteToTimeBuffer = true; return; } @@ -851,6 +843,22 @@ DefaultIEW::sortInsts() } } +template +void +DefaultIEW::emptyRenameInsts(unsigned tid) +{ + while (!insts[tid].empty()) { + if (insts[tid].front()->isLoad() || + insts[tid].front()->isStore() ) { + toRename->iewInfo[tid].dispatchedToLSQ++; + } + + toRename->iewInfo[tid].dispatched++; + + insts[tid].pop(); + } +} + template void DefaultIEW::wakeCPU() From 6152e8abc3a120efd6c7a86d4299643b5c82b6b1 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Mon, 12 Jun 2006 19:05:48 -0400 Subject: [PATCH 011/152] Fix output messages. src/cpu/o3/decode_impl.hh: src/cpu/o3/rename_impl.hh: Fix output message. 
--HG-- extra : convert_revision : f226b84d0e15f275286b1ed078d341831370322b --- src/cpu/o3/decode_impl.hh | 2 +- src/cpu/o3/rename_impl.hh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 8a6ea6626f..0748ddb3b2 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -515,7 +515,7 @@ DefaultDecode::checkSignalsAndUpdate(unsigned tid) // Check ROB squash signals from commit. if (fromCommit->commitInfo[tid].robSquashing) { - DPRINTF(Decode, "[tid:%]: ROB is still squashing.\n",tid); + DPRINTF(Decode, "[tid:%u]: ROB is still squashing.\n", tid); // Continue to squash. decodeStatus[tid] = Squashing; diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index df33b98eef..f9e2a03ee9 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -1206,7 +1206,7 @@ DefaultRename::checkSignalsAndUpdate(unsigned tid) } DPRINTF(Rename, "[tid:%u]: Instruction must be processed by rename." - " Adding to front of list.", tid); + " Adding to front of list.\n", tid); serializeInst[tid] = NULL; From b5cf61efad0acab998b17623ebb00f67cb1f6d50 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Mon, 12 Jun 2006 19:11:38 -0400 Subject: [PATCH 012/152] Fixes for checker. The RC/RS instructions check the interrupt flag, which isn't verifiable by the checker. src/arch/alpha/isa/decoder.isa: src/cpu/checker/cpu.cc: Fixes for checker. 
--HG-- extra : convert_revision : b0ec8f3c4a10453a567cd6691283fc498403795e --- src/arch/alpha/isa/decoder.isa | 4 ++-- src/cpu/checker/cpu.cc | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/arch/alpha/isa/decoder.isa b/src/arch/alpha/isa/decoder.isa index fab2ca2e10..dd29e47e45 100644 --- a/src/arch/alpha/isa/decoder.isa +++ b/src/arch/alpha/isa/decoder.isa @@ -659,11 +659,11 @@ decode OPCODE default Unknown::unknown() { 0xe000: rc({{ Ra = xc->readIntrFlag(); xc->setIntrFlag(0); - }}, IsNonSpeculative); + }}, IsNonSpeculative, IsUnverifiable); 0xf000: rs({{ Ra = xc->readIntrFlag(); xc->setIntrFlag(1); - }}, IsNonSpeculative); + }}, IsNonSpeculative, IsUnverifiable); } #else format FailUnimpl { diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc index ebc02f7beb..b1167c1d8f 100644 --- a/src/cpu/checker/cpu.cc +++ b/src/cpu/checker/cpu.cc @@ -84,6 +84,8 @@ CheckerCPU::CheckerCPU(Params *p) #else process = p->process; #endif + + result.integer = 0; } CheckerCPU::~CheckerCPU() From 7bcab0803fcb5055832d4858fe84d671c1a79d89 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Tue, 13 Jun 2006 11:38:16 -0400 Subject: [PATCH 013/152] Compile fix. 
--HG-- extra : convert_revision : 20649b0b9b9c496aae22c19926c1166c8c0cc821 --- src/cpu/checker/cpu.cc | 1 + src/cpu/o3/lsq_unit_impl.hh | 1 + 2 files changed, 2 insertions(+) diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc index ebc02f7beb..37c5a3aa29 100644 --- a/src/cpu/checker/cpu.cc +++ b/src/cpu/checker/cpu.cc @@ -38,6 +38,7 @@ #include "cpu/simple_thread.hh" #include "cpu/thread_context.hh" #include "cpu/static_inst.hh" +#include "mem/packet_impl.hh" #include "sim/byteswap.hh" #include "sim/sim_object.hh" #include "sim/stats.hh" diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 6f32ec304b..4c01f29a24 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -32,6 +32,7 @@ #include "cpu/checker/cpu.hh" #include "cpu/o3/lsq_unit.hh" #include "base/str.hh" +#include "mem/packet.hh" #include "mem/request.hh" template From 72e4b98b8dd29b72415a1294e9e35387027d6cf9 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Tue, 13 Jun 2006 14:15:24 -0400 Subject: [PATCH 014/152] Add in DetailedCPU to test. 
--HG-- extra : convert_revision : 98c67b45af239e1cf5bad6888da6577a4c3bb45d --- configs/test/test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/configs/test/test.py b/configs/test/test.py index 8c5b06e6aa..2ece9e6752 100644 --- a/configs/test/test.py +++ b/configs/test/test.py @@ -6,12 +6,14 @@ import os, optparse, sys import m5 from m5.objects import * +from FullO3Config import * # parse command-line arguments parser = optparse.OptionParser(option_list=m5.standardOptions) parser.add_option("-c", "--cmd", default="hello") parser.add_option("-t", "--timing", action="store_true") +parser.add_option("-f", "--full", action="store_true") (options, args) = parser.parse_args() @@ -31,6 +33,8 @@ mem = PhysicalMemory() if options.timing: cpu = TimingSimpleCPU() +elif options.full: + cpu = DetailedCPU() else: cpu = AtomicSimpleCPU() cpu.workload = process From 4ad3d47464734e5747efe9f4158d32bdedc73abc Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Tue, 13 Jun 2006 14:37:50 -0400 Subject: [PATCH 015/152] Add itb and dtb to checker when in full system mode. --HG-- extra : convert_revision : 6e272d484d04b018e7d48e2878ae3e21e8dc571e From dcf02c25e380b113bcf05e3b3a5bf79fc19b3150 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Tue, 13 Jun 2006 14:39:05 -0400 Subject: [PATCH 016/152] Make syscalls serialize after instructions so they work properly on the new CPU model. 
--HG-- extra : convert_revision : c2cea5771e41d3c97d0e44559316363718d89abd --- src/arch/alpha/isa/decoder.isa | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arch/alpha/isa/decoder.isa b/src/arch/alpha/isa/decoder.isa index 2ecd9f5adb..d2908b27a2 100644 --- a/src/arch/alpha/isa/decoder.isa +++ b/src/arch/alpha/isa/decoder.isa @@ -701,7 +701,7 @@ decode OPCODE default Unknown::unknown() { }}, IsNonSpeculative); 0x83: callsys({{ xc->syscall(R0); - }}, IsNonSpeculative); + }}, IsSerializeAfter, IsNonSpeculative); // Read uniq reg into ABI return value register (r0) 0x9e: rduniq({{ R0 = Runiq; }}, IsIprAccess); // Write uniq reg with value from ABI arg register (r16) From 285b88a57b0111cc6698f2e30182dca17d8ea15a Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Tue, 13 Jun 2006 16:53:26 -0400 Subject: [PATCH 017/152] allow long opts to m5 and add a help flag back. --HG-- extra : convert_revision : 279cf97fe2e3098e2fe9c568c0336f97e41a14e4 --- src/sim/main.cc | 50 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/src/sim/main.cc b/src/sim/main.cc index 7419260564..f3b74489d5 100644 --- a/src/sim/main.cc +++ b/src/sim/main.cc @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include @@ -113,6 +113,31 @@ abortHandler(int sigtype) #endif } +/// Simulator executable name +char *myProgName = ""; + +/// Show brief help message. +void +showBriefHelp(ostream &out) +{ + char *prog = basename(myProgName); + + ccprintf(out, "Usage:\n"); + ccprintf(out, +"%s [-p ] [-i ] [-h] \n" +"\n" +" -p, --path prepends to PYTHONPATH instead of using\n" +" built-in zip archive. 
Useful when developing/debugging\n" +" changes to built-in Python libraries, as the new Python\n" +" can be tested without building a new m5 binary.\n\n" +" -i, --interactive forces entry into interactive mode after the supplied\n" +" script is executed (just like the -i option to the\n" +" Python interpreter).\n\n" +" -h Prints this help\n\n" +" config file name (ends in .py)\n\n", + prog); + +} const char *briefCopyright = "Copyright (c) 2001-2006\n" @@ -145,6 +170,9 @@ extern "C" { void init_main(); } int main(int argc, char **argv) { + // Save off program name + myProgName = argv[0]; + sayHello(cerr); signal(SIGFPE, SIG_IGN); // may occur on misspeculated paths @@ -161,9 +189,19 @@ main(int argc, char **argv) char *pythonpath = argv[0]; bool interactive = false; + bool show_help = false; bool getopt_done = false; + int opt_index = 0; + + static struct option long_options[] = { + {"python", 1, 0, 'p'}, + {"interactive", 0, 0, 'i'}, + {"help", 0, 0, 'h'}, + {0,0,0,0} + }; + do { - switch (getopt(argc, argv, "+p:i")) { + switch (getopt_long(argc, argv, "+p:ih", long_options, &opt_index)) { // -p prepends to PYTHONPATH instead of // using built-in zip archive. Useful when // developing/debugging changes to built-in Python @@ -180,6 +218,9 @@ main(int argc, char **argv) interactive = true; break; + case 'h': + show_help = true; + break; case -1: getopt_done = true; break; @@ -189,6 +230,11 @@ main(int argc, char **argv) } } while (!getopt_done); + if (show_help) { + showBriefHelp(cerr); + exit(1); + } + // Fix up argc & argv to hide arguments we just processed. // getopt() sets optind to the index of the first non-processed // argv element. From 2f043aafbcb039c11870c707f5d64e00f9693151 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Tue, 13 Jun 2006 22:35:05 -0400 Subject: [PATCH 018/152] Minor updates for stats. src/cpu/o3/commit_impl.hh: src/cpu/o3/fetch.hh: Update stats comments. src/cpu/o3/fetch_impl.hh: Differentiate stats.
src/cpu/o3/iew.hh: src/cpu/o3/iew_impl.hh: src/cpu/o3/inst_queue.hh: src/cpu/o3/inst_queue_impl.hh: Update for stats. src/cpu/o3/lsq.hh: LSQ now has stats. src/cpu/o3/lsq_impl.hh: Register stats of all LSQ units. src/cpu/o3/lsq_unit.hh: src/cpu/o3/lsq_unit_impl.hh: Add in stats. --HG-- extra : convert_revision : 7672ecf3c02515b268c28d5a986af1432197654a --- src/cpu/o3/commit_impl.hh | 13 ---- src/cpu/o3/fetch.hh | 1 + src/cpu/o3/fetch_impl.hh | 6 +- src/cpu/o3/iew.hh | 38 ++++-------- src/cpu/o3/iew_impl.hh | 112 ++++++++++++++-------------------- src/cpu/o3/inst_queue.hh | 12 ++-- src/cpu/o3/inst_queue_impl.hh | 17 +----- src/cpu/o3/lsq.hh | 3 + src/cpu/o3/lsq_impl.hh | 12 +++- src/cpu/o3/lsq_unit.hh | 50 +++++++++------ src/cpu/o3/lsq_unit_impl.hh | 47 +++++++++++++- 11 files changed, 165 insertions(+), 146 deletions(-) diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 8384dbeadc..021d3ef902 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -204,19 +204,6 @@ DefaultCommit::regStats() .flags(total) ; - // - // Commit-Eligible instructions... - // - // -> The number of instructions eligible to commit in those - // cycles where we reached our commit BW limit (less the number - // actually committed) - // - // -> The average value is computed over ALL CYCLES... not just - // the BW limited cycles - // - // -> The standard deviation is computed only over cycles where - // we reached the BW limit - // commitEligible .init(cpu->number_of_threads) .name(name() + ".COM:bw_limited") diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 76b32de68c..962d464372 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -421,6 +421,7 @@ class DefaultFetch Stats::Scalar<> icacheStallCycles; /** Stat for total number of fetched instructions. */ Stats::Scalar<> fetchedInsts; + /** Total number of fetched branches. */ Stats::Scalar<> fetchedBranches; /** Stat for total number of predicted branches. 
*/ Stats::Scalar<> predictedBranches; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index af2aadf09e..477a1469cc 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -915,7 +915,11 @@ DefaultFetch::fetch(bool &status_change) bool fetch_success = fetchCacheLine(fetch_PC, fault, tid); if (!fetch_success) { - ++fetchMiscStallCycles; + if (cacheBlocked) { + ++icacheStallCycles; + } else { + ++fetchMiscStallCycles; + } return; } } else { diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 455de7c3fd..615022dc94 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -437,14 +437,6 @@ class DefaultIEW Stats::Scalar<> iewIQFullEvents; /** Stat for number of times the LSQ becomes full. */ Stats::Scalar<> iewLSQFullEvents; - /** Stat for total number of executed instructions. */ - Stats::Scalar<> iewExecutedInsts; - /** Stat for total number of executed load instructions. */ - Stats::Vector<> iewExecLoadInsts; - /** Stat for total number of executed store instructions. */ -// Stats::Scalar<> iewExecStoreInsts; - /** Stat for total number of squashed instructions skipped at execute. */ - Stats::Scalar<> iewExecSquashedInsts; /** Stat for total number of memory ordering violation events. */ Stats::Scalar<> memOrderViolationEvents; /** Stat for total number of incorrect predicted taken branches. */ @@ -454,28 +446,25 @@ class DefaultIEW /** Stat for total number of mispredicted branches detected at execute. */ Stats::Formula branchMispredicts; + /** Stat for total number of executed instructions. */ + Stats::Scalar<> iewExecutedInsts; + /** Stat for total number of executed load instructions. */ + Stats::Vector<> iewExecLoadInsts; + /** Stat for total number of squashed instructions skipped at execute. */ + Stats::Scalar<> iewExecSquashedInsts; /** Number of executed software prefetches. */ - Stats::Vector<> exeSwp; + Stats::Vector<> iewExecutedSwp; /** Number of executed nops. 
*/ - Stats::Vector<> exeNop; + Stats::Vector<> iewExecutedNop; /** Number of executed meomory references. */ - Stats::Vector<> exeRefs; + Stats::Vector<> iewExecutedRefs; /** Number of executed branches. */ - Stats::Vector<> exeBranches; - -// Stats::Vector<> issued_ops; -/* - Stats::Vector<> stat_fu_busy; - Stats::Vector2d<> stat_fuBusy; - Stats::Vector<> dist_unissued; - Stats::Vector2d<> stat_issued_inst_type; -*/ - /** Number of instructions issued per cycle. */ - Stats::Formula issueRate; + Stats::Vector<> iewExecutedBranches; /** Number of executed store instructions. */ Stats::Formula iewExecStoreInsts; -// Stats::Formula issue_op_rate; -// Stats::Formula fu_busy_rate; + /** Number of instructions executed per cycle. */ + Stats::Formula iewExecRate; + /** Number of instructions sent to commit. */ Stats::Vector<> iewInstsToCommit; /** Number of instructions that writeback. */ @@ -488,7 +477,6 @@ class DefaultIEW * to resource contention. */ Stats::Vector<> wbPenalized; - /** Number of instructions per cycle written back. */ Stats::Formula wbRate; /** Average number of woken instructions per writeback. 
*/ diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 0649f10ecb..b02ee8555f 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -93,6 +93,7 @@ DefaultIEW::regStats() using namespace Stats; instQueue.regStats(); + ldstQueue.regStats(); iewIdleCycles .name(name() + ".iewIdleCycles") @@ -138,20 +139,6 @@ DefaultIEW::regStats() .name(name() + ".iewLSQFullEvents") .desc("Number of times the LSQ has become full, causing a stall"); - iewExecutedInsts - .name(name() + ".iewExecutedInsts") - .desc("Number of executed instructions"); - - iewExecLoadInsts - .init(cpu->number_of_threads) - .name(name() + ".iewExecLoadInsts") - .desc("Number of load instructions executed") - .flags(total); - - iewExecSquashedInsts - .name(name() + ".iewExecSquashedInsts") - .desc("Number of squashed instructions skipped in execute"); - memOrderViolationEvents .name(name() + ".memOrderViolationEvents") .desc("Number of memory order violations"); @@ -170,114 +157,105 @@ DefaultIEW::regStats() branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect; - exeSwp + iewExecutedInsts + .name(name() + ".EXEC:insts") + .desc("Number of executed instructions"); + + iewExecLoadInsts + .init(cpu->number_of_threads) + .name(name() + ".EXEC:loads") + .desc("Number of load instructions executed") + .flags(total); + + iewExecSquashedInsts + .name(name() + ".EXEC:squashedInsts") + .desc("Number of squashed instructions skipped in execute"); + + iewExecutedSwp .init(cpu->number_of_threads) .name(name() + ".EXEC:swp") .desc("number of swp insts executed") - .flags(total) - ; + .flags(total); - exeNop + iewExecutedNop .init(cpu->number_of_threads) .name(name() + ".EXEC:nop") .desc("number of nop insts executed") - .flags(total) - ; + .flags(total); - exeRefs + iewExecutedRefs .init(cpu->number_of_threads) .name(name() + ".EXEC:refs") .desc("number of memory reference insts executed") - .flags(total) - ; + .flags(total); - exeBranches + iewExecutedBranches 
.init(cpu->number_of_threads) .name(name() + ".EXEC:branches") .desc("Number of branches executed") - .flags(total) - ; - - issueRate - .name(name() + ".EXEC:rate") - .desc("Inst execution rate") - .flags(total) - ; - issueRate = iewExecutedInsts / cpu->numCycles; + .flags(total); iewExecStoreInsts .name(name() + ".EXEC:stores") .desc("Number of stores executed") - .flags(total) - ; - iewExecStoreInsts = exeRefs - iewExecLoadInsts; -/* - for (int i=0; inumCycles; iewInstsToCommit .init(cpu->number_of_threads) .name(name() + ".WB:sent") .desc("cumulative count of insts sent to commit") - .flags(total) - ; + .flags(total); writebackCount .init(cpu->number_of_threads) .name(name() + ".WB:count") .desc("cumulative count of insts written-back") - .flags(total) - ; + .flags(total); producerInst .init(cpu->number_of_threads) .name(name() + ".WB:producers") .desc("num instructions producing a value") - .flags(total) - ; + .flags(total); consumerInst .init(cpu->number_of_threads) .name(name() + ".WB:consumers") .desc("num instructions consuming a value") - .flags(total) - ; + .flags(total); wbPenalized .init(cpu->number_of_threads) .name(name() + ".WB:penalized") .desc("number of instrctions required to write to 'other' IQ") - .flags(total) - ; + .flags(total); wbPenalizedRate .name(name() + ".WB:penalized_rate") .desc ("fraction of instructions written-back that wrote to 'other' IQ") - .flags(total) - ; + .flags(total); wbPenalizedRate = wbPenalized / writebackCount; wbFanout .name(name() + ".WB:fanout") .desc("average fanout of values written-back") - .flags(total) - ; + .flags(total); wbFanout = producerInst / consumerInst; wbRate .name(name() + ".WB:rate") .desc("insts written-back per cycle") - .flags(total) - ; + .flags(total); wbRate = writebackCount / cpu->numCycles; } @@ -1098,7 +1076,7 @@ DefaultIEW::dispatchInsts(unsigned tid) instQueue.recordProducer(inst); - exeNop[tid]++; + iewExecutedNop[tid]++; add_to_iq = false; } else if (inst->isExecuted()) { @@ -1509,9 
+1487,9 @@ DefaultIEW::updateExeInstStats(DynInstPtr &inst) // #ifdef TARGET_ALPHA if (inst->isDataPrefetch()) - exeSwp[thread_number]++; + iewExecutedSwp[thread_number]++; else - iewExecutedInsts++; + iewExecutedInsts++; #else iewExecutedInsts++; #endif @@ -1520,13 +1498,13 @@ DefaultIEW::updateExeInstStats(DynInstPtr &inst) // Control operations // if (inst->isControl()) - exeBranches[thread_number]++; + iewExecutedBranches[thread_number]++; // // Memory operations // if (inst->isMemRef()) { - exeRefs[thread_number]++; + iewExecutedRefs[thread_number]++; if (inst->isLoad()) { iewExecLoadInsts[thread_number]++; diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index 60a713020c..6fd3c6d0b3 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -474,12 +474,17 @@ class InstructionQueue /** Stat for number of non-speculative instructions removed due to a squash. */ Stats::Scalar<> iqSquashedNonSpecRemoved; + // Also include number of instructions rescheduled and replayed. - /** Distribution of number of instructions in the queue. */ + /** Distribution of number of instructions in the queue. + * @todo: Need to create struct to track the entry time for each + * instruction. */ Stats::VectorDistribution<> queueResDist; /** Distribution of the number of instructions issued. */ Stats::Distribution<> numIssuedDist; - /** Distribution of the cycles it takes to issue an instruction. */ + /** Distribution of the cycles it takes to issue an instruction. + * @todo: Need to create struct to track the ready time for each + * instruction. */ Stats::VectorDistribution<> issueDelayDist; /** Number of times an instruction could not be issued because a @@ -492,8 +497,7 @@ class InstructionQueue /** Number of instructions issued per cycle. */ Stats::Formula issueRate; -// Stats::Formula issue_stores; -// Stats::Formula issue_op_rate; + /** Number of times the FU was busy.
*/ Stats::Vector<> fuBusy; /** Number of times the FU was busy per instruction issued. */ diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 06a052c6f7..66d4a54c67 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -289,22 +289,7 @@ InstructionQueue::regStats() .flags(total) ; issueRate = iqInstsIssued / cpu->numCycles; -/* - issue_stores - .name(name() + ".ISSUE:stores") - .desc("Number of stores issued") - .flags(total) - ; - issue_stores = exe_refs - exe_loads; -*/ -/* - issue_op_rate - .name(name() + ".ISSUE:op_rate") - .desc("Operation issue rate") - .flags(total) - ; - issue_op_rate = issued_ops / numCycles; -*/ + statFuBusy .init(Num_OpClasses) .name(name() + ".ISSUE:fu_full") diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index bc4154c854..1dbd46b8ed 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -62,6 +62,9 @@ class LSQ { /** Returns the name of the LSQ. */ std::string name() const; + /** Registers statistics of each LSQ unit. */ + void regStats(); + /** Sets the pointer to the list of active threads. */ void setActiveThreads(std::list *at_ptr); /** Sets the CPU pointer. */ diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 27aa0dc3c6..0b6c6f542a 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2006 The Regents of The University of Michigan + * Copyright (c) 2005-2006 The Regents of The University of Michigan * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -106,6 +106,16 @@ LSQ::name() const return iewStage->name() + ".lsq"; } +template +void +LSQ::regStats() +{ + //Register stats of each thread's LSQ unit + for (int tid=0; tid < numThreads; tid++) { + thread[tid].regStats(); + } +} + template void LSQ::setActiveThreads(list *at_ptr) diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index ce0cdd36f8..3de5815198 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -77,6 +77,9 @@ class LSQUnit { /** Returns the name of the LSQ unit. */ std::string name() const; + /** Registers statistics. */ + void regStats(); + /** Sets the CPU pointer. */ void setCPU(FullCPU *cpu_ptr); @@ -127,9 +130,6 @@ class LSQUnit { void completeDataAccess(PacketPtr pkt); - // @todo: Include stats in the LSQ unit. - //void regStats(); - /** Clears all the entries in the LQ. */ void clearLQ(); @@ -443,25 +443,35 @@ class LSQUnit { // Will also need how many read/write ports the Dcache has. Or keep track // of that in stage that is one level up, and only call executeLoad/Store // the appropriate number of times. -/* - // total number of loads forwaded from LSQ stores - Stats::Vector<> lsq_forw_loads; - // total number of loads ignored due to invalid addresses - Stats::Vector<> inv_addr_loads; + /** Total number of loads forwarded from LSQ stores. */ + Stats::Scalar<> lsqForwLoads; - // total number of software prefetches ignored due to invalid addresses - Stats::Vector<> inv_addr_swpfs; + /** Total number of loads ignored due to invalid addresses. */ + Stats::Scalar<> invAddrLoads; - // total non-speculative bogus addresses seen (debug var) - Counter sim_invalid_addrs; - Stats::Vector<> fu_busy; //cumulative fu busy + /** Total number of squashed loads.
*/ + Stats::Scalar<> lsqSquashedLoads; - // ready loads blocked due to memory disambiguation - Stats::Vector<> lsq_blocked_loads; + /** Total number of responses from the memory system that are + * ignored due to the instruction already being squashed. */ + Stats::Scalar<> lsqIgnoredResponses; + + /** Total number of squashed stores. */ + Stats::Scalar<> lsqSquashedStores; + + /** Total number of software prefetches ignored due to invalid addresses. */ + Stats::Scalar<> invAddrSwpfs; + + /** Ready loads blocked due to partial store-forwarding. */ + Stats::Scalar<> lsqBlockedLoads; + + /** Number of loads that were rescheduled. */ + Stats::Scalar<> lsqRescheduledLoads; + + /** Number of times the LSQ is blocked due to the cache. */ + Stats::Scalar<> lsqCacheBlocked; - Stats::Scalar<> lsqInversion; -*/ public: /** Executes the load at the given index. */ template @@ -519,6 +529,7 @@ LSQUnit::read(Request *req, T &data, int load_idx) if (req->getFlags() & UNCACHEABLE && (load_idx != loadHead || !load_inst->reachedCommit)) { iewStage->rescheduleMemInst(load_inst); + ++lsqRescheduledLoads; return TheISA::genMachineCheckFault(); } @@ -598,7 +609,7 @@ LSQUnit::read(Request *req, T &data, int load_idx) // @todo: Need to make this a parameter. wb->schedule(curTick); - // Should keep track of stat for forwarded data + ++lsqForwLoads; return NoFault; } else if ((store_has_lower_limit && lower_load_has_store_part) || (store_has_upper_limit && upper_load_has_store_part) || @@ -626,6 +637,7 @@ LSQUnit::read(Request *req, T &data, int load_idx) // Tell IQ/mem dep unit that this instruction will need to be // rescheduled eventually iewStage->rescheduleMemInst(load_inst); + ++lsqRescheduledLoads; // Do not generate a writeback event as this instruction is not // complete. 
@@ -633,6 +645,7 @@ LSQUnit::read(Request *req, T &data, int load_idx) "Store idx %i to load addr %#x\n", store_idx, req->getVaddr()); + ++lsqBlockedLoads; return NoFault; } } @@ -660,6 +673,7 @@ LSQUnit::read(Request *req, T &data, int load_idx) // if we have a cache, do cache access too if (!dcachePort->sendTiming(data_pkt)) { + ++lsqCacheBlocked; // There's an older load that's already going to squash. if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum) return NoFault; diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 4c01f29a24..a5c1eb12ac 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -196,6 +196,47 @@ LSQUnit::name() const } } +template +void +LSQUnit::regStats() +{ + lsqForwLoads + .name(name() + ".forwLoads") + .desc("Number of loads that had data forwarded from stores"); + + invAddrLoads + .name(name() + ".invAddrLoads") + .desc("Number of loads ignored due to an invalid address"); + + lsqSquashedLoads + .name(name() + ".squashedLoads") + .desc("Number of loads squashed"); + + lsqIgnoredResponses + .name(name() + ".ignoredResponses") + .desc("Number of memory responses ignored because the instruction is squashed"); + + lsqSquashedStores + .name(name() + ".squashedStores") + .desc("Number of stores squashed"); + + invAddrSwpfs + .name(name() + ".invAddrSwpfs") + .desc("Number of software prefetches ignored due to an invalid address"); + + lsqBlockedLoads + .name(name() + ".blockedLoads") + .desc("Number of blocked loads due to partial load-store forwarding"); + + lsqRescheduledLoads + .name(name() + ".rescheduledLoads") + .desc("Number of loads that were rescheduled"); + + lsqCacheBlocked + .name(name() + ".cacheBlocked") + .desc("Number of times an access to memory failed due to the cache being blocked"); +} + template void LSQUnit::clearLQ() @@ -618,7 +659,7 @@ LSQUnit::writebackStores() if (!dcachePort->sendTiming(data_pkt)) { // Need to handle becoming blocked on a store. 
isStoreBlocked = true; - + ++lsqCacheBlocked; assert(retryPkt == NULL); retryPkt = data_pkt; } else { @@ -677,6 +718,7 @@ LSQUnit::squash(const InstSeqNum &squashed_num) loadTail = load_idx; decrLdIdx(load_idx); + ++lsqSquashedLoads; } if (isLoadBlocked) { @@ -723,6 +765,7 @@ LSQUnit::squash(const InstSeqNum &squashed_num) storeTail = store_idx; decrStIdx(store_idx); + ++lsqSquashedStores; } } @@ -782,6 +825,7 @@ LSQUnit::writeback(DynInstPtr &inst, PacketPtr pkt) // Squashed instructions do not need to complete their access. if (inst->isSquashed()) { assert(!inst->isStore()); + ++lsqIgnoredResponses; return; } @@ -858,6 +902,7 @@ LSQUnit::recvRetry() isStoreBlocked = false; } else { // Still blocked! + ++lsqCacheBlocked; } } else if (isLoadBlocked) { DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, " From 133903b28d06842ec8fd30ae8b439c37e33da31b Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Tue, 13 Jun 2006 22:39:31 -0400 Subject: [PATCH 019/152] Add in a few global options. Feel free to rename them, they're just the first thing that came to mind. src/python/m5/__init__.py: Add in a few global options. 
--HG-- extra : convert_revision : e0dba78dd60f565a2e5cbda2cd6cf221bb3f4688 --- src/python/m5/__init__.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py index 60a61d66eb..2d4825b0ef 100644 --- a/src/python/m5/__init__.py +++ b/src/python/m5/__init__.py @@ -64,11 +64,34 @@ def AddToPath(path): def setTraceFlags(option, opt_str, value, parser): objects.Trace.flags = value +def setTraceStart(option, opt_str, value, parser): + objects.Trace.start = value + +def clearPCSymbol(option, opt_str, value, parser): + objects.ExecutionTrace.pc_symbol = False + +def clearPrintCycle(option, opt_str, value, parser): + objects.ExecutionTrace.print_cycle = False + +def statsTextFile(option, opt_str, value, parser): + objects.Statistics.text_file = value + # Standard optparse options. Need to be explicitly included by the # user script when it calls optparse.OptionParser(). standardOptions = [ optparse.make_option("--traceflags", type="string", action="callback", - callback=setTraceFlags) + callback=setTraceFlags), + optparse.make_option("--tracestart", type="int", action="callback", + callback=setTraceStart), + optparse.make_option("--nopcsymbol", action="callback", + callback=clearPCSymbol, + help="Turn off printing PC symbols in trace output"), + optparse.make_option("--noprintcycle", action="callback", + callback=clearPrintCycle, + help="Turn off printing cycles in trace output"), + optparse.make_option("--statsfile", type="string", action="callback", + callback=statsTextFile, metavar="FILE", + help="Sets the output file for the statistics") ] # make a SmartDict out of the build options for our local use From e981a97dec3df921f3800fd9ae5ec01ed4e9d2b1 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Tue, 13 Jun 2006 23:19:28 -0400 Subject: [PATCH 020/152] Move SimObject creation and Port connection loops into Python. 
Add Port and VectorPort objects and support for specifying port connections via assignment. The whole C++ ConfigNode hierarchy is gone now, as are C++ Connector objects. configs/test/fs.py: configs/test/test.py: Rewrite for new port connector syntax. src/SConscript: Remove unneeded files: - mem/connector.* - sim/config* src/dev/io_device.hh: src/mem/bridge.cc: src/mem/bridge.hh: src/mem/bus.cc: src/mem/bus.hh: src/mem/mem_object.hh: src/mem/physical.cc: src/mem/physical.hh: Allow getPort() to take an optional index to support vector ports (eventually). src/python/m5/__init__.py: Move SimObject construction and port connection operations into Python (with C++ calls). src/python/m5/config.py: Move SimObject construction and port connection operations into Python (with C++ calls). Add support for declaring and connecting MemObject ports in Python. src/python/m5/objects/Bus.py: src/python/m5/objects/PhysicalMemory.py: Add port declaration. src/sim/builder.cc: src/sim/builder.hh: src/sim/serialize.cc: src/sim/serialize.hh: ConfigNodes are gone; builder just gets the name of a .ini file section now. src/sim/main.cc: Move SimObject construction and port connection operations into Python (with C++ calls). Split remaining initialization operations into two parts, loadIniFile() and finalInit(). src/sim/param.cc: src/sim/param.hh: SimObject resolution done globally in Python now (not via ConfigNode hierarchy). src/sim/sim_object.cc: Remove unneeded #include. 
--HG-- extra : convert_revision : 2fa4001eaaec0c9a4231ef6e854f8e156d930dfe --- configs/test/fs.py | 64 ++++++------ configs/test/test.py | 2 +- src/SConscript | 2 - src/dev/io_device.hh | 4 +- src/mem/bridge.cc | 2 +- src/mem/bridge.hh | 2 +- src/mem/bus.cc | 2 +- src/mem/bus.hh | 2 +- src/mem/mem_object.hh | 2 +- src/mem/physical.cc | 4 +- src/mem/physical.hh | 2 +- src/python/m5/__init__.py | 15 ++- src/python/m5/config.py | 125 +++++++++++++++++++++++- src/python/m5/objects/Bus.py | 1 + src/python/m5/objects/PhysicalMemory.py | 1 + src/sim/builder.cc | 22 ++--- src/sim/builder.hh | 26 ++--- src/sim/main.cc | 115 ++++++++++++++++++---- src/sim/param.cc | 22 +---- src/sim/param.hh | 10 +- src/sim/serialize.cc | 9 +- src/sim/serialize.hh | 10 +- src/sim/sim_object.cc | 1 - 23 files changed, 308 insertions(+), 137 deletions(-) diff --git a/configs/test/fs.py b/configs/test/fs.py index 55e7003a49..333a4dfafe 100644 --- a/configs/test/fs.py +++ b/configs/test/fs.py @@ -154,40 +154,40 @@ class LinuxAlphaSystem(LinuxAlphaSystem): magicbus2 = Bus(bus_id=1) bridge = Bridge() physmem = PhysicalMemory(range = AddrRange('128MB')) - c0a = Connector(side_a=Parent.magicbus, side_b=Parent.bridge, side_b_name="side_a") - c0b = Connector(side_a=Parent.magicbus2, side_b=Parent.bridge, side_b_name="side_b") + bridge.side_a = magicbus + bridge.side_b = magicbus2 c1 = Connector(side_a=Parent.physmem, side_b=Parent.magicbus2) tsunami = LinuxTsunami() - c2 = Connector(side_a=Parent.tsunami.cchip, side_a_name='pio', side_b=Parent.magicbus) - c3 = Connector(side_a=Parent.tsunami.pchip, side_a_name='pio', side_b=Parent.magicbus) - c4 = Connector(side_a=Parent.tsunami.pciconfig, side_a_name='pio', side_b=Parent.magicbus) - c5 = Connector(side_a=Parent.tsunami.fake_sm_chip, side_a_name='pio', side_b=Parent.magicbus) - c6 = Connector(side_a=Parent.tsunami.ethernet, side_a_name='pio', side_b=Parent.magicbus) - c6a = Connector(side_a=Parent.tsunami.ethernet, side_a_name='dma', 
side_b=Parent.magicbus) - c7 = Connector(side_a=Parent.tsunami.fake_uart1, side_a_name='pio', side_b=Parent.magicbus) - c8 = Connector(side_a=Parent.tsunami.fake_uart2, side_a_name='pio', side_b=Parent.magicbus) - c9 = Connector(side_a=Parent.tsunami.fake_uart3, side_a_name='pio', side_b=Parent.magicbus) - c10 = Connector(side_a=Parent.tsunami.fake_uart4, side_a_name='pio', side_b=Parent.magicbus) - c11 = Connector(side_a=Parent.tsunami.ide, side_a_name='pio', side_b=Parent.magicbus) - c13 = Connector(side_a=Parent.tsunami.ide, side_a_name='dma', side_b=Parent.magicbus) - c12 = Connector(side_a=Parent.tsunami.fake_ppc, side_a_name='pio', side_b=Parent.magicbus) - c14 = Connector(side_a=Parent.tsunami.fake_OROM, side_a_name='pio', side_b=Parent.magicbus) - c16 = Connector(side_a=Parent.tsunami.fake_pnp_addr, side_a_name='pio', side_b=Parent.magicbus) - c17 = Connector(side_a=Parent.tsunami.fake_pnp_write, side_a_name='pio', side_b=Parent.magicbus) - c18 = Connector(side_a=Parent.tsunami.fake_pnp_read0, side_a_name='pio', side_b=Parent.magicbus) - c19 = Connector(side_a=Parent.tsunami.fake_pnp_read1, side_a_name='pio', side_b=Parent.magicbus) - c20 = Connector(side_a=Parent.tsunami.fake_pnp_read2, side_a_name='pio', side_b=Parent.magicbus) - c21 = Connector(side_a=Parent.tsunami.fake_pnp_read3, side_a_name='pio', side_b=Parent.magicbus) - c22 = Connector(side_a=Parent.tsunami.fake_pnp_read4, side_a_name='pio', side_b=Parent.magicbus) - c23 = Connector(side_a=Parent.tsunami.fake_pnp_read5, side_a_name='pio', side_b=Parent.magicbus) - c24 = Connector(side_a=Parent.tsunami.fake_pnp_read6, side_a_name='pio', side_b=Parent.magicbus) - c25 = Connector(side_a=Parent.tsunami.fake_pnp_read7, side_a_name='pio', side_b=Parent.magicbus) - c27 = Connector(side_a=Parent.tsunami.fake_ata0, side_a_name='pio', side_b=Parent.magicbus) - c28 = Connector(side_a=Parent.tsunami.fake_ata1, side_a_name='pio', side_b=Parent.magicbus) - c30 = Connector(side_a=Parent.tsunami.fb, 
side_a_name='pio', side_b=Parent.magicbus) - c31 = Connector(side_a=Parent.tsunami.io, side_a_name='pio', side_b=Parent.magicbus) - c32 = Connector(side_a=Parent.tsunami.uart, side_a_name='pio', side_b=Parent.magicbus) - c33 = Connector(side_a=Parent.tsunami.console, side_a_name='pio', side_b=Parent.magicbus) + tsunami.cchip.pio = magicbus + tsunami.pchip.pio = magicbus + tsunami.pciconfig.pio = magicbus + tsunami.fake_sm_chip.pio = magicbus + tsunami.ethernet.pio = magicbus + tsunami.ethernet.dma = magicbus + tsunami.fake_uart1.pio = magicbus + tsunami.fake_uart2.pio = magicbus + tsunami.fake_uart3.pio = magicbus + tsunami.fake_uart4.pio = magicbus + tsunami.ide.pio = magicbus + tsunami.ide.dma = magicbus + tsunami.fake_ppc.pio = magicbus + tsunami.fake_OROM.pio = magicbus + tsunami.fake_pnp_addr.pio = magicbus + tsunami.fake_pnp_write.pio = magicbus + tsunami.fake_pnp_read0.pio = magicbus + tsunami.fake_pnp_read1.pio = magicbus + tsunami.fake_pnp_read2.pio = magicbus + tsunami.fake_pnp_read3.pio = magicbus + tsunami.fake_pnp_read4.pio = magicbus + tsunami.fake_pnp_read5.pio = magicbus + tsunami.fake_pnp_read6.pio = magicbus + tsunami.fake_pnp_read7.pio = magicbus + tsunami.fake_ata0.pio = magicbus + tsunami.fake_ata1.pio = magicbus + tsunami.fb.pio = magicbus + tsunami.io.pio = magicbus + tsunami.uart.pio = magicbus + tsunami.console.pio = magicbus raw_image = RawDiskImage(image_file=disk('linux-latest.img'), read_only=True) simple_disk = SimpleDisk(disk=Parent.raw_image) diff --git a/configs/test/test.py b/configs/test/test.py index 2ece9e6752..ae85af1128 100644 --- a/configs/test/test.py +++ b/configs/test/test.py @@ -41,7 +41,7 @@ cpu.workload = process cpu.mem = magicbus system = System(physmem = mem, cpu = cpu) -system.c1 = Connector(side_a = mem, side_b = magicbus) +mem.port = magicbus.port root = Root(system = system) # instantiate configuration diff --git a/src/SConscript b/src/SConscript index a1c18711c7..e6ed43804c 100644 --- a/src/SConscript +++ 
b/src/SConscript @@ -97,14 +97,12 @@ base_sources = Split(''' mem/bridge.cc mem/bus.cc - mem/connector.cc mem/mem_object.cc mem/packet.cc mem/physical.cc mem/port.cc sim/builder.cc - sim/configfile.cc sim/debug.cc sim/eventq.cc sim/faults.cc diff --git a/src/dev/io_device.hh b/src/dev/io_device.hh index 195ca0fb76..cd2c25eeb7 100644 --- a/src/dev/io_device.hh +++ b/src/dev/io_device.hh @@ -247,7 +247,7 @@ class PioDevice : public MemObject virtual void init(); - virtual Port *getPort(const std::string &if_name) + virtual Port *getPort(const std::string &if_name, int idx = -1) { if (if_name == "pio") { if (pioPort != NULL) @@ -309,7 +309,7 @@ class DmaDevice : public PioDevice bool dmaPending() { return dmaPort->dmaPending(); } - virtual Port *getPort(const std::string &if_name) + virtual Port *getPort(const std::string &if_name, int idx = -1) { if (if_name == "pio") { if (pioPort != NULL) diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc index 3718cbaaf3..29ea2e12f4 100644 --- a/src/mem/bridge.cc +++ b/src/mem/bridge.cc @@ -59,7 +59,7 @@ Bridge::Bridge(const std::string &n, int qsa, int qsb, } Port * -Bridge::getPort(const std::string &if_name) +Bridge::getPort(const std::string &if_name, int idx) { BridgePort *port; diff --git a/src/mem/bridge.hh b/src/mem/bridge.hh index 37fb92662e..b3525d3e0f 100644 --- a/src/mem/bridge.hh +++ b/src/mem/bridge.hh @@ -177,7 +177,7 @@ class Bridge : public MemObject public: /** A function used to return the port associated with this bus object. */ - virtual Port *getPort(const std::string &if_name); + virtual Port *getPort(const std::string &if_name, int idx = -1); virtual void init(); diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 919acd23c7..19a3dc9e40 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -38,7 +38,7 @@ #include "sim/builder.hh" Port * -Bus::getPort(const std::string &if_name) +Bus::getPort(const std::string &if_name, int idx) { // if_name ignored? forced to be empty? 
int id = interfaces.size(); diff --git a/src/mem/bus.hh b/src/mem/bus.hh index 50bfba6e4a..c2b78c31fd 100644 --- a/src/mem/bus.hh +++ b/src/mem/bus.hh @@ -161,7 +161,7 @@ class Bus : public MemObject public: /** A function used to return the port associated with this bus object. */ - virtual Port *getPort(const std::string &if_name); + virtual Port *getPort(const std::string &if_name, int idx = -1); virtual void init(); diff --git a/src/mem/mem_object.hh b/src/mem/mem_object.hh index ac547619d2..c81ea03d83 100644 --- a/src/mem/mem_object.hh +++ b/src/mem/mem_object.hh @@ -50,7 +50,7 @@ class MemObject : public SimObject public: /** Additional function to return the Port of a memory object. */ - virtual Port *getPort(const std::string &if_name) = 0; + virtual Port *getPort(const std::string &if_name, int idx = -1) = 0; }; #endif //__MEM_MEM_OBJECT_HH__ diff --git a/src/mem/physical.cc b/src/mem/physical.cc index fb31fb4a36..2d66602ab4 100644 --- a/src/mem/physical.cc +++ b/src/mem/physical.cc @@ -173,9 +173,9 @@ PhysicalMemory::doFunctionalAccess(Packet *pkt) } Port * -PhysicalMemory::getPort(const std::string &if_name) +PhysicalMemory::getPort(const std::string &if_name, int idx) { - if (if_name == "") { + if (if_name == "port" && idx == -1) { if (port != NULL) panic("PhysicalMemory::getPort: additional port requested to memory!"); port = new MemoryPort(name() + "-port", this); diff --git a/src/mem/physical.hh b/src/mem/physical.hh index 88ea543da5..50fa75ed3e 100644 --- a/src/mem/physical.hh +++ b/src/mem/physical.hh @@ -108,7 +108,7 @@ class PhysicalMemory : public MemObject public: int deviceBlockSize(); void getAddressRanges(AddrRangeList &resp, AddrRangeList &snoop); - virtual Port *getPort(const std::string &if_name); + virtual Port *getPort(const std::string &if_name, int idx = -1); void virtual init(); // fast back-door memory access for vtophys(), remote gdb, etc. 
diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py index 60a61d66eb..208d11b694 100644 --- a/src/python/m5/__init__.py +++ b/src/python/m5/__init__.py @@ -80,6 +80,16 @@ build_env.update(defines.m5_build_env) env = smartdict.SmartDict() env.update(os.environ) + +# Function to provide to C++ so it can look up instances based on paths +def resolveSimObject(name): + obj = config.instanceDict[name] + if not obj._ccObject: + obj.createCCObject() + if obj._ccObject == -1: + panic("resolveSimObject: recursive lookup error on %s" % name) + return obj._ccObject + # The final hook to generate .ini files. Called from the user script # once the config is built. def instantiate(root): @@ -89,7 +99,10 @@ def instantiate(root): root.print_ini() sys.stdout.close() # close config.ini sys.stdout = sys.__stdout__ # restore to original - main.initialize() # load config.ini into C++ and process it + main.loadIniFile(resolveSimObject) # load config.ini into C++ + root.createCCObject() + root.connectPorts() + main.finalInit() noDot = True # temporary until we fix dot if not noDot: dot = pydot.Dot() diff --git a/src/python/m5/config.py b/src/python/m5/config.py index 97e13c900c..f23fd2c6fa 100644 --- a/src/python/m5/config.py +++ b/src/python/m5/config.py @@ -133,6 +133,10 @@ class Singleton(type): # ##################################################################### + +# dict to look up SimObjects based on path +instanceDict = {} + def isSimObject(value): return isinstance(value, SimObject) @@ -200,7 +204,8 @@ class MetaSimObject(type): 'type' : types.StringType } # Attributes that can be set any time keywords = { 'check' : types.FunctionType, - 'children' : types.ListType } + 'children' : types.ListType, + 'ccObject' : types.ObjectType } # __new__ is called before __init__, and is where the statements # in the body of the class definition get loaded into the class's @@ -233,6 +238,7 @@ class MetaSimObject(type): # initialize required attributes cls._params = 
multidict() cls._values = multidict() + cls._ports = multidict() cls._instantiated = False # really instantiated or subclassed cls._anon_subclass_counter = 0 @@ -248,6 +254,7 @@ class MetaSimObject(type): if isinstance(base, MetaSimObject): cls._params.parent = base._params cls._values.parent = base._values + cls._ports.parent = base._ports base._instantiated = True # now process the _init_dict items @@ -259,6 +266,10 @@ class MetaSimObject(type): elif isinstance(val, ParamDesc): cls._new_param(key, val) + # port objects + elif isinstance(val, Port): + cls._ports[key] = val + # init-time-only keywords elif cls.init_keywords.has_key(key): cls._set_keyword(key, val, cls.init_keywords[key]) @@ -313,6 +324,10 @@ class MetaSimObject(type): cls._set_keyword(attr, value, cls.keywords[attr]) return + if cls._ports.has_key(attr): + self._ports[attr].connect(self, attr, value) + return + # must be SimObject param param = cls._params.get(attr, None) if param: @@ -428,6 +443,9 @@ class SimObject(object): for key,val in kwargs.iteritems(): setattr(self, key, val) + self._ccObject = None # pointer to C++ object + self._port_map = {} # map of port connections + # Use this instance as a template to create a new class. 
def makeClass(self, memo = {}): cls = memo.get(self) @@ -443,6 +461,11 @@ class SimObject(object): "use makeClass() to make class first" def __getattr__(self, attr): + if self._ports.has_key(attr): + # return reference that can be assigned to another port + # via __setattr__ + return self._ports[attr].makeRef(self, attr) + if self._values.has_key(attr): return self._values[attr] @@ -457,6 +480,11 @@ class SimObject(object): object.__setattr__(self, attr, value) return + if self._ports.has_key(attr): + # set up port connection + self._ports[attr].connect(self, attr, value) + return + # must be SimObject param param = self._params.get(attr, None) if param: @@ -554,6 +582,8 @@ class SimObject(object): def print_ini(self): print '[' + self.path() + ']' # .ini section header + instanceDict[self.path()] = self + if hasattr(self, 'type') and not isinstance(self, ParamContext): print 'type=%s' % self.type @@ -585,6 +615,24 @@ class SimObject(object): for child in child_names: self._children[child].print_ini() + # Call C++ to create C++ object corresponding to this object and + # (recursively) all its children + def createCCObject(self): + if self._ccObject: + return + self._ccObject = -1 + self._ccObject = m5.main.createSimObject(self.path()) + for child in self._children.itervalues(): + child.createCCObject() + + # Create C++ port connections corresponding to the connections in + # _port_map (& recursively for all children) + def connectPorts(self): + for portRef in self._port_map.itervalues(): + applyOrMap(portRef, 'ccConnect') + for child in self._children.itervalues(): + child.connectPorts() + # generate output file for 'dot' to display as a pretty graph. # this code is currently broken. def outputDot(self, dot): @@ -1419,6 +1467,78 @@ MaxAddr = Addr.max MaxTick = Tick.max AllMemory = AddrRange(0, MaxAddr) + +##################################################################### +# +# Port objects +# +# Ports are used to interconnect objects in the memory system. 
+# +##################################################################### + +# Port reference: encapsulates a reference to a particular port on a +# particular SimObject. +class PortRef(object): + def __init__(self, simobj, name, isVec): + self.simobj = simobj + self.name = name + self.index = -1 + self.isVec = isVec # is this a vector port? + self.peer = None # not associated with another port yet + self.ccConnected = False # C++ port connection done? + + # Set peer port reference. Called via __setattr__ as a result of + # a port assignment, e.g., "obj1.port1 = obj2.port2". + def setPeer(self, other): + if self.isVec: + curMap = self.simobj._port_map.get(self.name, []) + self.index = len(curMap) + curMap.append(other) + else: + curMap = self.simobj._port_map.get(self.name) + if curMap and not self.isVec: + print "warning: overwriting port", self.simobj, self.name + curMap = other + self.simobj._port_map[self.name] = curMap + self.peer = other + + # Call C++ to create corresponding port connection between C++ objects + def ccConnect(self): + if self.ccConnected: # already done this + return + peer = self.peer + m5.main.connectPorts(self.simobj._ccObject, self.name, self.index, + peer.simobj._ccObject, peer.name, peer.index) + self.ccConnected = True + peer.ccConnected = True + +# Port description object. Like a ParamDesc object, this represents a +# logical port in the SimObject class, not a particular port on a +# SimObject instance. The latter are represented by PortRef objects. 
+class Port(object): + def __init__(self, desc): + self.desc = desc + self.isVec = False + + # Generate a PortRef for this port on the given SimObject with the + # given name + def makeRef(self, simobj, name): + return PortRef(simobj, name, self.isVec) + + # Connect an instance of this port (on the given SimObject with + # the given name) with the port described by the supplied PortRef + def connect(self, simobj, name, ref): + myRef = self.makeRef(simobj, name) + myRef.setPeer(ref) + ref.setPeer(myRef) + +# VectorPort description object. Like Port, but represents a vector +# of connections (e.g., as on a Bus). +class VectorPort(Port): + def __init__(self, desc): + Port.__init__(self, desc) + self.isVec = True + ##################################################################### # __all__ defines the list of symbols that get exported when @@ -1436,5 +1556,6 @@ __all__ = ['SimObject', 'ParamContext', 'Param', 'VectorParam', 'NetworkBandwidth', 'MemoryBandwidth', 'Range', 'AddrRange', 'MaxAddr', 'MaxTick', 'AllMemory', 'Null', 'NULL', - 'NextEthernetAddr'] + 'NextEthernetAddr', + 'Port', 'VectorPort'] diff --git a/src/python/m5/objects/Bus.py b/src/python/m5/objects/Bus.py index c37dab438a..019e150346 100644 --- a/src/python/m5/objects/Bus.py +++ b/src/python/m5/objects/Bus.py @@ -3,4 +3,5 @@ from MemObject import MemObject class Bus(MemObject): type = 'Bus' + port = VectorPort("vector port for connecting devices") bus_id = Param.Int(0, "blah") diff --git a/src/python/m5/objects/PhysicalMemory.py b/src/python/m5/objects/PhysicalMemory.py index bed90d5559..9cc7510a28 100644 --- a/src/python/m5/objects/PhysicalMemory.py +++ b/src/python/m5/objects/PhysicalMemory.py @@ -3,6 +3,7 @@ from MemObject import * class PhysicalMemory(MemObject): type = 'PhysicalMemory' + port = Port("the access port") range = Param.AddrRange("Device Address") file = Param.String('', "memory mapped file") latency = Param.Latency(Parent.clock, "latency of an access") diff --git 
a/src/sim/builder.cc b/src/sim/builder.cc index 121275c833..9074cc8995 100644 --- a/src/sim/builder.cc +++ b/src/sim/builder.cc @@ -33,17 +33,14 @@ #include "base/inifile.hh" #include "base/misc.hh" #include "sim/builder.hh" -#include "sim/configfile.hh" -#include "sim/config_node.hh" #include "sim/host.hh" #include "sim/sim_object.hh" #include "sim/root.hh" using namespace std; -SimObjectBuilder::SimObjectBuilder(ConfigNode *_configNode) - : ParamContext(_configNode->getPath(), NoAutoInit), - configNode(_configNode) +SimObjectBuilder::SimObjectBuilder(const std::string &_iniSection) + : ParamContext(_iniSection, NoAutoInit) { } @@ -78,8 +75,7 @@ SimObjectBuilder::parseParams(IniFile &iniFile) void SimObjectBuilder::printErrorProlog(ostream &os) { - ccprintf(os, "Error creating object '%s' of type '%s':\n", - iniSection, configNode->getType()); + ccprintf(os, "Error creating object '%s':\n", iniSection); } @@ -112,9 +108,13 @@ SimObjectClass::SimObjectClass(const string &className, CreateFunc createFunc) // // SimObject * -SimObjectClass::createObject(IniFile &configDB, ConfigNode *configNode) +SimObjectClass::createObject(IniFile &configDB, const std::string &iniSection) { - const string &type = configNode->getType(); + string type; + if (!configDB.find(iniSection, "type", type)) { + // no C++ type associated with this object + return NULL; + } // look up className to get appropriate createFunc if (classMap->find(type) == classMap->end()) @@ -125,7 +125,7 @@ SimObjectClass::createObject(IniFile &configDB, ConfigNode *configNode) // call createFunc with config hierarchy node to get object // builder instance (context with parameters for object creation) - SimObjectBuilder *objectBuilder = (*createFunc)(configNode); + SimObjectBuilder *objectBuilder = (*createFunc)(iniSection); assert(objectBuilder != NULL); @@ -166,7 +166,7 @@ SimObjectClass::describeAllClasses(ostream &os) os << "[" << className << "]\n"; // create dummy object builder just to instantiate 
parameters - SimObjectBuilder *objectBuilder = (*createFunc)(NULL); + SimObjectBuilder *objectBuilder = (*createFunc)(""); // now get the object builder to describe ite params objectBuilder->describeParams(os); diff --git a/src/sim/builder.hh b/src/sim/builder.hh index 8d08461557..2997fe5c33 100644 --- a/src/sim/builder.hh +++ b/src/sim/builder.hh @@ -55,14 +55,8 @@ class SimObject; // class SimObjectBuilder : public ParamContext { - private: - // The corresponding node in the configuration hierarchy. - // (optional: may be null if the created object is not in the - // hierarchy) - ConfigNode *configNode; - public: - SimObjectBuilder(ConfigNode *_configNode); + SimObjectBuilder(const std::string &_iniSection); virtual ~SimObjectBuilder(); @@ -77,9 +71,6 @@ class SimObjectBuilder : public ParamContext // configuration hierarchy node label and position) virtual const std::string &getInstanceName() { return iniSection; } - // return the configuration hierarchy node for this context. - virtual ConfigNode *getConfigNode() { return configNode; } - // Create the actual SimObject corresponding to the parameter // values in this context. This function is overridden in derived // classes to call a specific constructor for a particular @@ -125,7 +116,7 @@ class SimObjectClass // for the object (specified by the second string argument), and // an optional config hierarchy node (specified by the third // argument). A pointer to the new SimObjectBuilder is returned. 
- typedef SimObjectBuilder *(*CreateFunc)(ConfigNode *configNode); + typedef SimObjectBuilder *(*CreateFunc)(const std::string &iniSection); static std::map *classMap; @@ -137,7 +128,8 @@ class SimObjectClass // create SimObject given name of class and pointer to // configuration hierarchy node - static SimObject *createObject(IniFile &configDB, ConfigNode *configNode); + static SimObject *createObject(IniFile &configDB, + const std::string &iniSection); // print descriptions of all parameters registered with all // SimObject classes @@ -156,15 +148,15 @@ class OBJ_CLASS##Builder : public SimObjectBuilder \ #define END_DECLARE_SIM_OBJECT_PARAMS(OBJ_CLASS) \ \ - OBJ_CLASS##Builder(ConfigNode *configNode); \ + OBJ_CLASS##Builder(const std::string &iniSection); \ virtual ~OBJ_CLASS##Builder() {} \ \ OBJ_CLASS *create(); \ }; #define BEGIN_INIT_SIM_OBJECT_PARAMS(OBJ_CLASS) \ -OBJ_CLASS##Builder::OBJ_CLASS##Builder(ConfigNode *configNode) \ - : SimObjectBuilder(configNode), + OBJ_CLASS##Builder::OBJ_CLASS##Builder(const std::string &iSec) \ + : SimObjectBuilder(iSec), #define END_INIT_SIM_OBJECT_PARAMS(OBJ_CLASS) \ @@ -176,9 +168,9 @@ OBJ_CLASS *OBJ_CLASS##Builder::create() #define REGISTER_SIM_OBJECT(CLASS_NAME, OBJ_CLASS) \ SimObjectBuilder * \ -new##OBJ_CLASS##Builder(ConfigNode *configNode) \ +new##OBJ_CLASS##Builder(const std::string &iniSection) \ { \ - return new OBJ_CLASS##Builder(configNode); \ + return new OBJ_CLASS##Builder(iniSection); \ } \ \ SimObjectClass the##OBJ_CLASS##Class(CLASS_NAME, \ diff --git a/src/sim/main.cc b/src/sim/main.cc index f3b74489d5..f63aec9cc4 100644 --- a/src/sim/main.cc +++ b/src/sim/main.cc @@ -57,9 +57,10 @@ #include "base/time.hh" #include "cpu/base.hh" #include "cpu/smt.hh" +#include "mem/mem_object.hh" +#include "mem/port.hh" #include "sim/async.hh" #include "sim/builder.hh" -#include "sim/configfile.hh" #include "sim/host.hh" #include "sim/sim_events.hh" #include "sim/sim_exit.hh" @@ -296,26 +297,109 @@ main(int argc, char 
**argv) Py_Finalize(); } +IniFile inifile; -/// Initialize C++ configuration. Exported to Python via SWIG; invoked -/// from m5.instantiate(). -void -initialize() +SimObject * +createSimObject(const string &name) { + return SimObjectClass::createObject(inifile, name); +} + + +/** + * Pointer to the Python function that maps names to SimObjects. + */ +PyObject *resolveFunc = NULL; + +/** + * Convert a pointer to the Python object that SWIG wraps around a C++ + * SimObject pointer back to the actual C++ pointer. See main.i. + */ +extern "C" SimObject *convertSwigSimObjectPtr(PyObject *); + + +SimObject * +resolveSimObject(const string &name) +{ + PyObject *pyPtr = PyEval_CallFunction(resolveFunc, "(s)", name.c_str()); + if (pyPtr == NULL) { + PyErr_Print(); + panic("resolveSimObject: failure on call to Python for %s", name); + } + + SimObject *simObj = convertSwigSimObjectPtr(pyPtr); + if (simObj == NULL) + panic("resolveSimObject: failure on pointer conversion for %s", name); + + return simObj; +} + + +/** + * Load config.ini into C++ database. Exported to Python via SWIG; + * invoked from m5.instantiate(). + */ +void +loadIniFile(PyObject *_resolveFunc) +{ + resolveFunc = _resolveFunc; configStream = simout.find("config.out"); // The configuration database is now complete; start processing it. - IniFile inifile; inifile.load("config.ini"); // Initialize statistics database Stats::InitSimStats(); +} - // Now process the configuration hierarchy and create the SimObjects. - ConfigHierarchy configHierarchy(inifile); - configHierarchy.build(); - configHierarchy.createSimObjects(); +/** + * Look up a MemObject port. Helper function for connectPorts(). 
+ */ +Port * +lookupPort(SimObject *so, const std::string &name, int i) +{ + MemObject *mo = dynamic_cast(so); + if (mo == NULL) { + warn("error casting SimObject %s to MemObject", so->name()); + return NULL; + } + + Port *p = mo->getPort(name, i); + if (p == NULL) + warn("error looking up port %s on object %s", name, so->name()); + return p; +} + + +/** + * Connect the described MemObject ports. Called from Python via SWIG. + */ +int +connectPorts(SimObject *o1, const std::string &name1, int i1, + SimObject *o2, const std::string &name2, int i2) +{ + Port *p1 = lookupPort(o1, name1, i1); + Port *p2 = lookupPort(o2, name2, i2); + + if (p1 == NULL || p2 == NULL) { + warn("connectPorts: port lookup error"); + return 0; + } + + p1->setPeer(p2); + p2->setPeer(p1); + + return 1; +} + +/** + * Do final initialization steps after object construction but before + * start of simulation. + */ +void +finalInit() +{ // Parse and check all non-config-hierarchy parameters. ParamContext::parseAllContexts(inifile); ParamContext::checkAllContexts(); @@ -323,20 +407,13 @@ initialize() // Echo all parameter settings to stats file as well. ParamContext::showAllContexts(*configStream); - // Any objects that can't connect themselves until after construction should - // do so now - SimObject::connectAll(); - // Do a second pass to finish initializing the sim objects SimObject::initAll(); // Restore checkpointed state, if any. +#if 0 configHierarchy.unserializeSimObjects(); - - // Done processing the configuration database. - // Check for unreferenced entries. 
- if (inifile.printUnreferenced()) - panic("unreferenced sections/entries in the intermediate ini file"); +#endif SimObject::regAllStats(); diff --git a/src/sim/param.cc b/src/sim/param.cc index 7f648b8e11..b1c50946be 100644 --- a/src/sim/param.cc +++ b/src/sim/param.cc @@ -39,8 +39,6 @@ #include "base/range.hh" #include "base/str.hh" #include "base/trace.hh" -#include "sim/config_node.hh" -#include "sim/configfile.hh" #include "sim/param.hh" #include "sim/sim_object.hh" @@ -521,7 +519,9 @@ parseSimObjectParam(ParamContext *context, const string &s, SimObject *&value) obj = NULL; } else { - obj = context->resolveSimObject(s); + // defined in main.cc + extern SimObject *resolveSimObject(const string &); + obj = resolveSimObject(s); if (obj == NULL) return false; @@ -695,22 +695,6 @@ ParamContext::printErrorProlog(ostream &os) os << "Parameter error in section [" << iniSection << "]: " << endl; } -// -// Resolve an object name to a SimObject pointer. The object will be -// created as a side-effect if necessary. If the name contains a -// colon (e.g., "iq:IQ"), then the object is local (invisible to -// outside this context). If there is no colon, the name needs to be -// resolved through the configuration hierarchy (only possible for -// SimObjectBuilder objects, which return non-NULL for configNode()). -// -SimObject * -ParamContext::resolveSimObject(const string &name) -{ - ConfigNode *n = getConfigNode(); - return n ? 
n->resolveSimObject(name) : NULL; -} - - // // static method: call parseParams() on all registered contexts // diff --git a/src/sim/param.hh b/src/sim/param.hh index 49db17df9d..1bc55c1253 100644 --- a/src/sim/param.hh +++ b/src/sim/param.hh @@ -36,10 +36,10 @@ #include #include -#include "sim/configfile.hh" #include "sim/startup.hh" // forward decls +class IniFile; class BaseParam; class SimObject; @@ -132,18 +132,10 @@ class ParamContext : protected StartupCallback // print context information for parameter error virtual void printErrorProlog(std::ostream &); - // resolve a SimObject name in this context to an object pointer. - virtual SimObject *resolveSimObject(const std::string &name); - // generate the name for this instance of this context (used as a // prefix to create unique names in resolveSimObject() virtual const std::string &getInstanceName() { return iniSection; } - // return the configuration hierarchy node for this context. Bare - // ParamContext objects have no corresponding node, so the default - // implementation returns NULL. - virtual ConfigNode *getConfigNode() { return NULL; } - // Parse all parameters registered with all ParamContext objects. 
static void parseAllContexts(IniFile &iniFile); diff --git a/src/sim/serialize.cc b/src/sim/serialize.cc index 5270802d1e..07e3b8a56b 100644 --- a/src/sim/serialize.cc +++ b/src/sim/serialize.cc @@ -44,7 +44,6 @@ #include "base/output.hh" #include "base/str.hh" #include "base/trace.hh" -#include "sim/config_node.hh" #include "sim/eventq.hh" #include "sim/param.hh" #include "sim/serialize.hh" @@ -442,9 +441,8 @@ Serializable::create(Checkpoint *cp, const std::string §ion) } -Checkpoint::Checkpoint(const std::string &cpt_dir, const std::string &path, - const ConfigNode *_configNode) - : db(new IniFile), basePath(path), configNode(_configNode), cptDir(cpt_dir) +Checkpoint::Checkpoint(const std::string &cpt_dir, const std::string &path) + : db(new IniFile), basePath(path), cptDir(cpt_dir) { string filename = cpt_dir + "/" + Checkpoint::baseFilename; if (!db->load(filename)) { @@ -470,9 +468,6 @@ Checkpoint::findObj(const std::string §ion, const std::string &entry, if (!db->find(section, entry, path)) return false; - if ((value = configNode->resolveSimObject(path)) != NULL) - return true; - if ((value = objMap[path]) != NULL) return true; diff --git a/src/sim/serialize.hh b/src/sim/serialize.hh index 1eb721cf4f..1bcb235e6d 100644 --- a/src/sim/serialize.hh +++ b/src/sim/serialize.hh @@ -42,8 +42,8 @@ #include #include "sim/host.hh" -#include "sim/configfile.hh" +class IniFile; class Serializable; class Checkpoint; @@ -177,7 +177,7 @@ class SerializableClass // an optional config hierarchy node (specified by the third // argument). A pointer to the new SerializableBuilder is returned. 
typedef Serializable *(*CreateFunc)(Checkpoint *cp, - const std::string §ion); + const std::string §ion); static std::map *classMap; @@ -191,7 +191,7 @@ class SerializableClass // create Serializable given name of class and pointer to // configuration hierarchy node static Serializable *createObject(Checkpoint *cp, - const std::string §ion); + const std::string §ion); }; // @@ -209,12 +209,10 @@ class Checkpoint IniFile *db; const std::string basePath; - const ConfigNode *configNode; std::map objMap; public: - Checkpoint(const std::string &cpt_dir, const std::string &path, - const ConfigNode *_configNode); + Checkpoint(const std::string &cpt_dir, const std::string &path); const std::string cptDir; diff --git a/src/sim/sim_object.cc b/src/sim/sim_object.cc index 117ca93250..97e6de439a 100644 --- a/src/sim/sim_object.cc +++ b/src/sim/sim_object.cc @@ -38,7 +38,6 @@ #include "base/trace.hh" #include "base/stats/events.hh" #include "base/serializer.hh" -#include "sim/configfile.hh" #include "sim/host.hh" #include "sim/sim_object.hh" #include "sim/stats.hh" From 5d11e8bff6a7feed1e126b9b28df3a69b21e94e4 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Wed, 14 Jun 2006 13:12:41 -0400 Subject: [PATCH 021/152] Minor code cleanup of BaseDynInst. src/cpu/base_dyn_inst.cc: src/cpu/base_dyn_inst.hh: Minor code cleanup by putting several bools into a bitset instead. src/cpu/o3/commit_impl.hh: src/cpu/o3/decode_impl.hh: src/cpu/o3/iew_impl.hh: src/cpu/o3/inst_queue_impl.hh: src/cpu/o3/lsq_unit.hh: src/cpu/o3/lsq_unit_impl.hh: src/cpu/o3/rename_impl.hh: src/cpu/o3/rob_impl.hh: Changed around some things in BaseDynInst. 
--HG-- extra : convert_revision : 1db363d69a863cc8744cc9f9ec542ade8472eb42 --- src/cpu/base_dyn_inst.cc | 35 +----- src/cpu/base_dyn_inst.hh | 214 +++++++++++++++------------------- src/cpu/o3/commit_impl.hh | 2 +- src/cpu/o3/decode_impl.hh | 4 +- src/cpu/o3/iew_impl.hh | 2 +- src/cpu/o3/inst_queue_impl.hh | 4 +- src/cpu/o3/lsq_unit.hh | 2 +- src/cpu/o3/lsq_unit_impl.hh | 4 +- src/cpu/o3/rename_impl.hh | 4 +- src/cpu/o3/rob_impl.hh | 2 +- 10 files changed, 108 insertions(+), 165 deletions(-) diff --git a/src/cpu/base_dyn_inst.cc b/src/cpu/base_dyn_inst.cc index e3829297d8..4bcd598480 100644 --- a/src/cpu/base_dyn_inst.cc +++ b/src/cpu/base_dyn_inst.cc @@ -99,39 +99,18 @@ BaseDynInst::initVars() memData = NULL; effAddr = 0; physEffAddr = 0; - storeSize = 0; readyRegs = 0; instResult.integer = 0; - // May want to turn this into a bit vector or something. - completed = false; - resultReady = false; - canIssue = false; - issued = false; - executed = false; - canCommit = false; - committed = false; - squashed = false; - squashedInIQ = false; - squashedInLSQ = false; - squashedInROB = false; + status.reset(); + eaCalcDone = false; memOpDone = false; + lqIdx = -1; sqIdx = -1; - reachedCommit = false; - - blockingInst = false; - recoverInst = false; - - iqEntry = false; - robEntry = false; - - serializeBefore = false; - serializeAfter = false; - serializeHandled = false; // Eventually make this a parameter. 
threadNumber = 0; @@ -294,7 +273,7 @@ void BaseDynInst::markSrcRegReady() { if (++readyRegs == numSrcRegs()) { - canIssue = true; + status.set(CanIssue); } } @@ -302,13 +281,9 @@ template void BaseDynInst::markSrcRegReady(RegIndex src_idx) { - ++readyRegs; - _readySrcRegIdx[src_idx] = true; - if (readyRegs == numSrcRegs()) { - canIssue = true; - } + markSrcRegReady(); } template diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index fc9bf8b947..f188e661c3 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -31,6 +31,7 @@ #ifndef __CPU_BASE_DYN_INST_HH__ #define __CPU_BASE_DYN_INST_HH__ +#include #include #include @@ -126,56 +127,34 @@ class BaseDynInst : public FastAlloc, public RefCounted /** The sequence number of the instruction. */ InstSeqNum seqNum; - /** Is the instruction in the IQ */ - bool iqEntry; + enum Status { + IqEntry, /// Instruction is in the IQ + RobEntry, /// Instruction is in the ROB + LsqEntry, /// Instruction is in the LSQ + Completed, /// Instruction has completed + ResultReady, /// Instruction has its result + CanIssue, /// Instruction can issue and execute + Issued, /// Instruction has issued + Executed, /// Instruction has executed + CanCommit, /// Instruction can commit + AtCommit, /// Instruction has reached commit + Committed, /// Instruction has committed + Squashed, /// Instruction is squashed + SquashedInIQ, /// Instruction is squashed in the IQ + SquashedInLSQ, /// Instruction is squashed in the LSQ + SquashedInROB, /// Instruction is squashed in the ROB + RecoverInst, /// Is a recover instruction + BlockingInst, /// Is a blocking instruction + ThreadsyncWait, /// Is a thread synchronization instruction + SerializeBefore, /// Needs to serialize on + /// instructions ahead of it + SerializeAfter, /// Needs to serialize instructions behind it + SerializeHandled, /// Serialization has been handled + NumStatus + }; - /** Is the instruction in the ROB */ - bool robEntry; - - /** Is the instruction in 
the LSQ */ - bool lsqEntry; - - /** Is the instruction completed. */ - bool completed; - - /** Is the instruction's result ready. */ - bool resultReady; - - /** Can this instruction issue. */ - bool canIssue; - - /** Has this instruction issued. */ - bool issued; - - /** Has this instruction executed (or made it through execute) yet. */ - bool executed; - - /** Can this instruction commit. */ - bool canCommit; - - /** Is this instruction committed. */ - bool committed; - - /** Is this instruction squashed. */ - bool squashed; - - /** Is this instruction squashed in the instruction queue. */ - bool squashedInIQ; - - /** Is this instruction squashed in the instruction queue. */ - bool squashedInLSQ; - - /** Is this instruction squashed in the instruction queue. */ - bool squashedInROB; - - /** Is this a recover instruction. */ - bool recoverInst; - - /** Is this a thread blocking instruction. */ - bool blockingInst; /* this inst has called thread_block() */ - - /** Is this a thread syncrhonization instruction. */ - bool threadsyncWait; + /** The status of this BaseDynInst. Several bits can be set. */ + std::bitset status; /** The thread this instruction is from. */ short threadNumber; @@ -216,12 +195,6 @@ class BaseDynInst : public FastAlloc, public RefCounted /** The memory request flags (from translation). */ unsigned memReqFlags; - /** The size of the data to be stored. */ - int storeSize; - - /** The data to be stored. 
*/ - IntReg storeData; - union Result { uint64_t integer; float fp; @@ -338,9 +311,9 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isThreadSync() const { return staticInst->isThreadSync(); } bool isSerializing() const { return staticInst->isSerializing(); } bool isSerializeBefore() const - { return staticInst->isSerializeBefore() || serializeBefore; } + { return staticInst->isSerializeBefore() || status[SerializeBefore]; } bool isSerializeAfter() const - { return staticInst->isSerializeAfter() || serializeAfter; } + { return staticInst->isSerializeAfter() || status[SerializeAfter]; } bool isMemBarrier() const { return staticInst->isMemBarrier(); } bool isWriteBarrier() const { return staticInst->isWriteBarrier(); } bool isNonSpeculative() const { return staticInst->isNonSpeculative(); } @@ -349,41 +322,32 @@ class BaseDynInst : public FastAlloc, public RefCounted bool isUnverifiable() const { return staticInst->isUnverifiable(); } /** Temporarily sets this instruction as a serialize before instruction. */ - void setSerializeBefore() { serializeBefore = true; } + void setSerializeBefore() { status.set(SerializeBefore); } /** Clears the serializeBefore part of this instruction. */ - void clearSerializeBefore() { serializeBefore = false; } + void clearSerializeBefore() { status.reset(SerializeBefore); } /** Checks if this serializeBefore is only temporarily set. */ - bool isTempSerializeBefore() { return serializeBefore; } - - /** Tracks if instruction has been externally set as serializeBefore. */ - bool serializeBefore; + bool isTempSerializeBefore() { return status[SerializeBefore]; } /** Temporarily sets this instruction as a serialize after instruction. 
*/ - void setSerializeAfter() { serializeAfter = true; } + void setSerializeAfter() { status.set(SerializeAfter); } /** Clears the serializeAfter part of this instruction.*/ - void clearSerializeAfter() { serializeAfter = false; } + void clearSerializeAfter() { status.reset(SerializeAfter); } /** Checks if this serializeAfter is only temporarily set. */ - bool isTempSerializeAfter() { return serializeAfter; } + bool isTempSerializeAfter() { return status[SerializeAfter]; } - /** Tracks if instruction has been externally set as serializeAfter. */ - bool serializeAfter; + /** Sets the serialization part of this instruction as handled. */ + void setSerializeHandled() { status.set(SerializeHandled); } /** Checks if the serialization part of this instruction has been * handled. This does not apply to the temporary serializing * state; it only applies to this instruction's own permanent * serializing state. */ - bool isSerializeHandled() { return serializeHandled; } - - /** Sets the serialization part of this instruction as handled. */ - void setSerializeHandled() { serializeHandled = true; } - - /** Whether or not the serialization of this instruction has been handled. */ - bool serializeHandled; + bool isSerializeHandled() { return status[SerializeHandled]; } /** Returns the opclass of this instruction. */ OpClass opClass() const { return staticInst->opClass(); } @@ -465,106 +429,112 @@ class BaseDynInst : public FastAlloc, public RefCounted } /** Sets this instruction as completed. */ - void setCompleted() { completed = true; } + void setCompleted() { status.set(Completed); } /** Returns whether or not this instruction is completed. */ - bool isCompleted() const { return completed; } + bool isCompleted() const { return status[Completed]; } - void setResultReady() { resultReady = true; } + /** Marks the result as ready. 
*/ + void setResultReady() { status.set(ResultReady); } - bool isResultReady() const { return resultReady; } + /** Returns whether or not the result is ready. */ + bool isResultReady() const { return status[ResultReady]; } /** Sets this instruction as ready to issue. */ - void setCanIssue() { canIssue = true; } + void setCanIssue() { status.set(CanIssue); } /** Returns whether or not this instruction is ready to issue. */ - bool readyToIssue() const { return canIssue; } + bool readyToIssue() const { return status[CanIssue]; } /** Sets this instruction as issued from the IQ. */ - void setIssued() { issued = true; } + void setIssued() { status.set(Issued); } /** Returns whether or not this instruction has issued. */ - bool isIssued() const { return issued; } + bool isIssued() const { return status[Issued]; } /** Sets this instruction as executed. */ - void setExecuted() { executed = true; } + void setExecuted() { status.set(Executed); } /** Returns whether or not this instruction has executed. */ - bool isExecuted() const { return executed; } + bool isExecuted() const { return status[Executed]; } /** Sets this instruction as ready to commit. */ - void setCanCommit() { canCommit = true; } + void setCanCommit() { status.set(CanCommit); } /** Clears this instruction as being ready to commit. */ - void clearCanCommit() { canCommit = false; } + void clearCanCommit() { status.reset(CanCommit); } /** Returns whether or not this instruction is ready to commit. */ - bool readyToCommit() const { return canCommit; } + bool readyToCommit() const { return status[CanCommit]; } + + void setAtCommit() { status.set(AtCommit); } + + bool isAtCommit() { return status[AtCommit]; } /** Sets this instruction as committed. */ - void setCommitted() { committed = true; } + void setCommitted() { status.set(Committed); } /** Returns whether or not this instruction is committed. 
*/ - bool isCommitted() const { return committed; } + bool isCommitted() const { return status[Committed]; } /** Sets this instruction as squashed. */ - void setSquashed() { squashed = true; } + void setSquashed() { status.set(Squashed); } /** Returns whether or not this instruction is squashed. */ - bool isSquashed() const { return squashed; } + bool isSquashed() const { return status[Squashed]; } //Instruction Queue Entry //----------------------- /** Sets this instruction as a entry the IQ. */ - void setInIQ() { iqEntry = true; } + void setInIQ() { status.set(IqEntry); } /** Sets this instruction as a entry the IQ. */ - void removeInIQ() { iqEntry = false; } - - /** Sets this instruction as squashed in the IQ. */ - void setSquashedInIQ() { squashedInIQ = true; squashed = true;} - - /** Returns whether or not this instruction is squashed in the IQ. */ - bool isSquashedInIQ() const { return squashedInIQ; } + void clearInIQ() { status.reset(IqEntry); } /** Returns whether or not this instruction has issued. */ - bool isInIQ() const { return iqEntry; } + bool isInIQ() const { return status[IqEntry]; } + + /** Sets this instruction as squashed in the IQ. */ + void setSquashedInIQ() { status.set(SquashedInIQ); status.set(Squashed);} + + /** Returns whether or not this instruction is squashed in the IQ. */ + bool isSquashedInIQ() const { return status[SquashedInIQ]; } //Load / Store Queue Functions //----------------------- /** Sets this instruction as a entry the LSQ. */ - void setInLSQ() { lsqEntry = true; } + void setInLSQ() { status.set(LsqEntry); } /** Sets this instruction as a entry the LSQ. */ - void removeInLSQ() { lsqEntry = false; } - - /** Sets this instruction as squashed in the LSQ. */ - void setSquashedInLSQ() { squashedInLSQ = true;} - - /** Returns whether or not this instruction is squashed in the LSQ. 
*/ - bool isSquashedInLSQ() const { return squashedInLSQ; } + void removeInLSQ() { status.reset(LsqEntry); } /** Returns whether or not this instruction is in the LSQ. */ - bool isInLSQ() const { return lsqEntry; } + bool isInLSQ() const { return status[LsqEntry]; } + + /** Sets this instruction as squashed in the LSQ. */ + void setSquashedInLSQ() { status.set(SquashedInLSQ);} + + /** Returns whether or not this instruction is squashed in the LSQ. */ + bool isSquashedInLSQ() const { return status[SquashedInLSQ]; } //Reorder Buffer Functions //----------------------- /** Sets this instruction as a entry the ROB. */ - void setInROB() { robEntry = true; } + void setInROB() { status.set(RobEntry); } /** Sets this instruction as a entry the ROB. */ - void removeInROB() { robEntry = false; } - - /** Sets this instruction as squashed in the ROB. */ - void setSquashedInROB() { squashedInROB = true; } - - /** Returns whether or not this instruction is squashed in the ROB. */ - bool isSquashedInROB() const { return squashedInROB; } + void clearInROB() { status.reset(RobEntry); } /** Returns whether or not this instruction is in the ROB. */ - bool isInROB() const { return robEntry; } + bool isInROB() const { return status[RobEntry]; } + + /** Sets this instruction as squashed in the ROB. */ + void setSquashedInROB() { status.set(SquashedInROB); } + + /** Returns whether or not this instruction is squashed in the ROB. */ + bool isSquashedInROB() const { return status[SquashedInROB]; } /** Read the PC of this instruction. */ const Addr readPC() const { return PC; } @@ -581,10 +551,10 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Sets the thread id. */ void setTid(unsigned tid) { threadNumber = tid; } + /** Sets the pointer to the thread state. */ void setThreadState(ImplState *state) { thread = state; } - /** Returns the thread context. - */ + /** Returns the thread context. 
*/ ThreadContext *tcBase() { return thread->getTC(); } private: @@ -621,8 +591,6 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Store queue index. */ int16_t sqIdx; - bool reachedCommit; - /** Iterator pointing to this BaseDynInst in the list of all insts. */ ListIt instListIt; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 021d3ef902..b0c8bee773 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -907,7 +907,7 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) // and committed this instruction. thread[tid]->funcExeInst--; - head_inst->reachedCommit = true; + head_inst->setAtCommit(); if (head_inst->isNonSpeculative() || head_inst->isStoreConditional() || diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 0748ddb3b2..48f6ee6124 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -296,7 +296,7 @@ DefaultDecode::squash(DynInstPtr &inst, unsigned tid) for (int i=0; isize; i++) { if (fromFetch->insts[i]->threadNumber == tid && fromFetch->insts[i]->seqNum > inst->seqNum) { - fromFetch->insts[i]->squashed = true; + fromFetch->insts[i]->setSquashed(); } } @@ -345,7 +345,7 @@ DefaultDecode::squash(unsigned tid) for (int i=0; isize; i++) { if (fromFetch->insts[i]->threadNumber == tid) { - fromFetch->insts[i]->squashed = true; + fromFetch->insts[i]->setSquashed(); squash_count++; } } diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index b02ee8555f..6c207d94a7 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -579,7 +579,7 @@ DefaultIEW::validInstsFromRename() unsigned inst_count = 0; for (int i=0; isize; i++) { - if (!fromRename->insts[i]->squashed) + if (!fromRename->insts[i]->isSquashed()) inst_count++; } diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 66d4a54c67..1ef1b2cffe 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -776,7 +776,7 @@ 
InstructionQueue::scheduleReadyInsts() // complete. ++freeEntries; count[tid]--; - issuing_inst->removeInIQ(); + issuing_inst->clearInIQ(); } else { memDepUnit[tid].issue(issuing_inst); } @@ -1082,7 +1082,7 @@ InstructionQueue::doSquash(unsigned tid) // inst will flow through the rest of the pipeline. squashed_inst->setIssued(); squashed_inst->setCanCommit(); - squashed_inst->removeInIQ(); + squashed_inst->clearInIQ(); //Update Thread IQ Count count[squashed_inst->threadNumber]--; diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 3de5815198..2d700ddf15 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -527,7 +527,7 @@ LSQUnit::read(Request *req, T &data, int load_idx) // at the head of the LSQ and are ready to commit (at the head of the ROB // too). if (req->getFlags() & UNCACHEABLE && - (load_idx != loadHead || !load_inst->reachedCommit)) { + (load_idx != loadHead || !load_inst->isAtCommit())) { iewStage->rescheduleMemInst(load_inst); ++lsqRescheduledLoads; return TheISA::genMachineCheckFault(); diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index a5c1eb12ac..b48d7fb741 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -710,7 +710,7 @@ LSQUnit::squash(const InstSeqNum &squashed_num) } // Clear the smart pointer to make sure it is decremented. - loadQueue[load_idx]->squashed = true; + loadQueue[load_idx]->setSquashed(); loadQueue[load_idx] = NULL; --loads; @@ -754,7 +754,7 @@ LSQUnit::squash(const InstSeqNum &squashed_num) } // Clear the smart pointer to make sure it is decremented. 
- storeQueue[store_idx].inst->squashed = true; + storeQueue[store_idx].inst->setSquashed(); storeQueue[store_idx].inst = NULL; storeQueue[store_idx].canWB = 0; diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index f9e2a03ee9..307022cb8e 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -341,7 +341,7 @@ DefaultRename::squash(unsigned tid) for (int i=0; isize; i++) { if (fromDecode->insts[i]->threadNumber == tid) { - fromDecode->insts[i]->squashed = true; + fromDecode->insts[i]->setSquashed(); wroteToTimeBuffer = true; squashCount++; } @@ -1022,7 +1022,7 @@ DefaultRename::validInsts() unsigned inst_count = 0; for (int i=0; isize; i++) { - if (!fromDecode->insts[i]->squashed) + if (!fromDecode->insts[i]->isSquashed()) inst_count++; } diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh index 97694e3717..5a941834b6 100644 --- a/src/cpu/o3/rob_impl.hh +++ b/src/cpu/o3/rob_impl.hh @@ -276,7 +276,7 @@ ROB::retireHead(unsigned tid) --numInstsInROB; --threadEntries[tid]; - head_inst->removeInROB(); + head_inst->clearInROB(); head_inst->setCommitted(); instList[tid].erase(head_it); From 07e679fe85c7cd5830f08423696a41f6141f5caf Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Wed, 14 Jun 2006 16:12:56 -0400 Subject: [PATCH 022/152] add a comment that should be able to pass --help after config file --HG-- extra : convert_revision : 0229d0c2f4f2615fa744561f32773b4c8160e81a --- src/sim/main.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/sim/main.cc b/src/sim/main.cc index f3b74489d5..f2830cca2e 100644 --- a/src/sim/main.cc +++ b/src/sim/main.cc @@ -134,7 +134,9 @@ showBriefHelp(ostream &out) " script is executed (just like the -i option to the\n" " Python interpreter).\n\n" " -h Prints this help\n\n" -" config file name (ends in .py)\n\n", +" config file name which ends in .py. 
(Normally you can\n" +" run --help to get help on that config files\n" +" parameters.\n\n", prog); } From 38ecb6a2ee7fbe4e24f83e50f8ca95b04197e0f9 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Wed, 14 Jun 2006 19:31:21 -0400 Subject: [PATCH 023/152] -luxc1 fix -noop templates -trap disassembly src/arch/mips/isa/decoder.isa: luxc1 uses doubleword, not single src/arch/mips/isa/formats/int.isa: use new nop decode template src/arch/mips/isa/formats/mem.isa: Noop templates src/arch/mips/isa/formats/noop.isa: redo noop templates src/arch/mips/isa/formats/trap.isa: fix for trap disassembly --HG-- extra : convert_revision : 56f13e88abdcbd03ab828cff5d775c993157ae96 --- src/arch/mips/isa/decoder.isa | 2 +- src/arch/mips/isa/formats/int.isa | 8 ++-- src/arch/mips/isa/formats/mem.isa | 19 ++-------- src/arch/mips/isa/formats/noop.isa | 59 +++++++++++++----------------- src/arch/mips/isa/formats/trap.isa | 8 ++-- 5 files changed, 39 insertions(+), 57 deletions(-) diff --git a/src/arch/mips/isa/decoder.isa b/src/arch/mips/isa/decoder.isa index 14aa0cc77c..8b9d8eeb1b 100644 --- a/src/arch/mips/isa/decoder.isa +++ b/src/arch/mips/isa/decoder.isa @@ -859,7 +859,7 @@ decode OPCODE_HI default Unknown::unknown() { format LoadIndexedMemory { 0x0: lwxc1({{ Fd.uw = Mem.uw;}}); 0x1: ldxc1({{ Fd.ud = Mem.ud;}}); - 0x5: luxc1({{ Fd.uw = Mem.ud;}}, + 0x5: luxc1({{ Fd.ud = Mem.ud;}}, {{ EA = (Rs + Rt) & ~7; }}); } } diff --git a/src/arch/mips/isa/formats/int.isa b/src/arch/mips/isa/formats/int.isa index 7b5affb5cd..8ac50b0097 100644 --- a/src/arch/mips/isa/formats/int.isa +++ b/src/arch/mips/isa/formats/int.isa @@ -228,7 +228,7 @@ def format IntOp(code, *opt_flags) {{ iop = InstObjParams(name, Name, 'IntOp', CodeBlock(code), opt_flags) header_output = BasicDeclare.subst(iop) decoder_output = BasicConstructor.subst(iop) - decode_block = OperateNopCheckDecode.subst(iop) + decode_block = RegNopCheckDecode.subst(iop) exec_output = BasicExecute.subst(iop) }}; @@ -236,7 +236,7 @@ def format 
IntImmOp(code, *opt_flags) {{ iop = InstObjParams(name, Name, 'IntImmOp', CodeBlock(code), opt_flags) header_output = BasicDeclare.subst(iop) decoder_output = BasicConstructor.subst(iop) - decode_block = OperateNopCheckDecode.subst(iop) + decode_block = ImmNopCheckDecode.subst(iop) exec_output = BasicExecute.subst(iop) }}; @@ -252,7 +252,7 @@ def format HiLoOp(code, *opt_flags) {{ iop = InstObjParams(name, Name, 'HiLoOp', CodeBlock(code), opt_flags) header_output = BasicDeclare.subst(iop) decoder_output = BasicConstructor.subst(iop) - decode_block = OperateNopCheckDecode.subst(iop) + decode_block = BasicDecode.subst(iop) exec_output = HiLoExecute.subst(iop) }}; @@ -260,7 +260,7 @@ def format HiLoMiscOp(code, *opt_flags) {{ iop = InstObjParams(name, Name, 'HiLoMiscOp', CodeBlock(code), opt_flags) header_output = BasicDeclare.subst(iop) decoder_output = BasicConstructor.subst(iop) - decode_block = OperateNopCheckDecode.subst(iop) + decode_block = BasicDecode.subst(iop) exec_output = HiLoExecute.subst(iop) }}; diff --git a/src/arch/mips/isa/formats/mem.isa b/src/arch/mips/isa/formats/mem.isa index cf6d3de747..5a8930a085 100644 --- a/src/arch/mips/isa/formats/mem.isa +++ b/src/arch/mips/isa/formats/mem.isa @@ -499,23 +499,11 @@ def template MiscCompleteAcc {{ } }}; -// load instructions use Rt as dest, so check for -// Rt == 0 to detect nops -def template LoadNopCheckDecode {{ - { - MipsStaticInst *i = new %(class_name)s(machInst); - if (RT == 0) { - i = makeNop(i); - } - return i; - } -}}; - def format LoadMemory(memacc_code, ea_code = {{ EA = Rs + disp; }}, mem_flags = [], inst_flags = []) {{ (header_output, decoder_output, decode_block, exec_output) = \ LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, - decode_template = LoadNopCheckDecode, + decode_template = ImmNopCheckDecode, exec_template_base = 'Load') }}; @@ -530,7 +518,7 @@ def format LoadIndexedMemory(memacc_code, ea_code = {{ EA = Rs + Rt; }}, mem_flags = [], inst_flags = []) {{ 
(header_output, decoder_output, decode_block, exec_output) = \ LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, - decode_template = LoadNopCheckDecode, + decode_template = ImmNopCheckDecode, exec_template_base = 'Load') }}; @@ -554,7 +542,7 @@ def format LoadUnalignedMemory(memacc_code, ea_code = {{ EA = (Rs + disp) & ~3; (header_output, decoder_output, decode_block, exec_output) = \ LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, - decode_template = LoadNopCheckDecode, + decode_template = ImmNopCheckDecode, exec_template_base = 'Load') }}; @@ -571,7 +559,6 @@ def format StoreUnalignedMemory(memacc_code, ea_code = {{ EA = (Rs + disp) & ~3; (header_output, decoder_output, decode_block, exec_output) = \ LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, - decode_template = LoadNopCheckDecode, exec_template_base = 'Store') }}; diff --git a/src/arch/mips/isa/formats/noop.isa b/src/arch/mips/isa/formats/noop.isa index 4fd8235e4b..430c32f82e 100644 --- a/src/arch/mips/isa/formats/noop.isa +++ b/src/arch/mips/isa/formats/noop.isa @@ -1,33 +1,5 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer; -// redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution; -// neither the name of the copyright holders nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Authors: Korey Sewell - //////////////////////////////////////////////////////////////////// // // Nop @@ -73,7 +45,8 @@ output decoder {{ MipsStaticInst * makeNop(MipsStaticInst *inst) { - MipsStaticInst *nop = new Nop(inst->disassemble(0), inst->machInst); + std::string nop_str = "(" + inst->disassemble(0) + ")"; + MipsStaticInst *nop = new Nop(nop_str, inst->machInst); delete inst; return nop; } @@ -87,16 +60,36 @@ output exec {{ } }}; -// integer & FP operate instructions use RT as dest, so check for -// RT == 0 to detect nops +// Int & FP operate instructions use RD as dest, so check for +// RD == 0 to detect nops +def template RegNopCheckDecode {{ + { + MipsStaticInst *i = new %(class_name)s(machInst); + //if (RD == 0) { + //i = makeNop(i); + //} + return i; + } +}}; + def template OperateNopCheckDecode {{ { MipsStaticInst *i = new %(class_name)s(machInst); - //if (RD == 0) { - // i = makeNop(i); + // i = makeNop(i); //} + return i; + } +}}; +// IntImm & Memory instructions use Rt as dest, so check for +// Rt == 0 to detect nops +def template ImmNopCheckDecode {{ + { + MipsStaticInst *i = new %(class_name)s(machInst); + //if (RT == 0) { + // i = makeNop(i); + // } 
return i; } }}; diff --git a/src/arch/mips/isa/formats/trap.isa b/src/arch/mips/isa/formats/trap.isa index 574b808cce..6692e8ef87 100644 --- a/src/arch/mips/isa/formats/trap.isa +++ b/src/arch/mips/isa/formats/trap.isa @@ -70,9 +70,11 @@ def template TrapExecute {{ }}; def format Trap(code, *flags) {{ - code = 'panic(\"' - code += 'Trap Exception Handler Is Currently Not Implemented.' - code += '\");' + warn_code = 'warn(\"' + warn_code += 'Trap Exception Handler Is Currently Not Implemented.' + warn_code += '\");' + code = warn_code + "bool " + code + iop = InstObjParams(name, Name, 'MipsStaticInst', CodeBlock(code), flags) header_output = BasicDeclare.subst(iop) decoder_output = BasicConstructor.subst(iop) From 7cd362ca4e9394967f8680593b657b0bdd39d29e Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Wed, 14 Jun 2006 19:45:15 -0400 Subject: [PATCH 024/152] add cycle to exit message src/arch/mips/isa/formats/trap.isa: Take out fix that tried to fix trap instruction disassembly. It forces bad compile .. 
configs/test/test.py: add 'cycle' to exit message --HG-- extra : convert_revision : 568877797fd2806416b4cbb388cc3f7eb2492627 --- configs/test/test.py | 2 +- src/arch/mips/isa/formats/trap.isa | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/configs/test/test.py b/configs/test/test.py index 2ece9e6752..9ab5f6e006 100644 --- a/configs/test/test.py +++ b/configs/test/test.py @@ -50,5 +50,5 @@ m5.instantiate(root) # simulate until program terminates exit_event = m5.simulate() -print 'Exiting @', m5.curTick(), 'because', exit_event.getCause() +print 'Exiting @ cycle', m5.curTick(), 'because', exit_event.getCause() diff --git a/src/arch/mips/isa/formats/trap.isa b/src/arch/mips/isa/formats/trap.isa index 6692e8ef87..8a9c5822a2 100644 --- a/src/arch/mips/isa/formats/trap.isa +++ b/src/arch/mips/isa/formats/trap.isa @@ -70,11 +70,9 @@ def template TrapExecute {{ }}; def format Trap(code, *flags) {{ - warn_code = 'warn(\"' - warn_code += 'Trap Exception Handler Is Currently Not Implemented.' - warn_code += '\");' - code = warn_code + "bool " + code - + code = 'warn(\"' + code += 'Trap Exception Handler Is Currently Not Implemented.' 
+ code += '\");' iop = InstObjParams(name, Name, 'MipsStaticInst', CodeBlock(code), flags) header_output = BasicDeclare.subst(iop) decoder_output = BasicConstructor.subst(iop) From 7b44630b958c942dca2b1de906ea9e719a3f6ff4 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Wed, 14 Jun 2006 19:53:36 -0400 Subject: [PATCH 025/152] change back, BK is acting up --HG-- extra : convert_revision : 11fd5ebbca0408b357e9186d1b3722eb571e874e --- configs/test/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/test/test.py b/configs/test/test.py index 9ab5f6e006..2ece9e6752 100644 --- a/configs/test/test.py +++ b/configs/test/test.py @@ -50,5 +50,5 @@ m5.instantiate(root) # simulate until program terminates exit_event = m5.simulate() -print 'Exiting @ cycle', m5.curTick(), 'because', exit_event.getCause() +print 'Exiting @', m5.curTick(), 'because', exit_event.getCause() From 1c55389578c0b17aa9a81f64887e7a6f02110ce4 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Wed, 14 Jun 2006 22:01:36 -0400 Subject: [PATCH 026/152] tried to undo change and it didnt work so might as well put it back --HG-- extra : convert_revision : 9793917e8a3e4d30f59ff469e4f08da96ce001f9 --- configs/test/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/test/test.py b/configs/test/test.py index 2ece9e6752..9ab5f6e006 100644 --- a/configs/test/test.py +++ b/configs/test/test.py @@ -50,5 +50,5 @@ m5.instantiate(root) # simulate until program terminates exit_event = m5.simulate() -print 'Exiting @', m5.curTick(), 'because', exit_event.getCause() +print 'Exiting @ cycle', m5.curTick(), 'because', exit_event.getCause() From 74b9868c786a8c58ef8fe65bebd2fd8e9573d0be Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Thu, 15 Jun 2006 01:00:15 -0400 Subject: [PATCH 027/152] Mips Code Cleanup: Fix some author stuff and copyright dates Take out full system code src/arch/mips/isa/base.isa: src/arch/mips/isa/bitfields.isa: copyright info 
src/arch/mips/isa/decoder.isa: src/arch/mips/isa/formats/basic.isa: src/arch/mips/isa/formats/branch.isa: src/arch/mips/isa/formats/control.isa: src/arch/mips/isa/formats/fp.isa: src/arch/mips/isa/formats/int.isa: src/arch/mips/isa/formats/mem.isa: src/arch/mips/isa/formats/noop.isa: src/arch/mips/isa/formats/tlbop.isa: src/arch/mips/isa/formats/trap.isa: src/arch/mips/isa/formats/unimp.isa: src/arch/mips/isa/formats/unknown.isa: src/arch/mips/isa/formats/util.isa: src/arch/mips/isa/includes.isa: src/arch/mips/isa/main.isa: src/arch/mips/isa/operands.isa: src/arch/mips/process.cc: src/arch/mips/regfile/misc_regfile.hh: src/arch/mips/stacktrace.hh: copyright 2006 src/arch/mips/isa_traits.cc: src/arch/mips/isa_traits.hh: copyright 2006 take out full system src/arch/mips/regfile/float_regfile.hh: src/arch/mips/regfile/regfile.hh: copyright 2006 use FloatRegVal src/arch/mips/regfile/int_regfile.hh: copyright 2006 move HI/LO to types.hh src/arch/mips/types.hh: copyright 2006 typedef FloatRegVal --HG-- extra : convert_revision : 1d0d72cd655a4e28622745a6c6b06349da533a1d --- src/arch/mips/isa/base.isa | 3 +- src/arch/mips/isa/bitfields.isa | 2 +- src/arch/mips/isa/decoder.isa | 2 +- src/arch/mips/isa/formats/basic.isa | 2 +- src/arch/mips/isa/formats/branch.isa | 2 +- src/arch/mips/isa/formats/control.isa | 2 +- src/arch/mips/isa/formats/fp.isa | 2 +- src/arch/mips/isa/formats/int.isa | 2 +- src/arch/mips/isa/formats/mem.isa | 2 +- src/arch/mips/isa/formats/noop.isa | 28 ++++++++++ src/arch/mips/isa/formats/tlbop.isa | 2 +- src/arch/mips/isa/formats/trap.isa | 2 +- src/arch/mips/isa/formats/unimp.isa | 2 +- src/arch/mips/isa/formats/unknown.isa | 2 +- src/arch/mips/isa/formats/util.isa | 2 +- src/arch/mips/isa/includes.isa | 2 +- src/arch/mips/isa/main.isa | 2 +- src/arch/mips/isa/operands.isa | 16 +++--- src/arch/mips/isa_traits.cc | 73 +------------------------- src/arch/mips/isa_traits.hh | 19 ------- src/arch/mips/process.cc | 1 + src/arch/mips/regfile/float_regfile.hh 
| 4 +- src/arch/mips/regfile/int_regfile.hh | 7 +-- src/arch/mips/regfile/misc_regfile.hh | 19 +------ src/arch/mips/regfile/regfile.hh | 23 ++------ src/arch/mips/stacktrace.hh | 2 +- src/arch/mips/types.hh | 7 +-- 27 files changed, 69 insertions(+), 163 deletions(-) diff --git a/src/arch/mips/isa/base.isa b/src/arch/mips/isa/base.isa index b733da7dab..f07b06e03b 100644 --- a/src/arch/mips/isa/base.isa +++ b/src/arch/mips/isa/base.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -38,7 +38,6 @@ output header {{ using namespace MipsISA; - /** * Base class for all MIPS static instructions. */ diff --git a/src/arch/mips/isa/bitfields.isa b/src/arch/mips/isa/bitfields.isa index e8d4578c73..35815bf1f8 100644 --- a/src/arch/mips/isa/bitfields.isa +++ b/src/arch/mips/isa/bitfields.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/decoder.isa b/src/arch/mips/isa/decoder.isa index 8b9d8eeb1b..9ac982e344 100644 --- a/src/arch/mips/isa/decoder.isa +++ b/src/arch/mips/isa/decoder.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/formats/basic.isa b/src/arch/mips/isa/formats/basic.isa index 35ce092057..29dafd5419 100644 --- a/src/arch/mips/isa/formats/basic.isa +++ b/src/arch/mips/isa/formats/basic.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/formats/branch.isa b/src/arch/mips/isa/formats/branch.isa index 827e3ccf06..5230ce9cc9 100644 --- a/src/arch/mips/isa/formats/branch.isa +++ b/src/arch/mips/isa/formats/branch.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/formats/control.isa b/src/arch/mips/isa/formats/control.isa index 509ee7e87d..6c7d396f3d 100644 --- a/src/arch/mips/isa/formats/control.isa +++ b/src/arch/mips/isa/formats/control.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/formats/fp.isa b/src/arch/mips/isa/formats/fp.isa index d05b04d0ef..1e5d62626c 100644 --- a/src/arch/mips/isa/formats/fp.isa +++ b/src/arch/mips/isa/formats/fp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/formats/int.isa b/src/arch/mips/isa/formats/int.isa index 8ac50b0097..56a4ec2041 100644 --- a/src/arch/mips/isa/formats/int.isa +++ b/src/arch/mips/isa/formats/int.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/formats/mem.isa b/src/arch/mips/isa/formats/mem.isa index 5a8930a085..f03f7becd8 100644 --- a/src/arch/mips/isa/formats/mem.isa +++ b/src/arch/mips/isa/formats/mem.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2005 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/formats/noop.isa b/src/arch/mips/isa/formats/noop.isa index 430c32f82e..7f3d313ad3 100644 --- a/src/arch/mips/isa/formats/noop.isa +++ b/src/arch/mips/isa/formats/noop.isa @@ -1,5 +1,33 @@ // -*- mode:c++ -*- +// Copyright (c) 2006 The Regents of The University of Michigan +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Korey Sewell + //////////////////////////////////////////////////////////////////// // // Nop diff --git a/src/arch/mips/isa/formats/tlbop.isa b/src/arch/mips/isa/formats/tlbop.isa index 75ab71c48b..b974ccbedb 100644 --- a/src/arch/mips/isa/formats/tlbop.isa +++ b/src/arch/mips/isa/formats/tlbop.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/formats/trap.isa b/src/arch/mips/isa/formats/trap.isa index 8a9c5822a2..b9066f374d 100644 --- a/src/arch/mips/isa/formats/trap.isa +++ b/src/arch/mips/isa/formats/trap.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/formats/unimp.isa b/src/arch/mips/isa/formats/unimp.isa index e17b5f832d..03068fa744 100644 --- a/src/arch/mips/isa/formats/unimp.isa +++ b/src/arch/mips/isa/formats/unimp.isa @@ -1,7 +1,7 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2005 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/formats/unknown.isa b/src/arch/mips/isa/formats/unknown.isa index 41387adca5..70b3901e93 100644 --- a/src/arch/mips/isa/formats/unknown.isa +++ b/src/arch/mips/isa/formats/unknown.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/formats/util.isa b/src/arch/mips/isa/formats/util.isa index b67a02d073..0cc375af3b 100644 --- a/src/arch/mips/isa/formats/util.isa +++ b/src/arch/mips/isa/formats/util.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/includes.isa b/src/arch/mips/isa/includes.isa index 555cec2556..6b5f3c588f 100644 --- a/src/arch/mips/isa/includes.isa +++ b/src/arch/mips/isa/includes.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/main.isa b/src/arch/mips/isa/main.isa index 9da3fc0dbf..2d7c63cd5c 100644 --- a/src/arch/mips/isa/main.isa +++ b/src/arch/mips/isa/main.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2005 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/src/arch/mips/isa/operands.isa b/src/arch/mips/isa/operands.isa index 316552ef43..3843dc0532 100644 --- a/src/arch/mips/isa/operands.isa +++ b/src/arch/mips/isa/operands.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2003-2006 The Regents of The University of Michigan +// Copyright (c) 2006 The Regents of The University of Michigan // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -53,8 +53,8 @@ def operands {{ 'R2': ('IntReg', 'uw','2', 'IsInteger', 5), #Special Integer Reg operands - 'HI': ('IntReg', 'uw','32', 'IsInteger', 6), - 'LO': ('IntReg', 'uw','33', 'IsInteger', 7), + 'HI': ('IntReg', 'uw','MipsISA::HI', 'IsInteger', 6), + 'LO': ('IntReg', 'uw','MipsISA::LO', 'IsInteger', 7), #Immediate Value operand 'IntImm': ('IntReg', 'uw', 'INTIMM', 'IsInteger', 3), @@ -66,11 +66,11 @@ def operands {{ 'Fr': ('FloatReg', 'sf', 'FR', 'IsFloating', 3), #Special Floating Point Control Reg Operands - 'FIR': ('FloatReg', 'uw', '32', 'IsFloating', 1), - 'FCCR': ('FloatReg', 'uw', '33', 'IsFloating', 2), - 'FEXR': ('FloatReg', 'uw', '34', 'IsFloating', 3), - 'FENR': ('FloatReg', 'uw', '35', 'IsFloating', 3), - 'FCSR': ('FloatReg', 'uw', '36', 'IsFloating', 3), + 'FIR': ('FloatReg', 'uw', 'MipsISA::FIR', 'IsFloating', 1), + 'FCCR': ('FloatReg', 'uw', 'MipsISA::FCCR', 'IsFloating', 2), + 'FEXR': ('FloatReg', 'uw', 'MipsISA::FEXR', 'IsFloating', 3), + 'FENR': ('FloatReg', 'uw', 'MipsISA::FENR', 'IsFloating', 3), + 'FCSR': ('FloatReg', 'uw', 'MipsISA::FCSR', 'IsFloating', 3), #Operands For Paired Singles FP Operations 'Fd1': ('FloatReg', 'sf', 'FD', 'IsFloating', 4), diff --git a/src/arch/mips/isa_traits.cc b/src/arch/mips/isa_traits.cc index 9f3817a60a..85acc4e8cd 100644 --- a/src/arch/mips/isa_traits.cc +++ b/src/arch/mips/isa_traits.cc @@ -30,7 +30,7 @@ */ #include "arch/mips/isa_traits.hh" -#include "config/full_system.hh" +//#include "config/full_system.hh" #include "cpu/static_inst.hh" #include "sim/serialize.hh" #include "base/bitfield.hh" @@ -43,39 +43,14 @@ void MipsISA::copyRegs(ThreadContext *src, ThreadContext *dest) { panic("Copy Regs Not Implemented Yet\n"); - /*fpcr = xc->readMiscReg(MipsISA::Fpcr_DepTag); - uniq = xc->readMiscReg(MipsISA::Uniq_DepTag); - lock_flag = xc->readMiscReg(MipsISA::Lock_Flag_DepTag); - lock_addr = xc->readMiscReg(MipsISA::Lock_Addr_DepTag); 
- -#if FULL_SYSTEM - copyIprs(xc); - #endif*/ } void MipsISA::MiscRegFile::copyMiscRegs(ThreadContext *tc) { panic("Copy Misc. Regs Not Implemented Yet\n"); - /*fpcr = xc->readMiscReg(MipsISA::Fpcr_DepTag); - uniq = xc->readMiscReg(MipsISA::Uniq_DepTag); - lock_flag = xc->readMiscReg(MipsISA::Lock_Flag_DepTag); - lock_addr = xc->readMiscReg(MipsISA::Lock_Addr_DepTag); - - #endif*/ } -#if FULL_SYSTEM - -static inline Addr -TruncPage(Addr addr) -{ return addr & ~(MipsISA::PageBytes - 1); } - -static inline Addr -RoundPage(Addr addr) -{ return (addr + MipsISA::PageBytes - 1) & ~(MipsISA::PageBytes - 1); } -#endif - void IntRegFile::serialize(std::ostream &os) { @@ -100,12 +75,6 @@ RegFile::serialize(std::ostream &os) SERIALIZE_SCALAR(pc); SERIALIZE_SCALAR(npc); SERIALIZE_SCALAR(nnpc); -#if FULL_SYSTEM - SERIALIZE_ARRAY(palregs, NumIntRegs); - SERIALIZE_ARRAY(ipr, NumInternalProcRegs); - SERIALIZE_SCALAR(intrflag); - SERIALIZE_SCALAR(pal_shadow); -#endif } @@ -121,43 +90,5 @@ RegFile::unserialize(Checkpoint *cp, const std::string §ion) UNSERIALIZE_SCALAR(pc); UNSERIALIZE_SCALAR(npc); UNSERIALIZE_SCALAR(nnpc); -#if FULL_SYSTEM - UNSERIALIZE_ARRAY(palregs, NumIntRegs); - UNSERIALIZE_ARRAY(ipr, NumInternalProcRegs); - UNSERIALIZE_SCALAR(intrflag); - UNSERIALIZE_SCALAR(pal_shadow); -#endif + } - - -#if FULL_SYSTEM -void -PTE::serialize(std::ostream &os) -{ - SERIALIZE_SCALAR(tag); - SERIALIZE_SCALAR(ppn); - SERIALIZE_SCALAR(xre); - SERIALIZE_SCALAR(xwe); - SERIALIZE_SCALAR(asn); - SERIALIZE_SCALAR(asma); - SERIALIZE_SCALAR(fonr); - SERIALIZE_SCALAR(fonw); - SERIALIZE_SCALAR(valid); -} - - -void -PTE::unserialize(Checkpoint *cp, const std::string §ion) -{ - UNSERIALIZE_SCALAR(tag); - UNSERIALIZE_SCALAR(ppn); - UNSERIALIZE_SCALAR(xre); - UNSERIALIZE_SCALAR(xwe); - UNSERIALIZE_SCALAR(asn); - UNSERIALIZE_SCALAR(asma); - UNSERIALIZE_SCALAR(fonr); - UNSERIALIZE_SCALAR(fonw); - UNSERIALIZE_SCALAR(valid); -} - -#endif //FULL_SYSTEM diff --git a/src/arch/mips/isa_traits.hh 
b/src/arch/mips/isa_traits.hh index dc8b6758aa..ff994bef99 100644 --- a/src/arch/mips/isa_traits.hh +++ b/src/arch/mips/isa_traits.hh @@ -57,12 +57,6 @@ namespace LittleEndianGuest {}; class StaticInst; class StaticInstPtr; -namespace MIPS34K { -int DTB_ASN_ASN(uint64_t reg); -int ITB_ASN_ASN(uint64_t reg); -}; - -#if !FULL_SYSTEM class SyscallReturn { public: template @@ -95,7 +89,6 @@ class SyscallReturn { uint64_t retval; bool success; }; -#endif namespace MipsISA { @@ -140,12 +133,6 @@ namespace MipsISA void copyRegs(ThreadContext *src, ThreadContext *dest); - uint64_t fpConvert(double fp_val, ConvertType cvt_type); - double roundFP(double val, int digits); - double truncFP(double val); - bool getFPConditionCode(uint32_t fcsr_reg, int cc); - uint32_t makeCCVector(uint32_t fcsr, int num, bool val); - // Machine operations void saveMachineReg(AnyReg &savereg, const RegFile ®_file, @@ -191,12 +178,6 @@ namespace MipsISA }; -#if FULL_SYSTEM - -#include "arch/mips/mips34k.hh" - -#endif - using namespace MipsISA; #endif // __ARCH_MIPS_ISA_TRAITS_HH__ diff --git a/src/arch/mips/process.cc b/src/arch/mips/process.cc index 7762c2fa0b..031c2030ec 100644 --- a/src/arch/mips/process.cc +++ b/src/arch/mips/process.cc @@ -1,3 +1,4 @@ + /* * Copyright (c) 2003-2004 The Regents of The University of Michigan * All rights reserved. diff --git a/src/arch/mips/regfile/float_regfile.hh b/src/arch/mips/regfile/float_regfile.hh index e260f681c4..61efbb416c 100644 --- a/src/arch/mips/regfile/float_regfile.hh +++ b/src/arch/mips/regfile/float_regfile.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2005 The Regents of The University of Michigan + * Copyright (c) 2006 The Regents of The University of Michigan * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -100,7 +100,7 @@ namespace MipsISA } } - Fault setReg(int floatReg, const FloatReg &val, int width) + Fault setReg(int floatReg, const FloatRegVal &val, int width) { using namespace std; switch(width) diff --git a/src/arch/mips/regfile/int_regfile.hh b/src/arch/mips/regfile/int_regfile.hh index dc82a3c26d..5add1b7be6 100644 --- a/src/arch/mips/regfile/int_regfile.hh +++ b/src/arch/mips/regfile/int_regfile.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2005 The Regents of The University of Michigan + * Copyright (c) 2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -65,11 +65,6 @@ namespace MipsISA }; - enum MiscIntRegNums { - HI = NumIntArchRegs, - LO - }; - } // namespace MipsISA #endif diff --git a/src/arch/mips/regfile/misc_regfile.hh b/src/arch/mips/regfile/misc_regfile.hh index f8aeab8cbb..87961f97e5 100644 --- a/src/arch/mips/regfile/misc_regfile.hh +++ b/src/arch/mips/regfile/misc_regfile.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2005 The Regents of The University of Michigan + * Copyright (c) 2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -45,18 +45,12 @@ namespace MipsISA protected: uint64_t fpcr; // floating point condition codes - uint64_t uniq; // process-unique register bool lock_flag; // lock flag for LL/SC Addr lock_addr; // lock address for LL/SC MiscReg miscRegFile[NumMiscRegs]; public: - //These functions should be removed once the simplescalar cpu model - //has been replaced. 
- int getInstAsid(); - int getDataAsid(); - void copyMiscRegs(ThreadContext *tc); MiscReg readReg(int misc_reg) @@ -80,17 +74,6 @@ namespace MipsISA miscRegFile[misc_reg] = val; return NoFault; } -#if FULL_SYSTEM - void clearIprs() { } - - protected: - InternalProcReg ipr[NumInternalProcRegs]; // Internal processor regs - - private: - MiscReg readIpr(int idx, Fault &fault, ThreadContext *tc) { } - - Fault setIpr(int idx, uint64_t val, ThreadContext *tc) { } -#endif friend class RegFile; }; } // namespace MipsISA diff --git a/src/arch/mips/regfile/regfile.hh b/src/arch/mips/regfile/regfile.hh index af61e62cd7..a68120299d 100644 --- a/src/arch/mips/regfile/regfile.hh +++ b/src/arch/mips/regfile/regfile.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2005 The Regents of The University of Michigan + * Copyright (c) 2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -81,12 +81,12 @@ namespace MipsISA return miscRegFile.setRegWithEffect(miscReg, val, tc); } - FloatReg readFloatReg(int floatReg) + FloatRegVal readFloatReg(int floatReg) { return floatRegFile.readReg(floatReg,SingleWidth); } - FloatReg readFloatReg(int floatReg, int width) + FloatRegVal readFloatReg(int floatReg, int width) { return floatRegFile.readReg(floatReg,width); } @@ -101,12 +101,12 @@ namespace MipsISA return floatRegFile.readRegBits(floatReg,width); } - Fault setFloatReg(int floatReg, const FloatReg &val) + Fault setFloatReg(int floatReg, const FloatRegVal &val) { return floatRegFile.setReg(floatReg, val, SingleWidth); } - Fault setFloatReg(int floatReg, const FloatReg &val, int width) + Fault setFloatReg(int floatReg, const FloatRegVal &val, int width) { return floatRegFile.setReg(floatReg, val, width); } @@ -168,16 +168,6 @@ namespace MipsISA nnpc = val; } - -#if FULL_SYSTEM - IntReg palregs[NumIntRegs]; // PAL shadow registers - InternalProcReg ipr[NumInternalProcRegs]; // internal processor regs - int 
intrflag; // interrupt flag - bool pal_shadow; // using pal_shadow registers - inline int instAsid() { return MIPS34K::ITB_ASN_ASN(ipr[IPR_ITB_ASN]); } - inline int dataAsid() { return MIPS34K::DTB_ASN_ASN(ipr[IPR_DTB_ASN]); } -#endif // FULL_SYSTEM - void serialize(std::ostream &os); void unserialize(Checkpoint *cp, const std::string §ion); @@ -193,9 +183,6 @@ namespace MipsISA void copyMiscRegs(ThreadContext *src, ThreadContext *dest); -#if FULL_SYSTEM - void copyIprs(ThreadContext *src, ThreadContext *dest); -#endif } // namespace MipsISA #endif diff --git a/src/arch/mips/stacktrace.hh b/src/arch/mips/stacktrace.hh index ff35aaeb06..f9e092dbd8 100644 --- a/src/arch/mips/stacktrace.hh +++ b/src/arch/mips/stacktrace.hh @@ -25,7 +25,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Korey Sewell + * Authors: Ali Saidi */ #ifndef __ARCH_MIPS_STACKTRACE_HH__ diff --git a/src/arch/mips/types.hh b/src/arch/mips/types.hh index 7cd2eed0c3..6330044d9f 100644 --- a/src/arch/mips/types.hh +++ b/src/arch/mips/types.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2005 The Regents of The University of Michigan + * Copyright (c) 2006 The Regents of The University of Michigan * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -42,14 +42,15 @@ namespace MipsISA typedef uint32_t IntReg; // floating point register file entry type - typedef double FloatReg; typedef uint32_t FloatReg32; typedef uint64_t FloatReg64; typedef uint64_t FloatRegBits; + typedef double FloatRegVal; + typedef double FloatReg; + // cop-0/cop-1 system control register typedef uint64_t MiscReg; - typedef uint64_t InternalProcReg; typedef union { IntReg intreg; From 88e22ee081f1b0259b624fe320af22a58f144251 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Thu, 15 Jun 2006 11:45:51 -0400 Subject: [PATCH 028/152] Get Port stuff working with full-system scripts. Key was adding support for cloning port references (trickier than it sounds). Got rid of class/instance thing and go back to instance cloning... still don't allow changing SimObject parameters/children after a class (instance) has been subclassed or instantiated (or cloned), which should avoid bizarre unintended behavior. configs/test/fs.py: Add ".port" to busses to get a port reference. Get rid of commented-out code. src/python/m5/__init__.py: resolveSimObject should call getCCObject() instead of createCCObject() to avoid cycles in recursively creating objects. src/python/m5/config.py: Get rid of class/instance thing and go back to instance cloning. Deep copy has to happen only on instance cloning then (and not on subclassing). Add getCCObject() method to force creation of C++ SimObject without recursively creating its children. Add support for cloning port references (trickier than it sounds). Also clean up some very obsolete comments. src/python/m5/objects/Bridge.py: src/python/m5/objects/Device.py: Add ports. 
--HG-- extra : convert_revision : 4816d05ead0de520748aace06dbd1911a33f0af8 --- configs/test/fs.py | 87 +++---- src/python/m5/__init__.py | 6 +- src/python/m5/config.py | 419 ++++++++++++++------------------ src/python/m5/objects/Bridge.py | 2 + src/python/m5/objects/Device.py | 2 + 5 files changed, 223 insertions(+), 293 deletions(-) diff --git a/configs/test/fs.py b/configs/test/fs.py index 333a4dfafe..c742e916ca 100644 --- a/configs/test/fs.py +++ b/configs/test/fs.py @@ -129,17 +129,7 @@ class BaseTsunami(Tsunami): ethernet = NSGigE(configdata=NSGigEPciData(), pci_bus=0, pci_dev=1, pci_func=0) etherint = NSGigEInt(device=Parent.ethernet) -# ethernet = Sinic(configdata=SinicPciData(), -# pci_bus=0, pci_dev=1, pci_func=0) -# etherint = SinicInt(device=Parent.ethernet) console = AlphaConsole(pio_addr=0x80200000000, disk=Parent.simple_disk) -# bridge = PciFake(configdata=BridgePciData(), pci_bus=0, pci_dev=2, pci_func=0) - -#class FreeBSDTsunami(BaseTsunami): -# disk0 = FreeBSDRootDisk(delay='0us', driveID='master') -# ide = IdeController(disks=[Parent.disk0], -# configdata=IdeControllerPciData(), -# pci_func=0, pci_dev=0, pci_bus=0) class LinuxTsunami(BaseTsunami): disk0 = LinuxRootDisk(driveID='master') @@ -149,45 +139,45 @@ class LinuxTsunami(BaseTsunami): configdata=IdeControllerPciData(), pci_func=0, pci_dev=0, pci_bus=0) -class LinuxAlphaSystem(LinuxAlphaSystem): +class MyLinuxAlphaSystem(LinuxAlphaSystem): magicbus = Bus(bus_id=0) magicbus2 = Bus(bus_id=1) bridge = Bridge() physmem = PhysicalMemory(range = AddrRange('128MB')) - bridge.side_a = magicbus - bridge.side_b = magicbus2 - c1 = Connector(side_a=Parent.physmem, side_b=Parent.magicbus2) + bridge.side_a = magicbus.port + bridge.side_b = magicbus2.port + physmem.port = magicbus2.port tsunami = LinuxTsunami() - tsunami.cchip.pio = magicbus - tsunami.pchip.pio = magicbus - tsunami.pciconfig.pio = magicbus - tsunami.fake_sm_chip.pio = magicbus - tsunami.ethernet.pio = magicbus - tsunami.ethernet.dma = 
magicbus - tsunami.fake_uart1.pio = magicbus - tsunami.fake_uart2.pio = magicbus - tsunami.fake_uart3.pio = magicbus - tsunami.fake_uart4.pio = magicbus - tsunami.ide.pio = magicbus - tsunami.ide.dma = magicbus - tsunami.fake_ppc.pio = magicbus - tsunami.fake_OROM.pio = magicbus - tsunami.fake_pnp_addr.pio = magicbus - tsunami.fake_pnp_write.pio = magicbus - tsunami.fake_pnp_read0.pio = magicbus - tsunami.fake_pnp_read1.pio = magicbus - tsunami.fake_pnp_read2.pio = magicbus - tsunami.fake_pnp_read3.pio = magicbus - tsunami.fake_pnp_read4.pio = magicbus - tsunami.fake_pnp_read5.pio = magicbus - tsunami.fake_pnp_read6.pio = magicbus - tsunami.fake_pnp_read7.pio = magicbus - tsunami.fake_ata0.pio = magicbus - tsunami.fake_ata1.pio = magicbus - tsunami.fb.pio = magicbus - tsunami.io.pio = magicbus - tsunami.uart.pio = magicbus - tsunami.console.pio = magicbus + tsunami.cchip.pio = magicbus.port + tsunami.pchip.pio = magicbus.port + tsunami.pciconfig.pio = magicbus.port + tsunami.fake_sm_chip.pio = magicbus.port + tsunami.ethernet.pio = magicbus.port + tsunami.ethernet.dma = magicbus.port + tsunami.fake_uart1.pio = magicbus.port + tsunami.fake_uart2.pio = magicbus.port + tsunami.fake_uart3.pio = magicbus.port + tsunami.fake_uart4.pio = magicbus.port + tsunami.ide.pio = magicbus.port + tsunami.ide.dma = magicbus.port + tsunami.fake_ppc.pio = magicbus.port + tsunami.fake_OROM.pio = magicbus.port + tsunami.fake_pnp_addr.pio = magicbus.port + tsunami.fake_pnp_write.pio = magicbus.port + tsunami.fake_pnp_read0.pio = magicbus.port + tsunami.fake_pnp_read1.pio = magicbus.port + tsunami.fake_pnp_read2.pio = magicbus.port + tsunami.fake_pnp_read3.pio = magicbus.port + tsunami.fake_pnp_read4.pio = magicbus.port + tsunami.fake_pnp_read5.pio = magicbus.port + tsunami.fake_pnp_read6.pio = magicbus.port + tsunami.fake_pnp_read7.pio = magicbus.port + tsunami.fake_ata0.pio = magicbus.port + tsunami.fake_ata1.pio = magicbus.port + tsunami.fb.pio = magicbus.port + tsunami.io.pio = 
magicbus.port + tsunami.uart.pio = magicbus.port + tsunami.console.pio = magicbus.port raw_image = RawDiskImage(image_file=disk('linux-latest.img'), read_only=True) simple_disk = SimpleDisk(disk=Parent.raw_image) @@ -196,7 +186,7 @@ class LinuxAlphaSystem(LinuxAlphaSystem): cpu = TimingSimpleCPU() else: cpu = AtomicSimpleCPU() - cpu.mem = Parent.magicbus2 + cpu.mem = magicbus2 cpu.itb = AlphaITB() cpu.dtb = AlphaDTB() sim_console = SimConsole(listener=ConsoleListener(port=3456)) @@ -224,11 +214,12 @@ def DualRoot(clientSystem, serverSystem): self.clock = '5GHz' return self -root = DualRoot(LinuxAlphaSystem(readfile=script('netperf-stream-nt-client.rcS')), - LinuxAlphaSystem(readfile=script('netperf-server.rcS'))) +root = DualRoot( + MyLinuxAlphaSystem(readfile=script('netperf-stream-nt-client.rcS')), + MyLinuxAlphaSystem(readfile=script('netperf-server.rcS'))) m5.instantiate(root) exit_event = m5.simulate() -print 'Exiting @', m5.curTick(), 'because', exit_event.getCause() +print 'Exiting @ cycle', m5.curTick(), 'because', exit_event.getCause() diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py index a4fc9a5e39..f849a899be 100644 --- a/src/python/m5/__init__.py +++ b/src/python/m5/__init__.py @@ -107,11 +107,7 @@ env.update(os.environ) # Function to provide to C++ so it can look up instances based on paths def resolveSimObject(name): obj = config.instanceDict[name] - if not obj._ccObject: - obj.createCCObject() - if obj._ccObject == -1: - panic("resolveSimObject: recursive lookup error on %s" % name) - return obj._ccObject + return obj.getCCObject() # The final hook to generate .ini files. Called from the user script # once the config is built. 
diff --git a/src/python/m5/config.py b/src/python/m5/config.py index f23fd2c6fa..058e725782 100644 --- a/src/python/m5/config.py +++ b/src/python/m5/config.py @@ -1,4 +1,4 @@ -# Copyright (c) 2004-2005 The Regents of The University of Michigan +# Copyright (c) 2004-2006 The Regents of The University of Michigan # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -27,7 +27,7 @@ # Authors: Steve Reinhardt # Nathan Binkert -import os, re, sys, types, inspect +import os, re, sys, types, inspect, copy import m5 from m5 import panic @@ -84,69 +84,22 @@ class Singleton(type): # # Once a set of Python objects have been instantiated in a hierarchy, # calling 'instantiate(obj)' (where obj is the root of the hierarchy) -# will generate a .ini file. See simple-4cpu.py for an example -# (corresponding to m5-test/simple-4cpu.ini). +# will generate a .ini file. # ##################################################################### -##################################################################### -# -# ConfigNode/SimObject classes -# -# The Python class hierarchy rooted by ConfigNode (which is the base -# class of SimObject, which in turn is the base class of all other M5 -# SimObject classes) has special attribute behavior. In general, an -# object in this hierarchy has three categories of attribute-like -# things: -# -# 1. Regular Python methods and variables. These must start with an -# underscore to be treated normally. -# -# 2. SimObject parameters. These values are stored as normal Python -# attributes, but all assignments to these attributes are checked -# against the pre-defined set of parameters stored in the class's -# _params dictionary. Assignments to attributes that do not -# correspond to predefined parameters, or that are not of the correct -# type, incur runtime errors. -# -# 3. Hierarchy children. 
The child nodes of a ConfigNode are stored -# in the node's _children dictionary, but can be accessed using the -# Python attribute dot-notation (just as they are printed out by the -# simulator). Children cannot be created using attribute assigment; -# they must be added by specifying the parent node in the child's -# constructor or using the '+=' operator. - -# The SimObject parameters are the most complex, for a few reasons. -# First, both parameter descriptions and parameter values are -# inherited. Thus parameter description lookup must go up the -# inheritance chain like normal attribute lookup, but this behavior -# must be explicitly coded since the lookup occurs in each class's -# _params attribute. Second, because parameter values can be set -# on SimObject classes (to implement default values), the parameter -# checking behavior must be enforced on class attribute assignments as -# well as instance attribute assignments. Finally, because we allow -# class specialization via inheritance (e.g., see the L1Cache class in -# the simple-4cpu.py example), we must do parameter checking even on -# class instantiation. To provide all these features, we use a -# metaclass to define most of the SimObject parameter behavior for -# this class hierarchy. 
-# -##################################################################### - - # dict to look up SimObjects based on path instanceDict = {} +############################# +# +# Utility methods +# +############################# + def isSimObject(value): return isinstance(value, SimObject) -def isSimObjectClass(value): - try: - return issubclass(value, SimObject) - except TypeError: - # happens if value is not a class at all - return False - def isSimObjectSequence(value): if not isinstance(value, (list, tuple)) or len(value) == 0: return False @@ -157,22 +110,9 @@ def isSimObjectSequence(value): return True -def isSimObjectClassSequence(value): - if not isinstance(value, (list, tuple)) or len(value) == 0: - return False - - for val in value: - if not isNullPointer(val) and not isSimObjectClass(val): - return False - - return True - def isSimObjectOrSequence(value): return isSimObject(value) or isSimObjectSequence(value) -def isSimObjectClassOrSequence(value): - return isSimObjectClass(value) or isSimObjectClassSequence(value) - def isNullPointer(value): return isinstance(value, NullSimObject) @@ -192,41 +132,36 @@ def applyOrMap(objOrSeq, meth, *args, **kwargs): return [applyMethod(o, meth, *args, **kwargs) for o in objOrSeq] -# The metaclass for ConfigNode (and thus for everything that derives -# from ConfigNode, including SimObject). This class controls how new -# classes that derive from ConfigNode are instantiated, and provides -# inherited class behavior (just like a class controls how instances -# of that class are instantiated, and provides inherited instance -# behavior). +# The metaclass for SimObject. This class controls how new classes +# that derive from SimObject are instantiated, and provides inherited +# class behavior (just like a class controls how instances of that +# class are instantiated, and provides inherited instance behavior). 
class MetaSimObject(type): # Attributes that can be set only at initialization time init_keywords = { 'abstract' : types.BooleanType, 'type' : types.StringType } # Attributes that can be set any time - keywords = { 'check' : types.FunctionType, - 'children' : types.ListType, - 'ccObject' : types.ObjectType } + keywords = { 'check' : types.FunctionType } # __new__ is called before __init__, and is where the statements # in the body of the class definition get loaded into the class's - # __dict__. We intercept this to filter out parameter assignments + # __dict__. We intercept this to filter out parameter & port assignments # and only allow "private" attributes to be passed to the base # __new__ (starting with underscore). def __new__(mcls, name, bases, dict): - if dict.has_key('_init_dict'): - # must have been called from makeSubclass() rather than - # via Python class declaration; bypass filtering process. - cls_dict = dict - else: - # Copy "private" attributes (including special methods - # such as __new__) to the official dict. Everything else - # goes in _init_dict to be filtered in __init__. - cls_dict = {} - for key,val in dict.items(): - if key.startswith('_'): - cls_dict[key] = val - del dict[key] - cls_dict['_init_dict'] = dict + # Copy "private" attributes, functions, and classes to the + # official dict. Everything else goes in _init_dict to be + # filtered in __init__. 
+ cls_dict = {} + value_dict = {} + for key,val in dict.items(): + if key.startswith('_') or isinstance(val, (types.FunctionType, + types.TypeType)): + cls_dict[key] = val + else: + # must be a param/port setting + value_dict[key] = val + cls_dict['_value_dict'] = value_dict return super(MetaSimObject, mcls).__new__(mcls, name, bases, cls_dict) # subclass initialization @@ -236,11 +171,15 @@ class MetaSimObject(type): super(MetaSimObject, cls).__init__(name, bases, dict) # initialize required attributes - cls._params = multidict() - cls._values = multidict() - cls._ports = multidict() - cls._instantiated = False # really instantiated or subclassed - cls._anon_subclass_counter = 0 + + # class-only attributes + cls._params = multidict() # param descriptions + cls._ports = multidict() # port descriptions + + # class or instance attributes + cls._values = multidict() # param values + cls._port_map = multidict() # port bindings + cls._instantiated = False # really instantiated, cloned, or subclassed # We don't support multiple inheritance. If you want to, you # must fix multidict to deal with it properly. @@ -249,21 +188,28 @@ class MetaSimObject(type): base = bases[0] - # the only time the following is not true is when we define - # the SimObject class itself + # Set up general inheritance via multidicts. A subclass will + # inherit all its settings from the base class. The only time + # the following is not true is when we define the SimObject + # class itself (in which case the multidicts have no parent). 
if isinstance(base, MetaSimObject): cls._params.parent = base._params - cls._values.parent = base._values cls._ports.parent = base._ports + cls._values.parent = base._values + cls._port_map.parent = base._port_map + # mark base as having been subclassed base._instantiated = True - # now process the _init_dict items - for key,val in cls._init_dict.items(): - if isinstance(val, (types.FunctionType, types.TypeType)): - type.__setattr__(cls, key, val) - + # Now process the _value_dict items. They could be defining + # new (or overriding existing) parameters or ports, setting + # class keywords (e.g., 'abstract'), or setting parameter + # values or port bindings. The first 3 can only be set when + # the class is defined, so we handle them here. The others + # can be set later too, so just emulate that by calling + # setattr(). + for key,val in cls._value_dict.items(): # param descriptions - elif isinstance(val, ParamDesc): + if isinstance(val, ParamDesc): cls._new_param(key, val) # port objects @@ -278,27 +224,6 @@ class MetaSimObject(type): else: setattr(cls, key, val) - # Pull the deep-copy memoization dict out of the class dict if - # it's there... - memo = cls.__dict__.get('_memo', {}) - - # Handle SimObject values - for key,val in cls._values.iteritems(): - # SimObject instances need to be promoted to classes. - # Existing classes should not have any instance values, so - # these can only occur at the lowest level dict (the - # parameters just being set in this class definition). - if isSimObjectOrSequence(val): - assert(val == cls._values.local[key]) - cls._values[key] = applyOrMap(val, 'makeClass', memo) - # SimObject classes need to be subclassed so that - # parameters that get set at this level only affect this - # level and derivatives. 
- elif isSimObjectClassOrSequence(val): - assert(not cls._values.local.has_key(key)) - cls._values[key] = applyOrMap(val, 'makeSubclass', {}, memo) - - def _set_keyword(cls, keyword, val, kwtype): if not isinstance(val, kwtype): raise TypeError, 'keyword %s has bad type %s (expecting %s)' % \ @@ -328,15 +253,15 @@ class MetaSimObject(type): self._ports[attr].connect(self, attr, value) return - # must be SimObject param - param = cls._params.get(attr, None) - if param: - # It's ok: set attribute by delegating to 'object' class. - if isSimObjectOrSequence(value) and cls._instantiated: - raise AttributeError, \ - "Cannot set SimObject parameter '%s' after\n" \ + if isSimObjectOrSequence(value) and cls._instantiated: + raise RuntimeError, \ + "cannot set SimObject parameter '%s' after\n" \ " class %s has been instantiated or subclassed" \ % (attr, cls.__name__) + + # check for param + param = cls._params.get(attr, None) + if param: try: cls._values[attr] = param.convert(value) except Exception, e: @@ -344,9 +269,9 @@ class MetaSimObject(type): (e, cls.__name__, attr, value) e.args = (msg, ) raise - # I would love to get rid of this elif isSimObjectOrSequence(value): - cls._values[attr] = value + # if RHS is a SimObject, it's an implicit child assignment + cls._values[attr] = value else: raise AttributeError, \ "Class %s has no parameter %s" % (cls.__name__, attr) @@ -358,23 +283,7 @@ class MetaSimObject(type): raise AttributeError, \ "object '%s' has no attribute '%s'" % (cls.__name__, attr) - # Create a subclass of this class. Basically a function interface - # to the standard Python class definition mechanism, primarily for - # internal use. 'memo' dict param supports "deep copy" (really - # "deep subclass") operations... within a given operation, - # multiple references to a class should result in a single - # subclass object with multiple references to it (as opposed to - # mutiple unique subclasses). 
- def makeSubclass(cls, init_dict, memo = {}): - subcls = memo.get(cls) - if not subcls: - name = cls.__name__ + '_' + str(cls._anon_subclass_counter) - cls._anon_subclass_counter += 1 - subcls = MetaSimObject(name, (cls,), - { '_init_dict': init_dict, '_memo': memo }) - return subcls - -# The ConfigNode class is the root of the special hierarchy. Most of +# The SimObject class is the root of the special hierarchy. Most of # the code in this class deals with the configuration hierarchy itself # (parent/child node relationships). class SimObject(object): @@ -382,83 +291,72 @@ class SimObject(object): # get this metaclass. __metaclass__ = MetaSimObject - # __new__ operator allocates new instances of the class. We - # override it here just to support "deep instantiation" operation - # via the _memo dict. When recursively instantiating an object - # hierarchy we want to make sure that each class is instantiated - # only once, and that if there are multiple references to the same - # original class, we end up with the corresponding instantiated - # references all pointing to the same instance. - def __new__(cls, _memo = None, **kwargs): - if _memo is not None and _memo.has_key(cls): - # return previously instantiated object - assert(len(kwargs) == 0) - return _memo[cls] - else: - # Need a new one... if it needs to be memoized, this will - # happen in __init__. We defer the insertion until then - # so __init__ can use the memo dict to tell whether or not - # to perform the initialization. - return super(SimObject, cls).__new__(cls, **kwargs) + # Initialize new instance. For objects with SimObject-valued + # children, we need to recursively clone the classes represented + # by those param values as well in a consistent "deep copy"-style + # fashion. 
That is, we want to make sure that each instance is + # cloned only once, and that if there are multiple references to + # the same original object, we end up with the corresponding + # cloned references all pointing to the same cloned instance. + def __init__(self, **kwargs): + ancestor = kwargs.get('_ancestor') + memo_dict = kwargs.get('_memo') + if memo_dict is None: + # prepare to memoize any recursively instantiated objects + memo_dict = {} + elif ancestor: + # memoize me now to avoid problems with recursive calls + memo_dict[ancestor] = self - # Initialize new instance previously allocated by __new__. For - # objects with SimObject-valued params, we need to recursively - # instantiate the classes represented by those param values as - # well (in a consistent "deep copy"-style fashion; see comment - # above). - def __init__(self, _memo = None, **kwargs): - if _memo is not None: - # We're inside a "deep instantiation" - assert(isinstance(_memo, dict)) - assert(len(kwargs) == 0) - if _memo.has_key(self.__class__): - # __new__ returned an existing, already initialized - # instance, so there's nothing to do here - assert(_memo[self.__class__] == self) - return - # no pre-existing object, so remember this one here - _memo[self.__class__] = self - else: - # This is a new top-level instantiation... don't memoize - # this objcet, but prepare to memoize any recursively - # instantiated objects. - _memo = {} - - self.__class__._instantiated = True + if not ancestor: + ancestor = self.__class__ + ancestor._instantiated = True + # initialize required attributes + self._parent = None self._children = {} + self._ccObject = None # pointer to C++ object + self._instantiated = False # really "cloned" + # Inherit parameter values from class using multidict so # individual value settings can be overridden. - self._values = multidict(self.__class__._values) - # For SimObject-valued parameters, the class should have - # classes (not instances) for the values. 
We need to - # instantiate these classes rather than just inheriting the - # class object. - for key,val in self.__class__._values.iteritems(): - if isSimObjectClass(val): - setattr(self, key, val(_memo)) - elif isSimObjectClassSequence(val) and len(val): - setattr(self, key, [ v(_memo) for v in val ]) + self._values = multidict(ancestor._values) + # clone SimObject-valued parameters + for key,val in ancestor._values.iteritems(): + if isSimObject(val): + setattr(self, key, val(_memo=memo_dict)) + elif isSimObjectSequence(val) and len(val): + setattr(self, key, [ v(_memo=memo_dict) for v in val ]) + # clone port references. no need to use a multidict here + # since we will be creating new references for all ports. + self._port_map = {} + for key,val in ancestor._port_map.iteritems(): + self._port_map[key] = applyOrMap(val, 'clone', memo_dict) # apply attribute assignments from keyword args, if any for key,val in kwargs.iteritems(): setattr(self, key, val) - self._ccObject = None # pointer to C++ object - self._port_map = {} # map of port connections - - # Use this instance as a template to create a new class. - def makeClass(self, memo = {}): - cls = memo.get(self) - if not cls: - cls = self.__class__.makeSubclass(self._values.local) - memo[self] = cls - return cls - - # Direct instantiation of instances (cloning) is no longer - # allowed; must generate class from instance first. + # "Clone" the current instance by creating another instance of + # this instance's class, but that inherits its parameter values + # and port mappings from the current instance. If we're in a + # "deep copy" recursive clone, check the _memo dict to see if + # we've already cloned this instance. def __call__(self, **kwargs): - raise TypeError, "cannot instantiate SimObject; "\ - "use makeClass() to make class first" + memo_dict = kwargs.get('_memo') + if memo_dict is None: + # no memo_dict: must be top-level clone operation. 
+ # this is only allowed at the root of a hierarchy + if self._parent: + raise RuntimeError, "attempt to clone object %s " \ + "not at the root of a tree (parent = %s)" \ + % (self, self._parent) + # create a new dict and use that. + memo_dict = {} + kwargs['_memo'] = memo_dict + elif memo_dict.has_key(self): + # clone already done & memoized + return memo_dict[self] + return self.__class__(_ancestor = self, **kwargs) def __getattr__(self, attr): if self._ports.has_key(attr): @@ -485,10 +383,14 @@ class SimObject(object): self._ports[attr].connect(self, attr, value) return + if isSimObjectOrSequence(value) and self._instantiated: + raise RuntimeError, \ + "cannot set SimObject parameter '%s' after\n" \ + " instance been cloned %s" % (attr, `self`) + # must be SimObject param param = self._params.get(attr, None) if param: - # It's ok: set attribute by delegating to 'object' class. try: value = param.convert(value) except Exception, e: @@ -496,7 +398,6 @@ class SimObject(object): (e, self.__class__.__name__, attr, value) e.args = (msg, ) raise - # I would love to get rid of this elif isSimObjectOrSequence(value): pass else: @@ -535,13 +436,13 @@ class SimObject(object): self._children[name] = value def set_path(self, parent, name): - if not hasattr(self, '_parent'): + if not self._parent: self._parent = parent self._name = name parent.add_child(name, self) def path(self): - if not hasattr(self, '_parent'): + if not self._parent: return 'root' ppath = self._parent.path() if ppath == 'root': @@ -618,13 +519,22 @@ class SimObject(object): # Call C++ to create C++ object corresponding to this object and # (recursively) all its children def createCCObject(self): - if self._ccObject: - return - self._ccObject = -1 - self._ccObject = m5.main.createSimObject(self.path()) + self.getCCObject() # force creation for child in self._children.itervalues(): child.createCCObject() + # Get C++ object corresponding to this object, calling C++ if + # necessary to construct it. 
Does *not* recursively create + # children. + def getCCObject(self): + if not self._ccObject: + self._ccObject = -1 # flag to catch cycles in recursion + self._ccObject = m5.main.createSimObject(self.path()) + elif self._ccObject == -1: + raise RuntimeError, "%s: recursive call to getCCObject()" \ + % self.path() + return self._ccObject + # Create C++ port connections corresponding to the connections in # _port_map (& recursively for all children) def connectPorts(self): @@ -723,9 +633,9 @@ class BaseProxy(object): if self._search_up: while not done: - try: obj = obj._parent - except: break - + obj = obj._parent + if not obj: + break result, done = self.find(obj) if not done: @@ -841,16 +751,16 @@ Self = ProxyFactory(search_self = True, search_up = False) # # Parameter description classes # -# The _params dictionary in each class maps parameter names to -# either a Param or a VectorParam object. These objects contain the +# The _params dictionary in each class maps parameter names to either +# a Param or a VectorParam object. These objects contain the # parameter description string, the parameter type, and the default -# value (loaded from the PARAM section of the .odesc files). The -# _convert() method on these objects is used to force whatever value -# is assigned to the parameter to the appropriate type. +# value (if any). The convert() method on these objects is used to +# force whatever value is assigned to the parameter to the appropriate +# type. # # Note that the default values are loaded into the class's attribute # space when the parameter dictionary is initialized (in -# MetaConfigNode._setparams()); after that point they aren't used. +# MetaSimObject._new_param()); after that point they aren't used. # ##################################################################### @@ -1480,6 +1390,7 @@ AllMemory = AddrRange(0, MaxAddr) # particular SimObject. 
class PortRef(object): def __init__(self, simobj, name, isVec): + assert(isSimObject(simobj)) self.simobj = simobj self.name = name self.index = -1 @@ -1502,13 +1413,38 @@ class PortRef(object): self.simobj._port_map[self.name] = curMap self.peer = other + def clone(self, memo): + newRef = copy.copy(self) + assert(isSimObject(newRef.simobj)) + newRef.simobj = newRef.simobj(_memo=memo) + # Tricky: if I'm the *second* PortRef in the pair to be + # cloned, then my peer is still in the middle of its clone + # method, and thus hasn't returned to its owner's + # SimObject.__init__ to get installed in _port_map. As a + # result I have no way of finding the *new* peer object. So I + # mark myself as "waiting" for my peer, and I let the *first* + # PortRef clone call set up both peer pointers after I return. + newPeer = newRef.simobj._port_map.get(self.name) + if newPeer: + if self.isVec: + assert(self.index != -1) + newPeer = newPeer[self.index] + # other guy is all set up except for his peer pointer + assert(newPeer.peer == -1) # peer must be waiting for handshake + newPeer.peer = newRef + newRef.peer = newPeer + else: + # other guy is in clone; just wait for him to do the work + newRef.peer = -1 # mark as waiting for handshake + return newRef + # Call C++ to create corresponding port connection between C++ objects def ccConnect(self): if self.ccConnected: # already done this return peer = self.peer - m5.main.connectPorts(self.simobj._ccObject, self.name, self.index, - peer.simobj._ccObject, peer.name, peer.index) + m5.main.connectPorts(self.simobj.getCCObject(), self.name, self.index, + peer.simobj.getCCObject(), peer.name, peer.index) self.ccConnected = True peer.ccConnected = True @@ -1528,6 +1464,9 @@ class Port(object): # Connect an instance of this port (on the given SimObject with # the given name) with the port described by the supplied PortRef def connect(self, simobj, name, ref): + if not isinstance(ref, PortRef): + raise TypeError, \ + "assigning non-port 
reference port '%s'" % name myRef = self.makeRef(simobj, name) myRef.setPeer(ref) ref.setPeer(myRef) diff --git a/src/python/m5/objects/Bridge.py b/src/python/m5/objects/Bridge.py index 8805357552..c9e673afb6 100644 --- a/src/python/m5/objects/Bridge.py +++ b/src/python/m5/objects/Bridge.py @@ -3,6 +3,8 @@ from MemObject import MemObject class Bridge(MemObject): type = 'Bridge' + side_a = Port('Side A port') + side_b = Port('Side B port') queue_size_a = Param.Int(16, "The number of requests to buffer") queue_size_b = Param.Int(16, "The number of requests to buffer") delay = Param.Latency('0ns', "The latency of this bridge") diff --git a/src/python/m5/objects/Device.py b/src/python/m5/objects/Device.py index 7798f5f048..222f750da6 100644 --- a/src/python/m5/objects/Device.py +++ b/src/python/m5/objects/Device.py @@ -4,6 +4,7 @@ from MemObject import MemObject class PioDevice(MemObject): type = 'PioDevice' abstract = True + pio = Port("Programmed I/O port") platform = Param.Platform(Parent.any, "Platform this device is part of") system = Param.System(Parent.any, "System this device is part of") @@ -16,3 +17,4 @@ class BasicPioDevice(PioDevice): class DmaDevice(PioDevice): type = 'DmaDevice' abstract = True + dma = Port("DMA port") From 51a5b826373e2c08ba173854a19597d59e0e3c90 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Thu, 15 Jun 2006 22:01:28 -0400 Subject: [PATCH 029/152] Initial changes to allow DetailedCPU to work with other architectures (i.e.
Sparc & MIPS) Still need to add some code to fetch & commit stages src/cpu/o3/commit.hh: src/cpu/o3/cpu.cc: src/cpu/o3/cpu.hh: Add nextNPC read & set functions src/cpu/o3/fetch.hh: src/cpu/o3/fetch_impl.hh: Add nextNPC --HG-- extra : convert_revision : 120677547d54091411399156bd066ce5baf785f7 --- src/cpu/o3/commit.hh | 14 +++++++++++++- src/cpu/o3/cpu.cc | 17 +++++++++++++++++ src/cpu/o3/cpu.hh | 7 +++++++ src/cpu/o3/fetch.hh | 10 ++++++++++ src/cpu/o3/fetch_impl.hh | 24 ++++++++++++++++++++---- 5 files changed, 67 insertions(+), 5 deletions(-) diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 0b31cb9c85..c73b39ec6b 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_COMMIT_HH__ @@ -280,12 +281,20 @@ class DefaultCommit /** Sets the PC of a specific thread. */ void setPC(uint64_t val, unsigned tid) { PC[tid] = val; } - /** Reads the PC of a specific thread. */ + /** Reads the next PC of a specific thread. */ uint64_t readNextPC(unsigned tid) { return nextPC[tid]; } /** Sets the next PC of a specific thread. */ void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; } +#if THE_ISA != ALPHA_ISA + /** Reads the next NPC of a specific thread. */ + uint64_t readNextPC(unsigned tid) { return nextNPC[tid]; } + + /** Sets the next NPC of a specific thread. */ + void setNextPC(uint64_t val, unsigned tid) { nextNPC[tid] = val; } +#endif + private: /** Time buffer interface. */ TimeBuffer *timeBuffer; @@ -397,6 +406,9 @@ class DefaultCommit /** The next PC of each thread. */ Addr nextPC[Impl::MaxThreads]; + /** The next NPC of each thread. */ + Addr nextNPC[Impl::MaxThreads]; + /** The sequence number of the youngest valid instruction in the ROB. 
*/ InstSeqNum youngestSeqNum[Impl::MaxThreads]; diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 788c6b1647..d5538cdf07 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #include "config/full_system.hh" @@ -922,6 +923,22 @@ FullO3CPU::setNextPC(uint64_t val,unsigned tid) commit.setNextPC(val, tid); } +#if THE_ISA != ALPHA_ISA +template +uint64_t +FullO3CPU::readNextNPC(unsigned tid) +{ + return commit.readNextNPC(tid); +} + +template +void +FullO3CPU::setNextNNPC(uint64_t val,unsigned tid) +{ + commit.setNextNPC(val, tid); +} +#endif + template typename FullO3CPU::ListIt FullO3CPU::addInst(DynInstPtr &inst) diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index ff41a33061..8f4175c70e 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_CPU_HH__ @@ -299,6 +300,12 @@ class FullO3CPU : public BaseFullCPU /** Sets the next PC of a specific thread. */ void setNextPC(uint64_t val, unsigned tid); + /** Reads the next NPC of a specific thread. */ + uint64_t readNextNPC(unsigned tid); + + /** Sets the next NPC of a specific thread. */ + void setNextNPC(uint64_t val, unsigned tid); + /** Function to add instruction onto the head of the list of the * instructions. Used when new instructions are fetched. */ diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 962d464372..c2d91a3797 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_FETCH_HH__ @@ -335,6 +336,15 @@ class DefaultFetch /** Per-thread next PC. */ Addr nextPC[Impl::MaxThreads]; +#if THE_ISA != ALPHA_ISA + /** Per-thread next Next PC. 
+ * This is not a real register but is used for + * architectures that use a branch-delay slot. + * (such as MIPS or Sparc) + */ + Addr nextNPC[Impl::MaxThreads]; +#endif + /** Memory request used to access cache. */ RequestPtr memReq[Impl::MaxThreads]; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 477a1469cc..4993819be8 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #include "arch/isa_traits.hh" @@ -330,6 +331,9 @@ DefaultFetch::initStage() for (int tid = 0; tid < numThreads; tid++) { PC[tid] = cpu->readPC(tid); nextPC[tid] = cpu->readNextPC(tid); +#if THE_ISA != ALPHA_ISA + nextNPC[tid] = cpu->readNextNPC(tid); +#endif } } @@ -404,6 +408,9 @@ DefaultFetch::takeOverFrom() stalls[i].commit = 0; PC[i] = cpu->readPC(i); nextPC[i] = cpu->readNextPC(i); +#if THE_ISA != ALPHA_ISA + nextNPC[i] = cpu->readNextNPC(i); +#endif fetchStatus[i] = Running; } numInst = 0; @@ -1024,7 +1031,7 @@ DefaultFetch::fetch(bool &status_change) fetch_PC = next_PC; if (instruction->isQuiesce()) { - warn("%lli: Quiesce instruction encountered, halting fetch!", + warn("cycle %lli: Quiesce instruction encountered, halting fetch!", curTick); fetchStatus[tid] = QuiescePending; ++numInst; @@ -1045,8 +1052,17 @@ DefaultFetch::fetch(bool &status_change) if (fault == NoFault) { DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC); +#if THE_ISA == ALPHA_ISA PC[tid] = next_PC; nextPC[tid] = next_PC + instSize; +#else + PC[tid] = next_PC; + nextPC[tid] = next_PC + instSize; + nextPC[tid] = next_PC + instSize; + + thread->setNextPC(thread->readNextNPC()); + thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst)); +#endif } else { // We shouldn't be in an icache miss and also have a fault (an ITB // miss) @@ -1089,9 +1105,9 @@ DefaultFetch::fetch(bool &status_change) fetchStatus[tid] = TrapPending; status_change = 
true; - warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); + warn("cycle %lli: fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); #else // !FULL_SYSTEM - warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); + warn("cycle %lli: fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); #endif // FULL_SYSTEM } } @@ -1260,6 +1276,6 @@ int DefaultFetch::branchCount() { list::iterator threads = (*activeThreads).begin(); - + warn("Branch Count Fetch policy unimplemented\n"); return *threads; } From 720e6c4145726d310aa19fed4f48bf6a8e32912e Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 16 Jun 2006 13:10:47 -0400 Subject: [PATCH 030/152] Checker updates. src/cpu/checker/cpu.cc: src/cpu/checker/cpu.hh: Updates for checker. Output more informative messages on error. Rename some functions. Add in option to warn (and not exit) on load results being incorrect. src/cpu/checker/cpu_builder.cc: src/cpu/checker/o3_cpu_builder.cc: Add in parameter to warn (and not exit) on load result errors. src/cpu/o3/commit_impl.hh: src/cpu/o3/lsq_unit_impl.hh: Renamed checker functin. 
--HG-- extra : convert_revision : d7aa28b8462691d20600f97a7213e2acd91c5665 --- src/cpu/checker/cpu.cc | 85 ++++++++++++++++++++++++------- src/cpu/checker/cpu.hh | 20 +++++++- src/cpu/checker/cpu_builder.cc | 4 ++ src/cpu/checker/o3_cpu_builder.cc | 4 ++ src/cpu/o3/commit_impl.hh | 4 +- src/cpu/o3/lsq_unit_impl.hh | 4 +- 6 files changed, 96 insertions(+), 25 deletions(-) diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc index 6971ab37fd..99189a8966 100644 --- a/src/cpu/checker/cpu.cc +++ b/src/cpu/checker/cpu.cc @@ -78,6 +78,7 @@ CheckerCPU::CheckerCPU(Params *p) changedPC = willChangePC = changedNextPC = false; exitOnError = p->exitOnError; + warnOnlyOnLoadError = p->warnOnlyOnLoadError; #if FULL_SYSTEM itb = p->itb; dtb = p->dtb; @@ -409,9 +410,17 @@ CheckerCPU::checkFlags(Request *req) } } +void +CheckerCPU::dumpAndExit() +{ + warn("%lli: Checker PC:%#x, next PC:%#x", + curTick, thread->readPC(), thread->readNextPC()); + panic("Checker found an error!"); +} + template void -Checker::tick(DynInstPtr &completed_inst) +Checker::verify(DynInstPtr &completed_inst) { DynInstPtr inst; @@ -485,7 +494,7 @@ Checker::tick(DynInstPtr &completed_inst) warn("%lli: Changed PC does not match expected PC, " "changed: %#x, expected: %#x", curTick, thread->readPC(), newPC); - handleError(); + CheckerCPU::handleError(); } willChangePC = false; } @@ -524,7 +533,7 @@ Checker::tick(DynInstPtr &completed_inst) // possible that its ITB entry was kicked out. warn("%lli: Instruction PC %#x was not found in the ITB!", curTick, thread->readPC()); - handleError(); + handleError(inst); // go to the next instruction thread->setPC(thread->readNextPC()); @@ -676,7 +685,7 @@ Checker::validateInst(DynInstPtr &inst) warn("%lli: Changed PCs recently, may not be an error", curTick); } else { - handleError(); + handleError(inst); } } @@ -686,7 +695,7 @@ Checker::validateInst(DynInstPtr &inst) warn("%lli: Binary instructions do not match! 
Inst: %#x, " "checker: %#x", curTick, mi, machInst); - handleError(); + handleError(inst); } } @@ -694,25 +703,33 @@ template void Checker::validateExecution(DynInstPtr &inst) { + bool result_mismatch = false; if (inst->numDestRegs()) { // @todo: Support more destination registers. if (inst->isUnverifiable()) { // Unverifiable instructions assume they were executed // properly by the CPU. Grab the result from the // instruction and write it to the register. - RegIndex idx = inst->destRegIdx(0); - if (idx < TheISA::FP_Base_DepTag) { - thread->setIntReg(idx, inst->readIntResult()); - } else if (idx < TheISA::Fpcr_DepTag) { - thread->setFloatRegBits(idx, inst->readIntResult()); - } else { - thread->setMiscReg(idx, inst->readIntResult()); - } + copyResult(inst); } else if (result.integer != inst->readIntResult()) { - warn("%lli: Instruction results do not match! (Values may not " - "actually be integers) Inst: %#x, checker: %#x", - curTick, inst->readIntResult(), result.integer); - handleError(); + result_mismatch = true; + } + } + + if (result_mismatch) { + warn("%lli: Instruction results do not match! (Values may not " + "actually be integers) Inst: %#x, checker: %#x", + curTick, inst->readIntResult(), result.integer); + + // It's useful to verify load values from memory, but in MP + // systems the value obtained at execute may be different than + // the value obtained at completion. Similarly DMA can + // present the same problem on even UP systems. Thus there is + // the option to only warn on loads having a result error. + if (inst->isLoad() && warnOnlyOnLoadError) { + copyResult(inst); + } else { + handleError(inst); } } @@ -720,7 +737,7 @@ Checker::validateExecution(DynInstPtr &inst) warn("%lli: Instruction next PCs do not match! 
Inst: %#x, " "checker: %#x", curTick, inst->readNextPC(), thread->readNextPC()); - handleError(); + handleError(inst); } // Checking side effect registers can be difficult if they are not @@ -739,7 +756,7 @@ Checker::validateExecution(DynInstPtr &inst) curTick, misc_reg_idx, inst->tcBase()->readMiscReg(misc_reg_idx), thread->readMiscReg(misc_reg_idx)); - handleError(); + handleError(inst); } } } @@ -750,6 +767,36 @@ Checker::validateState() { } +template +void +Checker::copyResult(DynInstPtr &inst) +{ + RegIndex idx = inst->destRegIdx(0); + if (idx < TheISA::FP_Base_DepTag) { + thread->setIntReg(idx, inst->readIntResult()); + } else if (idx < TheISA::Fpcr_DepTag) { + thread->setFloatRegBits(idx, inst->readIntResult()); + } else { + thread->setMiscReg(idx, inst->readIntResult()); + } +} + +template +void +Checker::dumpAndExit(DynInstPtr &inst) +{ + cprintf("Error detected, instruction information:\n"); + cprintf("PC:%#x, nextPC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Completed:%i\n", + inst->readPC(), + inst->readNextPC(), + inst->seqNum, + inst->threadNumber, + inst->isCompleted()); + inst->dump(); + CheckerCPU::dumpAndExit(); +} + template void Checker::dumpInsts() diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh index c9986d228b..785387e600 100644 --- a/src/cpu/checker/cpu.hh +++ b/src/cpu/checker/cpu.hh @@ -103,6 +103,7 @@ class CheckerCPU : public BaseCPU Process *process; #endif bool exitOnError; + bool warnOnlyOnLoadError; }; public: @@ -335,10 +336,13 @@ class CheckerCPU : public BaseCPU void handleError() { if (exitOnError) - panic("Checker found error!"); + dumpAndExit(); } + bool checkFlags(Request *req); + void dumpAndExit(); + ThreadContext *tcBase() { return tc; } SimpleThread *threadBase() { return thread; } @@ -351,6 +355,7 @@ class CheckerCPU : public BaseCPU uint64_t newPC; bool changedNextPC; bool exitOnError; + bool warnOnlyOnLoadError; InstSeqNum youngestSN; }; @@ -372,12 +377,23 @@ class Checker : public CheckerCPU void switchOut(Sampler 
*s); void takeOverFrom(BaseCPU *oldCPU); - void tick(DynInstPtr &inst); + void verify(DynInstPtr &inst); void validateInst(DynInstPtr &inst); void validateExecution(DynInstPtr &inst); void validateState(); + void copyResult(DynInstPtr &inst); + + private: + void handleError(DynInstPtr &inst) + { + if (exitOnError) + dumpAndExit(inst); + } + + void dumpAndExit(DynInstPtr &inst); + std::list instList; typedef typename std::list::iterator InstListIt; void dumpInsts(); diff --git a/src/cpu/checker/cpu_builder.cc b/src/cpu/checker/cpu_builder.cc index 3b75832948..3c43ab503f 100644 --- a/src/cpu/checker/cpu_builder.cc +++ b/src/cpu/checker/cpu_builder.cc @@ -77,6 +77,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker) Param defer_registration; Param exitOnError; + Param warnOnlyOnLoadError; Param function_trace; Param function_trace_start; @@ -110,6 +111,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(OzoneChecker) INIT_PARAM(defer_registration, "defer system registration (for sampling)"), INIT_PARAM(exitOnError, "exit on error"), + INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load " + "result errors", false), INIT_PARAM(function_trace, "Enable function trace"), INIT_PARAM(function_trace_start, "Cycle to start function trace") @@ -126,6 +129,7 @@ CREATE_SIM_OBJECT(OzoneChecker) params->max_loads_any_thread = 0; params->max_loads_all_threads = 0; params->exitOnError = exitOnError; + params->warnOnlyOnLoadError = warnOnlyOnLoadError; params->deferRegistration = defer_registration; params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; diff --git a/src/cpu/checker/o3_cpu_builder.cc b/src/cpu/checker/o3_cpu_builder.cc index 59a6c71585..534a5e28c3 100644 --- a/src/cpu/checker/o3_cpu_builder.cc +++ b/src/cpu/checker/o3_cpu_builder.cc @@ -75,6 +75,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker) Param defer_registration; Param exitOnError; + Param warnOnlyOnLoadError; Param function_trace; Param function_trace_start; @@ -105,6 +106,8 @@ 
BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker) INIT_PARAM(defer_registration, "defer system registration (for sampling)"), INIT_PARAM(exitOnError, "exit on error"), + INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load " + "result errors", false), INIT_PARAM(function_trace, "Enable function trace"), INIT_PARAM(function_trace_start, "Cycle to start function trace") @@ -121,6 +124,7 @@ CREATE_SIM_OBJECT(O3Checker) params->max_loads_any_thread = 0; params->max_loads_all_threads = 0; params->exitOnError = exitOnError; + params->warnOnlyOnLoadError = warnOnlyOnLoadError; params->deferRegistration = defer_registration; params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index b0c8bee773..a182719184 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -975,7 +975,7 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) // Use checker prior to updating anything due to traps or PC // based events. if (cpu->checker) { - cpu->checker->tick(head_inst); + cpu->checker->verify(head_inst); } // Check if the instruction caused a fault. If so, trap. @@ -993,7 +993,7 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) } if (cpu->checker && head_inst->isStore()) { - cpu->checker->tick(head_inst); + cpu->checker->verify(head_inst); } assert(!thread[tid]->inSyscall); diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index b48d7fb741..6e201ea5f2 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -789,7 +789,7 @@ LSQUnit::storePostSend(Packet *pkt) // verify the value in memory for stores. 
storeQueue[storeWBIdx].inst->setCompleted(); if (cpu->checker) { - cpu->checker->tick(storeQueue[storeWBIdx].inst); + cpu->checker->verify(storeQueue[storeWBIdx].inst); } } @@ -885,7 +885,7 @@ LSQUnit::completeStore(int store_idx) // may get reported twice to the checker, but the checker can // handle that case. if (cpu->checker) { - cpu->checker->tick(storeQueue[store_idx].inst); + cpu->checker->verify(storeQueue[store_idx].inst); } } From baba18ab9214d1fe2236cd932c3bfca5ddfb06d6 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 16 Jun 2006 17:08:47 -0400 Subject: [PATCH 031/152] Two updates that got combined into one ChangeSet accidentally. They're both pretty simple so they shouldn't cause any trouble. First: Rename FullCPU and its variants in the o3 directory to O3CPU to differentiate from the old model, and also to specify it's an out of order model. Second: Include build options for selecting the Checker to be used. These options make sure if the Checker is being used there is a CPU that supports it also being compiled. SConstruct: Add in option USE_CHECKER to allow for not compiling in checker code. The checker is enabled through this option instead of through the CPU_MODELS list. However it's still necessary to treat the Checker like a CPU model, so it is appended onto the CPU_MODELS list if enabled. configs/test/test.py: Name change for DetailedCPU to DetailedO3CPU. Also include option for max tick. src/base/traceflags.py: Add in O3CPU trace flag. src/cpu/SConscript: Rename AlphaFullCPU to AlphaO3CPU. Only include checker sources if they're necessary. Also add a list of CPUs that support the Checker, and only allow the Checker to be compiled in if one of those CPUs are also being included. src/cpu/base_dyn_inst.cc: src/cpu/base_dyn_inst.hh: Rename typedef to ImplCPU instead of FullCPU, to differentiate from the old FullCPU. 
src/cpu/cpu_models.py: src/cpu/o3/alpha_cpu.cc: src/cpu/o3/alpha_cpu.hh: src/cpu/o3/alpha_cpu_builder.cc: src/cpu/o3/alpha_cpu_impl.hh: Rename AlphaFullCPU to AlphaO3CPU to differentiate from old FullCPU model. src/cpu/o3/alpha_dyn_inst.hh: src/cpu/o3/alpha_dyn_inst_impl.hh: src/cpu/o3/alpha_impl.hh: src/cpu/o3/alpha_params.hh: src/cpu/o3/commit.hh: src/cpu/o3/cpu.hh: src/cpu/o3/decode.hh: src/cpu/o3/decode_impl.hh: src/cpu/o3/fetch.hh: src/cpu/o3/iew.hh: src/cpu/o3/iew_impl.hh: src/cpu/o3/inst_queue.hh: src/cpu/o3/lsq.hh: src/cpu/o3/lsq_impl.hh: src/cpu/o3/lsq_unit.hh: src/cpu/o3/regfile.hh: src/cpu/o3/rename.hh: src/cpu/o3/rename_impl.hh: src/cpu/o3/rob.hh: src/cpu/o3/rob_impl.hh: src/cpu/o3/thread_state.hh: src/python/m5/objects/AlphaO3CPU.py: Rename FullCPU to O3CPU to differentiate from old FullCPU model. src/cpu/o3/commit_impl.hh: src/cpu/o3/cpu.cc: src/cpu/o3/fetch_impl.hh: src/cpu/o3/lsq_unit_impl.hh: Rename FullCPU to O3CPU to differentiate from old FullCPU model. Also #ifdef the checker code so it doesn't need to be included if it's not selected. 
--HG-- rename : src/cpu/checker/o3_cpu_builder.cc => src/cpu/checker/o3_builder.cc rename : src/cpu/checker/cpu_builder.cc => src/cpu/checker/ozone_builder.cc rename : src/python/m5/objects/AlphaFullCPU.py => src/python/m5/objects/AlphaO3CPU.py extra : convert_revision : 86619baf257b8b7c8955efd447eba56e0d7acd6a --- SConstruct | 12 +- configs/test/test.py | 8 +- src/base/traceflags.py | 1 + src/cpu/SConscript | 25 +++- src/cpu/base_dyn_inst.cc | 4 +- src/cpu/base_dyn_inst.hh | 10 +- .../{o3_cpu_builder.cc => o3_builder.cc} | 0 .../{cpu_builder.cc => ozone_builder.cc} | 0 src/cpu/cpu_models.py | 2 +- src/cpu/o3/alpha_cpu.cc | 4 +- src/cpu/o3/alpha_cpu.hh | 14 +- src/cpu/o3/alpha_cpu_builder.cc | 22 +-- src/cpu/o3/alpha_cpu_impl.hh | 140 +++++++++--------- src/cpu/o3/alpha_dyn_inst.hh | 4 +- src/cpu/o3/alpha_dyn_inst_impl.hh | 2 +- src/cpu/o3/alpha_impl.hh | 14 +- src/cpu/o3/alpha_params.hh | 4 +- src/cpu/o3/commit.hh | 8 +- src/cpu/o3/commit_impl.hh | 15 +- src/cpu/o3/cpu.cc | 65 ++++---- src/cpu/o3/cpu.hh | 6 +- src/cpu/o3/decode.hh | 6 +- src/cpu/o3/decode_impl.hh | 6 +- src/cpu/o3/fetch.hh | 8 +- src/cpu/o3/fetch_impl.hh | 14 +- src/cpu/o3/iew.hh | 8 +- src/cpu/o3/iew_impl.hh | 8 +- src/cpu/o3/inst_queue.hh | 8 +- src/cpu/o3/lsq.hh | 6 +- src/cpu/o3/lsq_impl.hh | 2 +- src/cpu/o3/lsq_unit.hh | 10 +- src/cpu/o3/lsq_unit_impl.hh | 10 +- src/cpu/o3/regfile.hh | 6 +- src/cpu/o3/rename.hh | 6 +- src/cpu/o3/rename_impl.hh | 6 +- src/cpu/o3/rob.hh | 6 +- src/cpu/o3/rob_impl.hh | 2 +- src/cpu/o3/thread_state.hh | 8 +- .../{AlphaFullCPU.py => AlphaO3CPU.py} | 4 +- 39 files changed, 272 insertions(+), 212 deletions(-) rename src/cpu/checker/{o3_cpu_builder.cc => o3_builder.cc} (100%) rename src/cpu/checker/{cpu_builder.cc => ozone_builder.cc} (100%) rename src/python/m5/objects/{AlphaFullCPU.py => AlphaO3CPU.py} (98%) diff --git a/SConstruct b/SConstruct index 0cf15b1f97..c9ba13679c 100644 --- a/SConstruct +++ b/SConstruct @@ -260,8 +260,8 @@ env['ALL_ISA_LIST'] = ['alpha', 
'sparc', 'mips'] # Define the universe of supported CPU models env['ALL_CPU_LIST'] = ['AtomicSimpleCPU', 'TimingSimpleCPU', - 'FullCPU', 'AlphaFullCPU', - 'OzoneSimpleCPU', 'OzoneCPU', 'CheckerCPU'] + 'FullCPU', 'AlphaO3CPU', + 'OzoneSimpleCPU', 'OzoneCPU'] # Sticky options get saved in the options file so they persist from # one invocation to the next (unless overridden, in which case the new @@ -289,6 +289,7 @@ sticky_opts.AddOptions( False), BoolOption('USE_MYSQL', 'Use MySQL for stats output', have_mysql), BoolOption('USE_FENV', 'Use IEEE mode control', have_fenv), + BoolOption('USE_CHECKER', 'Use checker for detailed CPU models', False), ('CC', 'C compiler', os.environ.get('CC', env['CC'])), ('CXX', 'C++ compiler', os.environ.get('CXX', env['CXX'])), BoolOption('BATCH', 'Use batch pool for build and tests', False), @@ -303,7 +304,8 @@ nonsticky_opts.AddOptions( # These options get exported to #defines in config/*.hh (see m5/SConscript). env.ExportOptions = ['FULL_SYSTEM', 'ALPHA_TLASER', 'USE_FENV', \ - 'USE_MYSQL', 'NO_FAST_ALLOC', 'SS_COMPATIBLE_FP'] + 'USE_MYSQL', 'NO_FAST_ALLOC', 'SS_COMPATIBLE_FP', \ + 'USE_CHECKER'] # Define a handy 'no-op' action def no_action(target, source, env): @@ -470,6 +472,10 @@ for build_path in build_paths: env.ParseConfig(mysql_config_libs) env.ParseConfig(mysql_config_include) + # Check if the Checker is being used. 
If so append it to env['CPU_MODELS'] + if env['USE_CHECKER']: + env['CPU_MODELS'].append('CheckerCPU') + # Save sticky option settings back to current options file sticky_opts.Save(current_opts_file, env) diff --git a/configs/test/test.py b/configs/test/test.py index 2ece9e6752..05fdb7786d 100644 --- a/configs/test/test.py +++ b/configs/test/test.py @@ -14,6 +14,7 @@ parser = optparse.OptionParser(option_list=m5.standardOptions) parser.add_option("-c", "--cmd", default="hello") parser.add_option("-t", "--timing", action="store_true") parser.add_option("-f", "--full", action="store_true") +parser.add_option("-m", "--maxtick", type="int") (options, args) = parser.parse_args() @@ -34,7 +35,7 @@ mem = PhysicalMemory() if options.timing: cpu = TimingSimpleCPU() elif options.full: - cpu = DetailedCPU() + cpu = DetailedO3CPU() else: cpu = AtomicSimpleCPU() cpu.workload = process @@ -48,7 +49,10 @@ root = Root(system = system) m5.instantiate(root) # simulate until program terminates -exit_event = m5.simulate() +if options.maxtick: + exit_event = m5.simulate(options.maxtick) +else: + exit_event = m5.simulate() print 'Exiting @', m5.curTick(), 'because', exit_event.getCause() diff --git a/src/base/traceflags.py b/src/base/traceflags.py index 7ff68bcaf4..c4dcb695b2 100644 --- a/src/base/traceflags.py +++ b/src/base/traceflags.py @@ -115,6 +115,7 @@ baseFlags = [ 'MSHR', 'Mbox', 'MemDepUnit', + 'O3CPU', 'OzoneCPU', 'FE', 'IBE', diff --git a/src/cpu/SConscript b/src/cpu/SConscript index 34bad132c3..608625ed4a 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -92,6 +92,10 @@ env.Command('static_inst_exec_sigs.hh', models_db, Action(gen_cpu_exec_signatures, gen_sigs_string, varlist = ['CPU_MODELS'])) +# List of suppported CPUs by the Checker. Errors out if USE_CHECKER=True +# and one of these are not being used. 
+CheckerSupportedCPUList = ['AlphaO3CPU', 'OzoneCPU'] + ################################################################# # # Include CPU-model-specific files based on set of models @@ -116,7 +120,7 @@ if need_simple_base: if 'FastCPU' in env['CPU_MODELS']: sources += Split('fast/cpu.cc') -if 'AlphaFullCPU' in env['CPU_MODELS']: +if 'AlphaO3CPU' in env['CPU_MODELS']: sources += Split(''' base_dyn_inst.cc o3/2bit_local_pred.cc @@ -144,6 +148,8 @@ if 'AlphaFullCPU' in env['CPU_MODELS']: o3/store_set.cc o3/tournament_pred.cc ''') + if 'CheckerCPU' in env['CPU_MODELS']: + sources += Split('checker/o3_builder.cc') if 'OzoneSimpleCPU' in env['CPU_MODELS']: sources += Split(''' @@ -155,6 +161,8 @@ if 'OzoneSimpleCPU' in env['CPU_MODELS']: ozone/inst_queue.cc ozone/rename_table.cc ''') + if 'CheckerCPU' in env['CPU_MODELS']: + sources += Split('checker/ozone_builder.cc') if 'OzoneCPU' in env['CPU_MODELS']: sources += Split(''' @@ -164,10 +172,17 @@ if 'OzoneCPU' in env['CPU_MODELS']: ''') if 'CheckerCPU' in env['CPU_MODELS']: - sources += Split(''' - checker/cpu.cc - checker/o3_cpu_builder.cc - ''') + sources += Split('checker/cpu.cc') + checker_supports = False + for i in CheckerSupportedCPUList: + if i in env['CPU_MODELS']: + checker_supports = True + if not checker_supports: + print "Checker only supports CPU models %s, please " \ + "set USE_CHECKER=False or use one of those CPU models" \ + % CheckerSupportedCPUList + Exit(1) + # FullCPU sources are included from m5/SConscript since they're not # below this point in the file hierarchy. 
diff --git a/src/cpu/base_dyn_inst.cc b/src/cpu/base_dyn_inst.cc index 4bcd598480..5e2a6392a7 100644 --- a/src/cpu/base_dyn_inst.cc +++ b/src/cpu/base_dyn_inst.cc @@ -71,8 +71,8 @@ my_hash_t thishash; template BaseDynInst::BaseDynInst(ExtMachInst machInst, Addr inst_PC, Addr pred_PC, InstSeqNum seq_num, - FullCPU *cpu) - : staticInst(machInst), traceData(NULL), cpu(cpu)/*, xc(cpu->xcBase())*/ + ImplCPU *cpu) + : staticInst(machInst), traceData(NULL), cpu(cpu) { seqNum = seq_num; diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index f188e661c3..9cc61f74cc 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -59,8 +59,8 @@ class BaseDynInst : public FastAlloc, public RefCounted { public: // Typedef for the CPU. - typedef typename Impl::FullCPU FullCPU; - typedef typename FullCPU::ImplState ImplState; + typedef typename Impl::CPUType ImplCPU; + typedef typename ImplCPU::ImplState ImplState; // Binary machine instruction type. typedef TheISA::MachInst MachInst; @@ -165,8 +165,8 @@ class BaseDynInst : public FastAlloc, public RefCounted /** How many source registers are ready. */ unsigned readyRegs; - /** Pointer to the FullCPU object. */ - FullCPU *cpu; + /** Pointer to the Impl's CPU object. */ + ImplCPU *cpu; /** Pointer to the thread state. */ ImplState *thread; @@ -239,7 +239,7 @@ class BaseDynInst : public FastAlloc, public RefCounted * @param cpu Pointer to the instruction's CPU. */ BaseDynInst(ExtMachInst inst, Addr PC, Addr pred_PC, InstSeqNum seq_num, - FullCPU *cpu); + ImplCPU *cpu); /** BaseDynInst constructor given a StaticInst pointer. * @param _staticInst The StaticInst for this BaseDynInst. 
diff --git a/src/cpu/checker/o3_cpu_builder.cc b/src/cpu/checker/o3_builder.cc similarity index 100% rename from src/cpu/checker/o3_cpu_builder.cc rename to src/cpu/checker/o3_builder.cc diff --git a/src/cpu/checker/cpu_builder.cc b/src/cpu/checker/ozone_builder.cc similarity index 100% rename from src/cpu/checker/cpu_builder.cc rename to src/cpu/checker/ozone_builder.cc diff --git a/src/cpu/cpu_models.py b/src/cpu/cpu_models.py index 1a9724ca63..e7ef9ab42b 100644 --- a/src/cpu/cpu_models.py +++ b/src/cpu/cpu_models.py @@ -67,7 +67,7 @@ CpuModel('TimingSimpleCPU', 'timing_simple_cpu_exec.cc', CpuModel('FullCPU', 'full_cpu_exec.cc', '#include "encumbered/cpu/full/dyn_inst.hh"', { 'CPU_exec_context': 'DynInst' }) -CpuModel('AlphaFullCPU', 'alpha_o3_exec.cc', +CpuModel('AlphaO3CPU', 'alpha_o3_exec.cc', '#include "cpu/o3/alpha_dyn_inst.hh"', { 'CPU_exec_context': 'AlphaDynInst' }) CpuModel('OzoneSimpleCPU', 'ozone_simple_exec.cc', diff --git a/src/cpu/o3/alpha_cpu.cc b/src/cpu/o3/alpha_cpu.cc index 39cae696b4..e44ed00311 100644 --- a/src/cpu/o3/alpha_cpu.cc +++ b/src/cpu/o3/alpha_cpu.cc @@ -32,7 +32,7 @@ #include "cpu/o3/alpha_cpu_impl.hh" #include "cpu/o3/alpha_dyn_inst.hh" -// Force instantiation of AlphaFullCPU for all the implemntations that are +// Force instantiation of AlphaO3CPU for all the implemntations that are // needed. Consider merging this and alpha_dyn_inst.cc, and maybe all // classes that depend on a certain impl, into one file (alpha_impl.cc?). -template class AlphaFullCPU; +template class AlphaO3CPU; diff --git a/src/cpu/o3/alpha_cpu.hh b/src/cpu/o3/alpha_cpu.hh index f81837f3c9..4daa8b3ba9 100644 --- a/src/cpu/o3/alpha_cpu.hh +++ b/src/cpu/o3/alpha_cpu.hh @@ -44,7 +44,7 @@ namespace Kernel { class TranslatingPort; /** - * AlphaFullCPU class. Derives from the FullO3CPU class, and + * AlphaO3CPU class. Derives from the FullO3CPU class, and * implements all ISA and implementation specific functions of the * CPU. 
This is the CPU class that is used for the SimObjects, and is * what is given to the DynInsts. Most of its state exists in the @@ -52,7 +52,7 @@ class TranslatingPort; * functionality. */ template -class AlphaFullCPU : public FullO3CPU +class AlphaO3CPU : public FullO3CPU { protected: typedef TheISA::IntReg IntReg; @@ -67,17 +67,17 @@ class AlphaFullCPU : public FullO3CPU typedef O3ThreadState Thread; typedef typename Impl::Params Params; - /** Constructs an AlphaFullCPU with the given parameters. */ - AlphaFullCPU(Params *params); + /** Constructs an AlphaO3CPU with the given parameters. */ + AlphaO3CPU(Params *params); /** - * Derived ThreadContext class for use with the AlphaFullCPU. It + * Derived ThreadContext class for use with the AlphaO3CPU. It * provides the interface for any external objects to access a * single thread's state and some general CPU state. Any time * external objects try to update state through this interface, * the CPU will create an event to squash all in-flight * instructions in order to ensure state is maintained correctly. - * It must be defined specifically for the AlphaFullCPU because + * It must be defined specifically for the AlphaO3CPU because * not all architectural state is located within the O3ThreadState * (such as the commit PC, and registers), and specific actions * must be taken when using this interface (such as squashing all @@ -87,7 +87,7 @@ class AlphaFullCPU : public FullO3CPU { public: /** Pointer to the CPU. */ - AlphaFullCPU *cpu; + AlphaO3CPU *cpu; /** Pointer to the thread state that this TC corrseponds to. 
*/ O3ThreadState *thread; diff --git a/src/cpu/o3/alpha_cpu_builder.cc b/src/cpu/o3/alpha_cpu_builder.cc index a6fbe34d7b..b1e141ff4e 100644 --- a/src/cpu/o3/alpha_cpu_builder.cc +++ b/src/cpu/o3/alpha_cpu_builder.cc @@ -37,15 +37,15 @@ #include "cpu/o3/fu_pool.hh" #include "sim/builder.hh" -class DerivAlphaFullCPU : public AlphaFullCPU +class DerivAlphaO3CPU : public AlphaO3CPU { public: - DerivAlphaFullCPU(AlphaSimpleParams *p) - : AlphaFullCPU(p) + DerivAlphaO3CPU(AlphaSimpleParams *p) + : AlphaO3CPU(p) { } }; -BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) +BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaO3CPU) Param clock; Param numThreads; @@ -144,9 +144,9 @@ Param defer_registration; Param function_trace; Param function_trace_start; -END_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) +END_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaO3CPU) -BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) +BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaO3CPU) INIT_PARAM(clock, "clock speed"), INIT_PARAM(numThreads, "number of HW thread contexts"), @@ -261,11 +261,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) INIT_PARAM(function_trace, "Enable function trace"), INIT_PARAM(function_trace_start, "Cycle to start function trace") -END_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) +END_INIT_SIM_OBJECT_PARAMS(DerivAlphaO3CPU) -CREATE_SIM_OBJECT(DerivAlphaFullCPU) +CREATE_SIM_OBJECT(DerivAlphaO3CPU) { - DerivAlphaFullCPU *cpu; + DerivAlphaO3CPU *cpu; #if FULL_SYSTEM // Full-system only supports a single thread for the moment. 
@@ -386,10 +386,10 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; - cpu = new DerivAlphaFullCPU(params); + cpu = new DerivAlphaO3CPU(params); return cpu; } -REGISTER_SIM_OBJECT("DerivAlphaFullCPU", DerivAlphaFullCPU) +REGISTER_SIM_OBJECT("DerivAlphaO3CPU", DerivAlphaO3CPU) diff --git a/src/cpu/o3/alpha_cpu_impl.hh b/src/cpu/o3/alpha_cpu_impl.hh index bfd05d2606..532611fb6a 100644 --- a/src/cpu/o3/alpha_cpu_impl.hh +++ b/src/cpu/o3/alpha_cpu_impl.hh @@ -28,6 +28,8 @@ * Authors: Kevin Lim */ +#include "config/use_checker.hh" + #include "arch/alpha/faults.hh" #include "base/cprintf.hh" #include "base/statistics.hh" @@ -53,14 +55,14 @@ using namespace TheISA; template -AlphaFullCPU::AlphaFullCPU(Params *params) +AlphaO3CPU::AlphaO3CPU(Params *params) #if FULL_SYSTEM : FullO3CPU(params), itb(params->itb), dtb(params->dtb) #else : FullO3CPU(params) #endif { - DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n"); + DPRINTF(O3CPU, "Creating AlphaO3CPU object.\n"); // Setup any thread state. this->thread.resize(this->numThreads); @@ -73,7 +75,7 @@ AlphaFullCPU::AlphaFullCPU(Params *params) this->thread[i]->setStatus(ThreadContext::Suspended); #else if (i < params->workload.size()) { - DPRINTF(FullCPU, "FullCPU: Workload[%i] process is %#x", + DPRINTF(O3CPU, "Workload[%i] process is %#x", i, this->thread[i]); this->thread[i] = new Thread(this, i, params->workload[i], i, params->mem); @@ -110,14 +112,16 @@ AlphaFullCPU::AlphaFullCPU(Params *params) // Setup the TC that will serve as the interface to the threads/CPU. AlphaTC *alpha_tc = new AlphaTC; + tc = alpha_tc; + // If we're using a checker, then the TC should be the // CheckerThreadContext. 
+#if USE_CHECKER if (params->checker) { tc = new CheckerThreadContext( alpha_tc, this->checker); - } else { - tc = alpha_tc; } +#endif alpha_tc->cpu = this; alpha_tc->thread = this->thread[i]; @@ -172,7 +176,7 @@ AlphaFullCPU::AlphaFullCPU(Params *params) template void -AlphaFullCPU::regStats() +AlphaO3CPU::regStats() { // Register stats for everything that has stats. this->fullCPURegStats(); @@ -186,7 +190,7 @@ AlphaFullCPU::regStats() #if FULL_SYSTEM template VirtualPort * -AlphaFullCPU::AlphaTC::getVirtPort(ThreadContext *src_tc) +AlphaO3CPU::AlphaTC::getVirtPort(ThreadContext *src_tc) { if (!src_tc) return thread->getVirtPort(); @@ -203,7 +207,7 @@ AlphaFullCPU::AlphaTC::getVirtPort(ThreadContext *src_tc) template void -AlphaFullCPU::AlphaTC::dumpFuncProfile() +AlphaO3CPU::AlphaTC::dumpFuncProfile() { // Currently not supported } @@ -211,7 +215,7 @@ AlphaFullCPU::AlphaTC::dumpFuncProfile() template void -AlphaFullCPU::AlphaTC::takeOverFrom(ThreadContext *old_context) +AlphaO3CPU::AlphaTC::takeOverFrom(ThreadContext *old_context) { // some things should already be set up #if FULL_SYSTEM @@ -253,7 +257,7 @@ AlphaFullCPU::AlphaTC::takeOverFrom(ThreadContext *old_context) #if FULL_SYSTEM template void -AlphaFullCPU::AlphaTC::delVirtPort(VirtualPort *vp) +AlphaO3CPU::AlphaTC::delVirtPort(VirtualPort *vp) { delete vp->getPeer(); delete vp; @@ -262,9 +266,9 @@ AlphaFullCPU::AlphaTC::delVirtPort(VirtualPort *vp) template void -AlphaFullCPU::AlphaTC::activate(int delay) +AlphaO3CPU::AlphaTC::activate(int delay) { - DPRINTF(FullCPU, "Calling activate on AlphaTC\n"); + DPRINTF(O3CPU, "Calling activate on AlphaTC\n"); if (thread->status() == ThreadContext::Active) return; @@ -286,9 +290,9 @@ AlphaFullCPU::AlphaTC::activate(int delay) template void -AlphaFullCPU::AlphaTC::suspend() +AlphaO3CPU::AlphaTC::suspend() { - DPRINTF(FullCPU, "Calling suspend on AlphaTC\n"); + DPRINTF(O3CPU, "Calling suspend on AlphaTC\n"); if (thread->status() == ThreadContext::Suspended) return; 
@@ -312,9 +316,9 @@ AlphaFullCPU::AlphaTC::suspend() template void -AlphaFullCPU::AlphaTC::deallocate() +AlphaO3CPU::AlphaTC::deallocate() { - DPRINTF(FullCPU, "Calling deallocate on AlphaTC\n"); + DPRINTF(O3CPU, "Calling deallocate on AlphaTC\n"); if (thread->status() == ThreadContext::Unallocated) return; @@ -325,9 +329,9 @@ AlphaFullCPU::AlphaTC::deallocate() template void -AlphaFullCPU::AlphaTC::halt() +AlphaO3CPU::AlphaTC::halt() { - DPRINTF(FullCPU, "Calling halt on AlphaTC\n"); + DPRINTF(O3CPU, "Calling halt on AlphaTC\n"); if (thread->status() == ThreadContext::Halted) return; @@ -338,7 +342,7 @@ AlphaFullCPU::AlphaTC::halt() template void -AlphaFullCPU::AlphaTC::regStats(const std::string &name) +AlphaO3CPU::AlphaTC::regStats(const std::string &name) { #if FULL_SYSTEM thread->kernelStats = new Kernel::Statistics(cpu->system); @@ -348,7 +352,7 @@ AlphaFullCPU::AlphaTC::regStats(const std::string &name) template void -AlphaFullCPU::AlphaTC::serialize(std::ostream &os) +AlphaO3CPU::AlphaTC::serialize(std::ostream &os) { #if FULL_SYSTEM if (thread->kernelStats) @@ -359,7 +363,7 @@ AlphaFullCPU::AlphaTC::serialize(std::ostream &os) template void -AlphaFullCPU::AlphaTC::unserialize(Checkpoint *cp, const std::string &section) +AlphaO3CPU::AlphaTC::unserialize(Checkpoint *cp, const std::string &section) { #if FULL_SYSTEM if (thread->kernelStats) @@ -371,46 +375,46 @@ AlphaFullCPU::AlphaTC::unserialize(Checkpoint *cp, const std::string &sect #if FULL_SYSTEM template EndQuiesceEvent * -AlphaFullCPU::AlphaTC::getQuiesceEvent() +AlphaO3CPU::AlphaTC::getQuiesceEvent() { return thread->quiesceEvent; } template Tick -AlphaFullCPU::AlphaTC::readLastActivate() +AlphaO3CPU::AlphaTC::readLastActivate() { return thread->lastActivate; } template Tick -AlphaFullCPU::AlphaTC::readLastSuspend() +AlphaO3CPU::AlphaTC::readLastSuspend() { return thread->lastSuspend; } template void -AlphaFullCPU::AlphaTC::profileClear() +AlphaO3CPU::AlphaTC::profileClear() {} template void
-AlphaFullCPU::AlphaTC::profileSample() +AlphaO3CPU::AlphaTC::profileSample() {} #endif template TheISA::MachInst -AlphaFullCPU::AlphaTC:: getInst() +AlphaO3CPU::AlphaTC:: getInst() { return thread->getInst(); } template void -AlphaFullCPU::AlphaTC::copyArchRegs(ThreadContext *tc) +AlphaO3CPU::AlphaTC::copyArchRegs(ThreadContext *tc) { // This function will mess things up unless the ROB is empty and // there are no instructions in the pipeline. @@ -421,7 +425,7 @@ AlphaFullCPU::AlphaTC::copyArchRegs(ThreadContext *tc) for (int i = 0; i < AlphaISA::NumIntRegs; ++i) { renamed_reg = cpu->renameMap[tid].lookup(i); - DPRINTF(FullCPU, "FullCPU: Copying over register %i, had data %lli, " + DPRINTF(O3CPU, "Copying over register %i, had data %lli, " "now has data %lli.\n", renamed_reg, cpu->readIntReg(renamed_reg), tc->readIntReg(i)); @@ -449,19 +453,19 @@ AlphaFullCPU::AlphaTC::copyArchRegs(ThreadContext *tc) template void -AlphaFullCPU::AlphaTC::clearArchRegs() +AlphaO3CPU::AlphaTC::clearArchRegs() {} template uint64_t -AlphaFullCPU::AlphaTC::readIntReg(int reg_idx) +AlphaO3CPU::AlphaTC::readIntReg(int reg_idx) { return cpu->readArchIntReg(reg_idx, thread->readTid()); } template FloatReg -AlphaFullCPU::AlphaTC::readFloatReg(int reg_idx, int width) +AlphaO3CPU::AlphaTC::readFloatReg(int reg_idx, int width) { switch(width) { case 32: @@ -476,14 +480,14 @@ AlphaFullCPU::AlphaTC::readFloatReg(int reg_idx, int width) template FloatReg -AlphaFullCPU::AlphaTC::readFloatReg(int reg_idx) +AlphaO3CPU::AlphaTC::readFloatReg(int reg_idx) { return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); } template FloatRegBits -AlphaFullCPU::AlphaTC::readFloatRegBits(int reg_idx, int width) +AlphaO3CPU::AlphaTC::readFloatRegBits(int reg_idx, int width) { DPRINTF(Fault, "Reading floatint register through the TC!\n"); return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); @@ -491,14 +495,14 @@ AlphaFullCPU::AlphaTC::readFloatRegBits(int reg_idx, int width) template FloatRegBits 
-AlphaFullCPU::AlphaTC::readFloatRegBits(int reg_idx) +AlphaO3CPU::AlphaTC::readFloatRegBits(int reg_idx) { return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); } template void -AlphaFullCPU::AlphaTC::setIntReg(int reg_idx, uint64_t val) +AlphaO3CPU::AlphaTC::setIntReg(int reg_idx, uint64_t val) { cpu->setArchIntReg(reg_idx, val, thread->readTid()); @@ -510,7 +514,7 @@ AlphaFullCPU::AlphaTC::setIntReg(int reg_idx, uint64_t val) template void -AlphaFullCPU::AlphaTC::setFloatReg(int reg_idx, FloatReg val, int width) +AlphaO3CPU::AlphaTC::setFloatReg(int reg_idx, FloatReg val, int width) { switch(width) { case 32: @@ -529,7 +533,7 @@ AlphaFullCPU::AlphaTC::setFloatReg(int reg_idx, FloatReg val, int width) template void -AlphaFullCPU::AlphaTC::setFloatReg(int reg_idx, FloatReg val) +AlphaO3CPU::AlphaTC::setFloatReg(int reg_idx, FloatReg val) { cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); @@ -540,7 +544,7 @@ AlphaFullCPU::AlphaTC::setFloatReg(int reg_idx, FloatReg val) template void -AlphaFullCPU::AlphaTC::setFloatRegBits(int reg_idx, FloatRegBits val, +AlphaO3CPU::AlphaTC::setFloatRegBits(int reg_idx, FloatRegBits val, int width) { DPRINTF(Fault, "Setting floatint register through the TC!\n"); @@ -554,7 +558,7 @@ AlphaFullCPU::AlphaTC::setFloatRegBits(int reg_idx, FloatRegBits val, template void -AlphaFullCPU::AlphaTC::setFloatRegBits(int reg_idx, FloatRegBits val) +AlphaO3CPU::AlphaTC::setFloatRegBits(int reg_idx, FloatRegBits val) { cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); @@ -566,7 +570,7 @@ AlphaFullCPU::AlphaTC::setFloatRegBits(int reg_idx, FloatRegBits val) template void -AlphaFullCPU::AlphaTC::setPC(uint64_t val) +AlphaO3CPU::AlphaTC::setPC(uint64_t val) { cpu->setPC(val, thread->readTid()); @@ -578,7 +582,7 @@ AlphaFullCPU::AlphaTC::setPC(uint64_t val) template void -AlphaFullCPU::AlphaTC::setNextPC(uint64_t val) +AlphaO3CPU::AlphaTC::setNextPC(uint64_t val) { cpu->setNextPC(val, thread->readTid()); @@ -590,7 +594,7 @@ 
AlphaFullCPU::AlphaTC::setNextPC(uint64_t val) template Fault -AlphaFullCPU::AlphaTC::setMiscReg(int misc_reg, const MiscReg &val) +AlphaO3CPU::AlphaTC::setMiscReg(int misc_reg, const MiscReg &val) { Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->readTid()); @@ -604,8 +608,8 @@ AlphaFullCPU::AlphaTC::setMiscReg(int misc_reg, const MiscReg &val) template Fault -AlphaFullCPU::AlphaTC::setMiscRegWithEffect(int misc_reg, - const MiscReg &val) +AlphaO3CPU::AlphaTC::setMiscRegWithEffect(int misc_reg, + const MiscReg &val) { Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val, thread->readTid()); @@ -622,21 +626,21 @@ AlphaFullCPU::AlphaTC::setMiscRegWithEffect(int misc_reg, template TheISA::IntReg -AlphaFullCPU::AlphaTC::getSyscallArg(int i) +AlphaO3CPU::AlphaTC::getSyscallArg(int i) { return cpu->getSyscallArg(i, thread->readTid()); } template void -AlphaFullCPU::AlphaTC::setSyscallArg(int i, IntReg val) +AlphaO3CPU::AlphaTC::setSyscallArg(int i, IntReg val) { cpu->setSyscallArg(i, val, thread->readTid()); } template void -AlphaFullCPU::AlphaTC::setSyscallReturn(SyscallReturn return_value) +AlphaO3CPU::AlphaTC::setSyscallReturn(SyscallReturn return_value) { cpu->setSyscallReturn(return_value, thread->readTid()); } @@ -645,37 +649,37 @@ AlphaFullCPU::AlphaTC::setSyscallReturn(SyscallReturn return_value) template MiscReg -AlphaFullCPU::readMiscReg(int misc_reg, unsigned tid) +AlphaO3CPU::readMiscReg(int misc_reg, unsigned tid) { return this->regFile.readMiscReg(misc_reg, tid); } template MiscReg -AlphaFullCPU::readMiscRegWithEffect(int misc_reg, Fault &fault, - unsigned tid) +AlphaO3CPU::readMiscRegWithEffect(int misc_reg, Fault &fault, + unsigned tid) { return this->regFile.readMiscRegWithEffect(misc_reg, fault, tid); } template Fault -AlphaFullCPU::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid) +AlphaO3CPU::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid) { return this->regFile.setMiscReg(misc_reg, val, tid); } template Fault 
-AlphaFullCPU::setMiscRegWithEffect(int misc_reg, const MiscReg &val, - unsigned tid) +AlphaO3CPU::setMiscRegWithEffect(int misc_reg, const MiscReg &val, + unsigned tid) { return this->regFile.setMiscRegWithEffect(misc_reg, val, tid); } template void -AlphaFullCPU::squashFromTC(unsigned tid) +AlphaO3CPU::squashFromTC(unsigned tid) { this->thread[tid]->inSyscall = true; this->commit.generateTCEvent(tid); @@ -685,7 +689,7 @@ AlphaFullCPU::squashFromTC(unsigned tid) template void -AlphaFullCPU::post_interrupt(int int_num, int index) +AlphaO3CPU::post_interrupt(int int_num, int index) { BaseCPU::post_interrupt(int_num, index); @@ -697,21 +701,21 @@ AlphaFullCPU::post_interrupt(int int_num, int index) template int -AlphaFullCPU::readIntrFlag() +AlphaO3CPU::readIntrFlag() { return this->regFile.readIntrFlag(); } template void -AlphaFullCPU::setIntrFlag(int val) +AlphaO3CPU::setIntrFlag(int val) { this->regFile.setIntrFlag(val); } template Fault -AlphaFullCPU::hwrei(unsigned tid) +AlphaO3CPU::hwrei(unsigned tid) { // Need to clear the lock flag upon returning from an interrupt. this->lockFlag = false; @@ -726,7 +730,7 @@ AlphaFullCPU::hwrei(unsigned tid) template bool -AlphaFullCPU::simPalCheck(int palFunc, unsigned tid) +AlphaO3CPU::simPalCheck(int palFunc, unsigned tid) { if (this->thread[tid]->kernelStats) this->thread[tid]->kernelStats->callpal(palFunc, @@ -751,7 +755,7 @@ AlphaFullCPU::simPalCheck(int palFunc, unsigned tid) template void -AlphaFullCPU::trap(Fault fault, unsigned tid) +AlphaO3CPU::trap(Fault fault, unsigned tid) { // Pass the thread's TC into the invoke method. fault->invoke(this->threadContexts[tid]); @@ -759,7 +763,7 @@ AlphaFullCPU::trap(Fault fault, unsigned tid) template void -AlphaFullCPU::processInterrupts() +AlphaO3CPU::processInterrupts() { // Check for interrupts here. For now can copy the code that // exists within isa_fullsys_traits.hh. 
Also assume that thread 0 @@ -805,10 +809,12 @@ AlphaFullCPU::processInterrupts() this->setMiscReg(IPR_ISR, summary, 0); this->setMiscReg(IPR_INTID, ipl, 0); // Checker needs to know these two registers were updated. +#if USE_CHECKER if (this->checker) { this->checker->threadBase()->setMiscReg(IPR_ISR, summary); this->checker->threadBase()->setMiscReg(IPR_INTID, ipl); } +#endif this->trap(Fault(new InterruptFault), 0); DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", this->readMiscReg(IPR_IPLR, 0), ipl, summary); @@ -821,9 +827,9 @@ AlphaFullCPU::processInterrupts() template void -AlphaFullCPU::syscall(int64_t callnum, int tid) +AlphaO3CPU::syscall(int64_t callnum, int tid) { - DPRINTF(FullCPU, "AlphaFullCPU: [tid:%i] Executing syscall().\n\n", tid); + DPRINTF(O3CPU, "[tid:%i] Executing syscall().\n\n", tid); DPRINTF(Activity,"Activity: syscall() called.\n"); @@ -841,21 +847,21 @@ AlphaFullCPU::syscall(int64_t callnum, int tid) template TheISA::IntReg -AlphaFullCPU::getSyscallArg(int i, int tid) +AlphaO3CPU::getSyscallArg(int i, int tid) { return this->readArchIntReg(AlphaISA::ArgumentReg0 + i, tid); } template void -AlphaFullCPU::setSyscallArg(int i, IntReg val, int tid) +AlphaO3CPU::setSyscallArg(int i, IntReg val, int tid) { this->setArchIntReg(AlphaISA::ArgumentReg0 + i, val, tid); } template void -AlphaFullCPU::setSyscallReturn(SyscallReturn return_value, int tid) +AlphaO3CPU::setSyscallReturn(SyscallReturn return_value, int tid) { // check for error condition. Alpha syscall convention is to // indicate success/failure in reg a3 (r19) and put the diff --git a/src/cpu/o3/alpha_dyn_inst.hh b/src/cpu/o3/alpha_dyn_inst.hh index 36a08c4a74..464e53e9da 100644 --- a/src/cpu/o3/alpha_dyn_inst.hh +++ b/src/cpu/o3/alpha_dyn_inst.hh @@ -51,7 +51,7 @@ class AlphaDynInst : public BaseDynInst { public: /** Typedef for the CPU. */ - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; /** Binary machine instruction type. 
*/ typedef TheISA::MachInst MachInst; @@ -74,7 +74,7 @@ class AlphaDynInst : public BaseDynInst public: /** BaseDynInst constructor given a binary instruction. */ AlphaDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, - FullCPU *cpu); + O3CPU *cpu); /** BaseDynInst constructor given a static inst pointer. */ AlphaDynInst(StaticInstPtr &_staticInst); diff --git a/src/cpu/o3/alpha_dyn_inst_impl.hh b/src/cpu/o3/alpha_dyn_inst_impl.hh index a73cf4a7da..6183a755e5 100644 --- a/src/cpu/o3/alpha_dyn_inst_impl.hh +++ b/src/cpu/o3/alpha_dyn_inst_impl.hh @@ -32,7 +32,7 @@ template AlphaDynInst::AlphaDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, - InstSeqNum seq_num, FullCPU *cpu) + InstSeqNum seq_num, O3CPU *cpu) : BaseDynInst(inst, PC, Pred_PC, seq_num, cpu) { initVars(); diff --git a/src/cpu/o3/alpha_impl.hh b/src/cpu/o3/alpha_impl.hh index 52f7c23948..84c9e1c004 100644 --- a/src/cpu/o3/alpha_impl.hh +++ b/src/cpu/o3/alpha_impl.hh @@ -41,12 +41,12 @@ template class AlphaDynInst; template -class AlphaFullCPU; +class AlphaO3CPU; /** Implementation specific struct that defines several key types to the * CPU, the stages within the CPU, the time buffers, and the DynInst. * The struct defines the ISA, the CPU policy, the specific DynInst, the - * specific FullCPU, and all of the structs from the time buffers to do + * specific O3CPU, and all of the structs from the time buffers to do * communication. * This is one of the key things that must be defined for each hardware * specific CPU implementation. @@ -67,8 +67,14 @@ struct AlphaSimpleImpl */ typedef RefCountingPtr DynInstPtr; - /** The FullCPU type to be used. */ - typedef AlphaFullCPU FullCPU; + /** The O3CPU type to be used. */ + typedef AlphaO3CPU O3CPU; + + /** Same typedef, but for CPUType. BaseDynInst may not always use + * an O3 CPU, so it's clearer to call it CPUType instead in that + * case. + */ + typedef O3CPU CPUType; /** The Params to be passed to each stage. 
*/ typedef AlphaSimpleParams Params; diff --git a/src/cpu/o3/alpha_params.hh b/src/cpu/o3/alpha_params.hh index 2ece7fb7fc..f0732733e2 100644 --- a/src/cpu/o3/alpha_params.hh +++ b/src/cpu/o3/alpha_params.hh @@ -42,12 +42,12 @@ class Process; class System; /** - * This file defines the parameters that will be used for the AlphaFullCPU. + * This file defines the parameters that will be used for the AlphaO3CPU. * This must be defined externally so that the Impl can have a params class * defined that it can pass to all of the individual stages. */ -class AlphaSimpleParams : public BaseFullCPU::Params +class AlphaSimpleParams : public BaseO3CPU::Params { public: diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 0b31cb9c85..1c0cd10378 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -67,7 +67,7 @@ class DefaultCommit { public: // Typedefs from the Impl. - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::Params Params; typedef typename Impl::CPUPol CPUPol; @@ -145,7 +145,7 @@ class DefaultCommit void regStats(); /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the list of threads. */ void setThreads(std::vector &threads); @@ -317,8 +317,8 @@ class DefaultCommit ROB *rob; private: - /** Pointer to FullCPU. */ - FullCPU *cpu; + /** Pointer to O3CPU. */ + O3CPU *cpu; /** Vector of all of the threads. 
*/ std::vector thread; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index a182719184..566324b69a 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -28,6 +28,9 @@ * Authors: Kevin Lim */ +#include "config/full_system.hh" +#include "config/use_checker.hh" + #include #include @@ -219,14 +222,14 @@ DefaultCommit::regStats() template void -DefaultCommit::setCPU(FullCPU *cpu_ptr) +DefaultCommit::setCPU(O3CPU *cpu_ptr) { DPRINTF(Commit, "Commit: Setting CPU pointer.\n"); cpu = cpu_ptr; // Commit must broadcast the number of free entries it has at the start of // the simulation, so it starts as active. - cpu->activateStage(FullCPU::CommitIdx); + cpu->activateStage(O3CPU::CommitIdx); trapLatency = cpu->cycles(trapLatency); fetchTrapLatency = cpu->cycles(fetchTrapLatency); @@ -395,10 +398,10 @@ DefaultCommit::updateStatus() if (_nextStatus == Inactive && _status == Active) { DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::CommitIdx); + cpu->deactivateStage(O3CPU::CommitIdx); } else if (_nextStatus == Active && _status == Inactive) { DPRINTF(Activity, "Activating stage.\n"); - cpu->activateStage(FullCPU::CommitIdx); + cpu->activateStage(O3CPU::CommitIdx); } _status = _nextStatus; @@ -972,11 +975,13 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) head_inst->setCompleted(); } +#if USE_CHECKER // Use checker prior to updating anything due to traps or PC // based events. if (cpu->checker) { cpu->checker->verify(head_inst); } +#endif // Check if the instruction caused a fault. If so, trap. 
Fault inst_fault = head_inst->getFault(); @@ -992,9 +997,11 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) return false; } +#if USE_CHECKER if (cpu->checker && head_inst->isStore()) { cpu->checker->verify(head_inst); } +#endif assert(!thread[tid]->inSyscall); diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 788c6b1647..4300788605 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -29,6 +29,7 @@ */ #include "config/full_system.hh" +#include "config/use_checker.hh" #if FULL_SYSTEM #include "sim/system.hh" @@ -50,13 +51,13 @@ using namespace std; using namespace TheISA; -BaseFullCPU::BaseFullCPU(Params *params) +BaseO3CPU::BaseO3CPU(Params *params) : BaseCPU(params), cpu_id(0) { } void -BaseFullCPU::regStats() +BaseO3CPU::regStats() { BaseCPU::regStats(); } @@ -83,7 +84,7 @@ FullO3CPU::TickEvent::description() template FullO3CPU::FullO3CPU(Params *params) - : BaseFullCPU(params), + : BaseO3CPU(params), tickEvent(this), removeInstsThisCycle(false), fetch(params), @@ -131,6 +132,9 @@ FullO3CPU::FullO3CPU(Params *params) { _status = Idle; + checker = NULL; + +#if USE_CHECKER if (params->checker) { BaseCPU *temp_checker = params->checker; checker = dynamic_cast *>(temp_checker); @@ -138,9 +142,8 @@ FullO3CPU::FullO3CPU(Params *params) #if FULL_SYSTEM checker->setSystem(params->system); #endif - } else { - checker = NULL; } +#endif #if !FULL_SYSTEM thread.resize(number_of_threads); @@ -261,9 +264,9 @@ template void FullO3CPU::fullCPURegStats() { - BaseFullCPU::regStats(); + BaseO3CPU::regStats(); - // Register any of the FullCPU's stats here. + // Register any of the O3CPU's stats here. 
timesIdled .name(name() + ".timesIdled") .desc("Number of times that the entire CPU went into an idle state and" @@ -319,7 +322,7 @@ template void FullO3CPU::tick() { - DPRINTF(FullCPU, "\n\nFullCPU: Ticking main, FullO3CPU.\n"); + DPRINTF(O3CPU, "\n\nFullO3CPU: Ticking main, FullO3CPU.\n"); ++numCycles; @@ -418,7 +421,7 @@ template void FullO3CPU::insertThread(unsigned tid) { - DPRINTF(FullCPU,"[tid:%i] Initializing thread data"); + DPRINTF(O3CPU,"[tid:%i] Initializing thread data"); // Will change now that the PC and thread state is internal to the CPU // and not in the ThreadContext. #if 0 @@ -465,7 +468,7 @@ template void FullO3CPU::removeThread(unsigned tid) { - DPRINTF(FullCPU,"[tid:%i] Removing thread data"); + DPRINTF(O3CPU,"[tid:%i] Removing thread data"); #if 0 //Unbind Int Regs from Rename Map for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) { @@ -511,37 +514,37 @@ template void FullO3CPU::activateWhenReady(int tid) { - DPRINTF(FullCPU,"[tid:%i]: Checking if resources are available for incoming" + DPRINTF(O3CPU,"[tid:%i]: Checking if resources are available for incoming" "(e.g. PhysRegs/ROB/IQ/LSQ) \n", tid); bool ready = true; if (freeList.numFreeIntRegs() >= TheISA::NumIntRegs) { - DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " "Phys. Int. Regs.\n", tid); ready = false; } else if (freeList.numFreeFloatRegs() >= TheISA::NumFloatRegs) { - DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " "Phys. Float. 
Regs.\n", tid); ready = false; } else if (commit.rob->numFreeEntries() >= commit.rob->entryAmount(activeThreads.size() + 1)) { - DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " "ROB entries.\n", tid); ready = false; } else if (iew.instQueue.numFreeEntries() >= iew.instQueue.entryAmount(activeThreads.size() + 1)) { - DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " "IQ entries.\n", tid); ready = false; } else if (iew.ldstQueue.numFreeEntries() >= iew.ldstQueue.entryAmount(activeThreads.size() + 1)) { - DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " "LSQ entries.\n", tid); ready = false; @@ -575,7 +578,7 @@ FullO3CPU::activateContext(int tid, int delay) if (isActive == activeThreads.end()) { //May Need to Re-code this if the delay variable is the //delay needed for thread to activate - DPRINTF(FullCPU, "Adding Thread %i to active threads list\n", + DPRINTF(O3CPU, "Adding Thread %i to active threads list\n", tid); activeThreads.push_back(tid); @@ -597,7 +600,7 @@ template void FullO3CPU::suspendContext(int tid) { - DPRINTF(FullCPU,"[tid: %i]: Suspended ...\n", tid); + DPRINTF(O3CPU,"[tid: %i]: Suspended ...\n", tid); unscheduleTickEvent(); _status = Idle; /* @@ -606,7 +609,7 @@ FullO3CPU::suspendContext(int tid) activeThreads.begin(), activeThreads.end(), tid); if (isActive != activeThreads.end()) { - DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n", + DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", tid); activeThreads.erase(isActive); } @@ -617,14 +620,14 @@ template void FullO3CPU::deallocateContext(int tid) { - DPRINTF(FullCPU,"[tid:%i]: Deallocating ...", tid); + DPRINTF(O3CPU,"[tid:%i]: Deallocating ...", tid); /* //Remove From Active List, if Active list::iterator isActive = find( 
activeThreads.begin(), activeThreads.end(), tid); if (isActive != activeThreads.end()) { - DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n", + DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", tid); activeThreads.erase(isActive); @@ -637,14 +640,14 @@ template void FullO3CPU::haltContext(int tid) { - DPRINTF(FullCPU,"[tid:%i]: Halted ...", tid); + DPRINTF(O3CPU,"[tid:%i]: Halted ...", tid); /* //Remove From Active List, if Active list::iterator isActive = find( activeThreads.begin(), activeThreads.end(), tid); if (isActive != activeThreads.end()) { - DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n", + DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", tid); activeThreads.erase(isActive); @@ -730,7 +733,7 @@ FullO3CPU::takeOverFrom(BaseCPU *oldCPU) if (isActive == activeThreads.end()) { //May Need to Re-code this if the delay variable is the delay //needed for thread to activate - DPRINTF(FullCPU, "Adding Thread %i to active threads list\n", + DPRINTF(O3CPU, "Adding Thread %i to active threads list\n", tid); activeThreads.push_back(tid); @@ -958,7 +961,7 @@ template void FullO3CPU::removeFrontInst(DynInstPtr &inst) { - DPRINTF(FullCPU, "FullCPU: Removing committed instruction [tid:%i] PC %#x " + DPRINTF(O3CPU, "Removing committed instruction [tid:%i] PC %#x " "[sn:%lli]\n", inst->threadNumber, inst->readPC(), inst->seqNum); @@ -972,7 +975,7 @@ template void FullO3CPU::removeInstsNotInROB(unsigned tid) { - DPRINTF(FullCPU, "FullCPU: Thread %i: Deleting instructions from instruction" + DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction" " list.\n", tid); ListIt end_it; @@ -982,12 +985,12 @@ FullO3CPU::removeInstsNotInROB(unsigned tid) if (instList.empty()) { return; } else if (rob.isEmpty(/*tid*/)) { - DPRINTF(FullCPU, "FullCPU: ROB is empty, squashing all insts.\n"); + DPRINTF(O3CPU, "ROB is empty, squashing all insts.\n"); end_it = instList.begin(); rob_empty = true; } else { end_it = 
(rob.readTailInst(tid))->getInstListIt(); - DPRINTF(FullCPU, "FullCPU: ROB is not empty, squashing insts not in ROB.\n"); + DPRINTF(O3CPU, "ROB is not empty, squashing insts not in ROB.\n"); } removeInstsThisCycle = true; @@ -1026,7 +1029,7 @@ FullO3CPU::removeInstsUntil(const InstSeqNum &seq_num, inst_iter--; - DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction " + DPRINTF(O3CPU, "Deleting instructions from instruction " "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n", tid, seq_num, (*inst_iter)->seqNum); @@ -1048,7 +1051,7 @@ inline void FullO3CPU::squashInstIt(const ListIt &instIt, const unsigned &tid) { if ((*instIt)->threadNumber == tid) { - DPRINTF(FullCPU, "FullCPU: Squashing instruction, " + DPRINTF(O3CPU, "Squashing instruction, " "[tid:%i] [sn:%lli] PC %#x\n", (*instIt)->threadNumber, (*instIt)->seqNum, @@ -1069,7 +1072,7 @@ void FullO3CPU::cleanUpRemovedInsts() { while (!removeList.empty()) { - DPRINTF(FullCPU, "FullCPU: Removing instruction, " + DPRINTF(O3CPU, "Removing instruction, " "[tid:%i] [sn:%lli] PC %#x\n", (*removeList.front())->threadNumber, (*removeList.front())->seqNum, diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index ff41a33061..bd68701141 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -56,13 +56,13 @@ class ThreadContext; class MemObject; class Process; -class BaseFullCPU : public BaseCPU +class BaseO3CPU : public BaseCPU { //Stuff that's pretty ISA independent will go here. public: typedef BaseCPU::Params Params; - BaseFullCPU(Params *params); + BaseO3CPU(Params *params); void regStats(); @@ -78,7 +78,7 @@ class BaseFullCPU : public BaseCPU * tick() function for the CPU is defined here. 
*/ template -class FullO3CPU : public BaseFullCPU +class FullO3CPU : public BaseO3CPU { public: typedef TheISA::FloatReg FloatReg; diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh index ff88358d62..1edf3335db 100644 --- a/src/cpu/o3/decode.hh +++ b/src/cpu/o3/decode.hh @@ -48,7 +48,7 @@ class DefaultDecode { private: // Typedefs from the Impl. - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::Params Params; typedef typename Impl::CPUPol CPUPol; @@ -95,7 +95,7 @@ class DefaultDecode void regStats(); /** Sets CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the main backwards communication time buffer pointer. */ void setTimeBuffer(TimeBuffer *tb_ptr); @@ -189,7 +189,7 @@ class DefaultDecode private: // Interfaces to objects outside of decode. /** CPU interface. */ - FullCPU *cpu; + O3CPU *cpu; /** Time buffer interface. */ TimeBuffer *timeBuffer; diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 48f6ee6124..16be017845 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -112,7 +112,7 @@ DefaultDecode::regStats() template void -DefaultDecode::setCPU(FullCPU *cpu_ptr) +DefaultDecode::setCPU(O3CPU *cpu_ptr) { DPRINTF(Decode, "Setting CPU pointer.\n"); cpu = cpu_ptr; @@ -427,7 +427,7 @@ DefaultDecode::updateStatus() DPRINTF(Activity, "Activating stage.\n"); - cpu->activateStage(FullCPU::DecodeIdx); + cpu->activateStage(O3CPU::DecodeIdx); } } else { // If it's not unblocking, then decode will not have any internal @@ -436,7 +436,7 @@ DefaultDecode::updateStatus() _status = Inactive; DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::DecodeIdx); + cpu->deactivateStage(O3CPU::DecodeIdx); } } } diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 962d464372..476d4343f3 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -57,7 +57,7 @@ class 
DefaultFetch typedef typename Impl::CPUPol CPUPol; typedef typename Impl::DynInst DynInst; typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::Params Params; /** Typedefs from the CPU policy. */ @@ -164,7 +164,7 @@ class DefaultFetch void regStats(); /** Sets CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the main backwards communication time buffer pointer. */ void setTimeBuffer(TimeBuffer *time_buffer); @@ -296,8 +296,8 @@ class DefaultFetch int branchCount(); private: - /** Pointer to the FullCPU. */ - FullCPU *cpu; + /** Pointer to the O3CPU. */ + O3CPU *cpu; /** Time buffer interface. */ TimeBuffer *timeBuffer; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 477a1469cc..ab706ed472 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -28,6 +28,8 @@ * Authors: Kevin Lim */ +#include "config/use_checker.hh" + #include "arch/isa_traits.hh" #include "arch/utility.hh" #include "cpu/checker/cpu.hh" @@ -268,7 +270,7 @@ DefaultFetch::regStats() template void -DefaultFetch::setCPU(FullCPU *cpu_ptr) +DefaultFetch::setCPU(O3CPU *cpu_ptr) { DPRINTF(Fetch, "Setting the CPU pointer.\n"); cpu = cpu_ptr; @@ -280,9 +282,11 @@ DefaultFetch::setCPU(FullCPU *cpu_ptr) icachePort->setPeer(mem_dport); mem_dport->setPeer(icachePort); +#if USE_CHECKER if (cpu->checker) { cpu->checker->setIcachePort(icachePort); } +#endif // Fetch needs to start fetching instructions at the very beginning, // so it must start up in active state. 
@@ -430,7 +434,7 @@ DefaultFetch::switchToActive() if (_status == Inactive) { DPRINTF(Activity, "Activating stage.\n"); - cpu->activateStage(FullCPU::FetchIdx); + cpu->activateStage(O3CPU::FetchIdx); _status = Active; } @@ -443,7 +447,7 @@ DefaultFetch::switchToInactive() if (_status == Active) { DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::FetchIdx); + cpu->deactivateStage(O3CPU::FetchIdx); _status = Inactive; } @@ -662,7 +666,7 @@ DefaultFetch::updateFetchStatus() "completion\n",tid); } - cpu->activateStage(FullCPU::FetchIdx); + cpu->activateStage(O3CPU::FetchIdx); } return Active; @@ -673,7 +677,7 @@ DefaultFetch::updateFetchStatus() if (_status == Active) { DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::FetchIdx); + cpu->deactivateStage(O3CPU::FetchIdx); } return Inactive; diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 615022dc94..2af68d8fcf 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -68,7 +68,7 @@ class DefaultIEW //Typedefs from Impl typedef typename Impl::CPUPol CPUPol; typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::Params Params; typedef typename CPUPol::IQ IQ; @@ -80,7 +80,7 @@ class DefaultIEW typedef typename CPUPol::RenameStruct RenameStruct; typedef typename CPUPol::IssueStruct IssueStruct; - friend class Impl::FullCPU; + friend class Impl::O3CPU; friend class CPUPol::IQ; public: @@ -126,7 +126,7 @@ class DefaultIEW void initStage(); /** Sets CPU pointer for IEW, IQ, and LSQ. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets main time buffer used for backwards communication. */ void setTimeBuffer(TimeBuffer *tb_ptr); @@ -331,7 +331,7 @@ class DefaultIEW private: /** CPU pointer. */ - FullCPU *cpu; + O3CPU *cpu; /** Records if IEW has written to the time buffer this cycle, so that the * CPU can deschedule itself if there is no activity. 
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 6c207d94a7..8e6fd46a11 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -276,7 +276,7 @@ DefaultIEW::initStage() template void -DefaultIEW::setCPU(FullCPU *cpu_ptr) +DefaultIEW::setCPU(O3CPU *cpu_ptr) { DPRINTF(IEW, "Setting CPU pointer.\n"); cpu = cpu_ptr; @@ -284,7 +284,7 @@ DefaultIEW::setCPU(FullCPU *cpu_ptr) instQueue.setCPU(cpu_ptr); ldstQueue.setCPU(cpu_ptr); - cpu->activateStage(FullCPU::IEWIdx); + cpu->activateStage(O3CPU::IEWIdx); } template @@ -857,7 +857,7 @@ inline void DefaultIEW::activateStage() { DPRINTF(Activity, "Activating stage.\n"); - cpu->activateStage(FullCPU::IEWIdx); + cpu->activateStage(O3CPU::IEWIdx); } template @@ -865,7 +865,7 @@ inline void DefaultIEW::deactivateStage() { DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::IEWIdx); + cpu->deactivateStage(O3CPU::IEWIdx); } template diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index 6fd3c6d0b3..d745faf7bd 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -68,7 +68,7 @@ class InstructionQueue { public: //Typedefs from the Impl. - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::Params Params; @@ -80,7 +80,7 @@ class InstructionQueue // Typedef of iterator through the list of instructions. typedef typename std::list::iterator ListIt; - friend class Impl::FullCPU; + friend class Impl::O3CPU; /** FU completion event class. */ class FUCompletion : public Event { @@ -125,7 +125,7 @@ class InstructionQueue void resetState(); /** Sets CPU pointer. */ - void setCPU(FullCPU *_cpu) { cpu = _cpu; } + void setCPU(O3CPU *_cpu) { cpu = _cpu; } /** Sets active threads list. */ void setActiveThreads(std::list *at_ptr); @@ -252,7 +252,7 @@ class InstructionQueue ///////////////////////// /** Pointer to the CPU. 
*/ - FullCPU *cpu; + O3CPU *cpu; /** Cache interface. */ MemInterface *dcacheInterface; diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index 1dbd46b8ed..89791fec95 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -44,7 +44,7 @@ template class LSQ { public: typedef typename Impl::Params Params; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::CPUPol::IEW IEW; typedef typename Impl::CPUPol::LSQUnit LSQUnit; @@ -68,7 +68,7 @@ class LSQ { /** Sets the pointer to the list of active threads. */ void setActiveThreads(std::list *at_ptr); /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the IEW stage pointer. */ void setIEW(IEW *iew_ptr); /** Switches out the LSQ. */ @@ -275,7 +275,7 @@ class LSQ { LSQUnit thread[Impl::MaxThreads]; /** The CPU pointer. */ - FullCPU *cpu; + O3CPU *cpu; /** The IEW stage pointer. */ IEW *iewStage; diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 0b6c6f542a..5173f8be11 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -126,7 +126,7 @@ LSQ::setActiveThreads(list *at_ptr) template void -LSQ::setCPU(FullCPU *cpu_ptr) +LSQ::setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 2d700ddf15..cef6e0a2e7 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -61,7 +61,7 @@ class LSQUnit { typedef TheISA::IntReg IntReg; public: typedef typename Impl::Params Params; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::CPUPol::IEW IEW; typedef typename Impl::CPUPol::IssueStruct IssueStruct; @@ -81,7 +81,7 @@ class LSQUnit { void regStats(); /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the IEW stage pointer. 
*/ void setIEW(IEW *iew_ptr) @@ -232,7 +232,7 @@ class LSQUnit { private: /** Pointer to the CPU. */ - FullCPU *cpu; + O3CPU *cpu; /** Pointer to the IEW stage. */ IEW *iewStage; @@ -249,13 +249,13 @@ class LSQUnit { { protected: /** Pointer to CPU. */ - FullCPU *cpu; + O3CPU *cpu; /** Pointer to LSQ. */ LSQUnit *lsq; public: /** Default constructor. */ - DcachePort(FullCPU *_cpu, LSQUnit *_lsq) + DcachePort(O3CPU *_cpu, LSQUnit *_lsq) : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq) { } diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 6e201ea5f2..f4a656aa19 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -29,6 +29,8 @@ * Korey Sewell */ +#include "config/use_checker.hh" + #include "cpu/checker/cpu.hh" #include "cpu/o3/lsq_unit.hh" #include "base/str.hh" @@ -171,7 +173,7 @@ LSQUnit::init(Params *params, unsigned maxLQEntries, template void -LSQUnit::setCPU(FullCPU *cpu_ptr) +LSQUnit::setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; dcachePort = new DcachePort(cpu, this); @@ -180,9 +182,11 @@ LSQUnit::setCPU(FullCPU *cpu_ptr) dcachePort->setPeer(mem_dport); mem_dport->setPeer(dcachePort); +#if USE_CHECKER if (cpu->checker) { cpu->checker->setDcachePort(dcachePort); } +#endif } template @@ -788,9 +792,11 @@ LSQUnit::storePostSend(Packet *pkt) // only works so long as the checker doesn't try to // verify the value in memory for stores. storeQueue[storeWBIdx].inst->setCompleted(); +#if USE_CHECKER if (cpu->checker) { cpu->checker->verify(storeQueue[storeWBIdx].inst); } +#endif } if (pkt->result != Packet::Success) { @@ -884,9 +890,11 @@ LSQUnit::completeStore(int store_idx) // Tell the checker we've completed this instruction. Some stores // may get reported twice to the checker, but the checker can // handle that case. 
+#if USE_CHECKER if (cpu->checker) { cpu->checker->verify(storeQueue[store_idx].inst); } +#endif } template diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index ade5e4e56e..6972f055f1 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -72,7 +72,7 @@ class PhysRegFile // Will make these registers public for now, but they probably should // be private eventually with some accessor functions. public: - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; /** * Constructs a physical register file with the specified amount of @@ -278,11 +278,11 @@ class PhysRegFile private: /** CPU pointer. */ - FullCPU *cpu; + O3CPU *cpu; public: /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; } + void setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; } /** Number of physical integer registers. */ unsigned numPhysicalIntRegs; diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index 42fdf6bf5d..581fc8f817 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -55,7 +55,7 @@ class DefaultRename // Typedefs from the Impl. typedef typename Impl::CPUPol CPUPol; typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::Params Params; // Typedefs from the CPUPol @@ -115,7 +115,7 @@ class DefaultRename void regStats(); /** Sets CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets the main backwards communication time buffer pointer. */ void setTimeBuffer(TimeBuffer *tb_ptr); @@ -291,7 +291,7 @@ class DefaultRename std::list historyBuffer[Impl::MaxThreads]; /** Pointer to CPU. */ - FullCPU *cpu; + O3CPU *cpu; /** Pointer to main time buffer used for backwards communication. 
*/ TimeBuffer *timeBuffer; diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 307022cb8e..df8b7f9dab 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -162,7 +162,7 @@ DefaultRename::regStats() template void -DefaultRename::setCPU(FullCPU *cpu_ptr) +DefaultRename::setCPU(O3CPU *cpu_ptr) { DPRINTF(Rename, "Setting CPU pointer.\n"); cpu = cpu_ptr; @@ -755,7 +755,7 @@ DefaultRename::updateStatus() DPRINTF(Activity, "Activating stage.\n"); - cpu->activateStage(FullCPU::RenameIdx); + cpu->activateStage(O3CPU::RenameIdx); } } else { // If it's not unblocking, then rename will not have any internal @@ -764,7 +764,7 @@ DefaultRename::updateStatus() _status = Inactive; DPRINTF(Activity, "Deactivating stage.\n"); - cpu->deactivateStage(FullCPU::RenameIdx); + cpu->deactivateStage(O3CPU::RenameIdx); } } } diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh index 6d1402531f..b98d7c4c2a 100644 --- a/src/cpu/o3/rob.hh +++ b/src/cpu/o3/rob.hh @@ -45,7 +45,7 @@ class ROB typedef TheISA::RegIndex RegIndex; public: //Typedefs from the Impl. - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef std::pair UnmapInfo; @@ -90,7 +90,7 @@ class ROB * is created within. * @param cpu_ptr Pointer to the implementation specific full CPU object. */ - void setCPU(FullCPU *cpu_ptr); + void setCPU(O3CPU *cpu_ptr); /** Sets pointer to the list of active threads. * @param at_ptr Pointer to the list of active threads. @@ -257,7 +257,7 @@ class ROB private: /** Pointer to the CPU. 
*/ - FullCPU *cpu; + O3CPU *cpu; /** Active Threads in CPU */ std::list* activeThreads; diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh index 5a941834b6..6277dd68be 100644 --- a/src/cpu/o3/rob_impl.hh +++ b/src/cpu/o3/rob_impl.hh @@ -100,7 +100,7 @@ ROB::name() const template void -ROB::setCPU(FullCPU *cpu_ptr) +ROB::setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; diff --git a/src/cpu/o3/thread_state.hh b/src/cpu/o3/thread_state.hh index b6535baa14..19cbffb444 100644 --- a/src/cpu/o3/thread_state.hh +++ b/src/cpu/o3/thread_state.hh @@ -58,11 +58,11 @@ class Process; template struct O3ThreadState : public ThreadState { typedef ThreadContext::Status Status; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::O3CPU O3CPU; private: /** Pointer to the CPU. */ - FullCPU *cpu; + O3CPU *cpu; public: /** Whether or not the thread is currently in syscall mode, and * thus able to be externally updated without squashing. @@ -75,12 +75,12 @@ struct O3ThreadState : public ThreadState { bool trapPending; #if FULL_SYSTEM - O3ThreadState(FullCPU *_cpu, int _thread_num) + O3ThreadState(O3CPU *_cpu, int _thread_num) : ThreadState(-1, _thread_num), inSyscall(0), trapPending(0) { } #else - O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid, + O3ThreadState(O3CPU *_cpu, int _thread_num, Process *_process, int _asid, MemObject *mem) : ThreadState(-1, _thread_num, mem, _process, _asid), cpu(_cpu), inSyscall(0), trapPending(0) diff --git a/src/python/m5/objects/AlphaFullCPU.py b/src/python/m5/objects/AlphaO3CPU.py similarity index 98% rename from src/python/m5/objects/AlphaFullCPU.py rename to src/python/m5/objects/AlphaO3CPU.py index 2988305d3c..f14f8c88ed 100644 --- a/src/python/m5/objects/AlphaFullCPU.py +++ b/src/python/m5/objects/AlphaO3CPU.py @@ -2,8 +2,8 @@ from m5 import build_env from m5.config import * from BaseCPU import BaseCPU -class DerivAlphaFullCPU(BaseCPU): - type = 'DerivAlphaFullCPU' +class DerivAlphaO3CPU(BaseCPU): + type = 
'DerivAlphaO3CPU' activity = Param.Unsigned("Initial count") numThreads = Param.Unsigned("number of HW thread contexts") From f4d0f92855a505448ddac4367d5d5c698d1e4282 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 16 Jun 2006 17:15:18 -0400 Subject: [PATCH 032/152] Miscellaneous minor fixes. src/cpu/checker/cpu.cc: Add in comment. src/cpu/cpuevent.hh: Fix up comment. src/cpu/o3/bpred_unit.cc: Comment out Ozone instantiations. src/cpu/o3/dep_graph.hh: Include destructor. --HG-- extra : convert_revision : 549454ed11bc2fa49a0627f7fb8f96d00a9be303 --- src/cpu/checker/cpu.cc | 2 +- src/cpu/cpuevent.hh | 2 +- src/cpu/o3/bpred_unit.cc | 4 ++-- src/cpu/o3/dep_graph.hh | 8 ++++++++ 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc index 99189a8966..45c57d276a 100644 --- a/src/cpu/checker/cpu.cc +++ b/src/cpu/checker/cpu.cc @@ -829,6 +829,6 @@ Checker::dumpInsts() //template //class Checker > >; - +// Manually instantiate checker template class Checker > >; diff --git a/src/cpu/cpuevent.hh b/src/cpu/cpuevent.hh index 11ac7aafba..9dfae27cf3 100644 --- a/src/cpu/cpuevent.hh +++ b/src/cpu/cpuevent.hh @@ -36,7 +36,7 @@ class ThreadContext; -/** This class creates a global list of events than need a pointer to an +/** This class creates a global list of events that need a pointer to a * thread context. When a switchover takes place the events can be migrated * to the new thread context, otherwise you could have a wake timer interrupt * go off on a switched out cpu or other unfortunate events. 
This object MUST be diff --git a/src/cpu/o3/bpred_unit.cc b/src/cpu/o3/bpred_unit.cc index b33543bdc6..2944387045 100644 --- a/src/cpu/o3/bpred_unit.cc +++ b/src/cpu/o3/bpred_unit.cc @@ -31,9 +31,9 @@ #include "cpu/o3/bpred_unit_impl.hh" #include "cpu/o3/alpha_impl.hh" #include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/ozone/ozone_impl.hh" +//#include "cpu/ozone/ozone_impl.hh" //#include "cpu/ozone/simple_impl.hh" template class BPredUnit; -template class BPredUnit; +//template class BPredUnit; //template class BPredUnit; diff --git a/src/cpu/o3/dep_graph.hh b/src/cpu/o3/dep_graph.hh index 3659b1a37e..c19fd0abff 100644 --- a/src/cpu/o3/dep_graph.hh +++ b/src/cpu/o3/dep_graph.hh @@ -68,6 +68,8 @@ class DependencyGraph : numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0) { } + ~DependencyGraph(); + /** Resize the dependency graph to have num_entries registers. */ void resize(int num_entries); @@ -120,6 +122,12 @@ class DependencyGraph uint64_t nodesRemoved; }; +template +DependencyGraph::~DependencyGraph() +{ + delete [] dependGraph; +} + template void DependencyGraph::resize(int num_entries) From def9ea38b561676a89959882f92d3520a2e0224f Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 16 Jun 2006 17:19:36 -0400 Subject: [PATCH 033/152] Add in exec_context.hh, which is a file for documentation purposes only. It describes the ExecContext interface that the ISA uses to access CPU state. Also #ifdef Erik's old copy code from the decoder so ExecContext doesn't need his two specific copy functions. src/arch/alpha/isa/decoder.isa: Surround Erik's old copy code with #ifdefs. This way the copy functions don't need to be included in the ExecContext (until somebody decides to add them back in). 
--HG-- extra : convert_revision : 508ca387757a32bb616e5b4b07af17787a76970e --- src/arch/alpha/isa/decoder.isa | 4 + src/cpu/exec_context.hh | 161 +++++++++++++++++++++++++++++++++ 2 files changed, 165 insertions(+) create mode 100644 src/cpu/exec_context.hh diff --git a/src/arch/alpha/isa/decoder.isa b/src/arch/alpha/isa/decoder.isa index d2908b27a2..f449d2d697 100644 --- a/src/arch/alpha/isa/decoder.isa +++ b/src/arch/alpha/isa/decoder.isa @@ -47,9 +47,11 @@ decode OPCODE default Unknown::unknown() { 0x23: ldt({{ Fa = Mem.df; }}); 0x2a: ldl_l({{ Ra.sl = Mem.sl; }}, mem_flags = LOCKED); 0x2b: ldq_l({{ Ra.uq = Mem.uq; }}, mem_flags = LOCKED); +#ifdef USE_COPY 0x20: MiscPrefetch::copy_load({{ EA = Ra; }}, {{ fault = xc->copySrcTranslate(EA); }}, inst_flags = [IsMemRef, IsLoad, IsCopy]); +#endif } format LoadOrPrefetch { @@ -69,9 +71,11 @@ decode OPCODE default Unknown::unknown() { 0x0f: stq_u({{ Mem.uq = Ra.uq; }}, {{ EA = (Rb + disp) & ~7; }}); 0x26: sts({{ Mem.ul = t_to_s(Fa.uq); }}); 0x27: stt({{ Mem.df = Fa; }}); +#ifdef USE_COPY 0x24: MiscPrefetch::copy_store({{ EA = Rb; }}, {{ fault = xc->copy(EA); }}, inst_flags = [IsMemRef, IsStore, IsCopy]); +#endif } format StoreCond { diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh new file mode 100644 index 0000000000..f6e8d7c255 --- /dev/null +++ b/src/cpu/exec_context.hh @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#error "Cannot include this file" + +/** + * The ExecContext is not a usable class. It is simply here for + * documentation purposes. It shows the interface that is used by the + * ISA to access and change CPU state. + */ +class ExecContext { + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. 
We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to reduce overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). + + /** Reads an integer register. */ + uint64_t readIntReg(const StaticInst *si, int idx); + + /** Reads a floating point register of a specific width. */ + FloatReg readFloatReg(const StaticInst *si, int idx, int width); + + /** Reads a floating point register of single register width. */ + FloatReg readFloatReg(const StaticInst *si, int idx); + + /** Reads a floating point register of a specific width in its + * binary format, instead of by value. */ + FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width); + + /** Reads a floating point register in its binary format, instead + * of by value. */ + FloatRegBits readFloatRegBits(const StaticInst *si, int idx); + + /** Sets an integer register to a value. */ + void setIntReg(const StaticInst *si, int idx, uint64_t val); + + /** Sets a floating point register of a specific width to a value. */ + void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width); + + /** Sets a floating point register of single width to a value. */ + void setFloatReg(const StaticInst *si, int idx, FloatReg val); + + /** Sets the bits of a floating point register of a specific width + * to a binary value. */ + void setFloatRegBits(const StaticInst *si, int idx, + FloatRegBits val, int width); + + /** Sets the bits of a floating point register of single width + * to a binary value. */ + void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val); + + /** Reads the PC. */ + uint64_t readPC(); + /** Reads the NextPC. */ + uint64_t readNextPC(); + /** Reads the Next-NextPC. Only for architectures like SPARC or MIPS. 
*/ + uint64_t readNextNPC(); + + /** Sets the PC. */ + void setPC(uint64_t val); + /** Sets the NextPC. */ + void setNextPC(uint64_t val); + /** Sets the Next-NextPC. Only for architectures like SPARC or MIPS. */ + void setNextNPC(uint64_t val); + + /** Reads a miscellaneous register. */ + MiscReg readMiscReg(int misc_reg); + + /** Reads a miscellaneous register, handling any architectural + * side effects due to reading that register. */ + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault); + + /** Sets a miscellaneous register. */ + Fault setMiscReg(int misc_reg, const MiscReg &val); + + /** Sets a miscellaneous register, handling any architectural + * side effects due to writing that register. */ + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); + + /** Records the effective address of the instruction. Only valid + * for memory ops. */ + void setEA(Addr EA); + /** Returns the effective address of the instruction. Only valid + * for memory ops. */ + Addr getEA(); + + /** Returns a pointer to the ThreadContext. */ + ThreadContext *tcBase(); + + /** Reads an address, creating a memory request with the given + * flags. Stores result of read in data. */ + template + Fault read(Addr addr, T &data, unsigned flags); + + /** Writes to an address, creating a memory request with the given + * flags. Writes data to memory. For store conditionals, returns + * the result of the store in res. */ + template + Fault write(T data, Addr addr, unsigned flags, uint64_t *res); + + /** Prefetches an address, creating a memory request with the + * given flags. */ + void prefetch(Addr addr, unsigned flags); + + /** Hints to the memory system that an address will be written to + * soon, with the given size. Creates a memory request with the + * given flags. */ + void writeHint(Addr addr, int size, unsigned flags); + +#if FULL_SYSTEM + /** Somewhat Alpha-specific function that handles returning from + * an error or interrupt. 
*/ + Fault hwrei(); + /** Reads the interrupt flags. */ + int readIntrFlag(); + /** Sets the interrupt flags to a value. */ + void setIntrFlag(int val); + + /** + * Check for special simulator handling of specific PAL calls. If + * return value is false, actual PAL call will be suppressed. + */ + bool simPalCheck(int palFunc); +#else + /** Executes a syscall specified by the callnum. */ + void syscall(int64_t callnum); +#endif +}; From 0bbd909f02e72a321a65b933104c5ef1e157116b Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 16 Jun 2006 17:52:15 -0400 Subject: [PATCH 034/152] Reorganization to move FuncUnit, FUDesc, and OpDesc out of the encumbered directory and into the normal cpu directory. src/SConscript: Split off FuncUnits from old FUPool so I'm not including encumbered code. This was all written by Steve Raasch so it's safe to include in the main tree. src/cpu/o3/fu_pool.cc: Include the func unit file that's not in the encumbered directory. --HG-- extra : convert_revision : 9801c606961dd2d62dba190d13a76069992bf241 --- src/SConscript | 3 +- src/cpu/func_unit.cc | 171 ++++++++++++++++++++++++++++++ src/cpu/func_unit.hh | 101 ++++++++++++++++++ src/cpu/o3/fu_pool.cc | 2 +- src/python/m5/objects/FuncUnit.py | 17 +++ 5 files changed, 291 insertions(+), 3 deletions(-) create mode 100644 src/cpu/func_unit.cc create mode 100644 src/cpu/func_unit.hh create mode 100644 src/python/m5/objects/FuncUnit.py diff --git a/src/SConscript b/src/SConscript index a1c18711c7..0f21891c42 100644 --- a/src/SConscript +++ b/src/SConscript @@ -85,6 +85,7 @@ base_sources = Split(''' cpu/base.cc cpu/cpuevent.cc cpu/exetrace.cc + cpu/func_unit.cc cpu/op_class.cc cpu/pc_event.cc cpu/quiesce_event.cc @@ -93,8 +94,6 @@ base_sources = Split(''' cpu/simple_thread.cc cpu/thread_state.cc - encumbered/cpu/full/fu_pool.cc - mem/bridge.cc mem/bus.cc mem/connector.cc diff --git a/src/cpu/func_unit.cc b/src/cpu/func_unit.cc new file mode 100644 index 0000000000..c20578a43b --- /dev/null +++ 
b/src/cpu/func_unit.cc @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2002-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Steve Raasch + */ + +#include + +#include "base/misc.hh" +#include "cpu/func_unit.hh" +#include "sim/builder.hh" + +using namespace std; + + +//////////////////////////////////////////////////////////////////////////// +// +// The function unit +// +FuncUnit::FuncUnit() +{ + capabilityList.reset(); +} + + +// Copy constructor +FuncUnit::FuncUnit(const FuncUnit &fu) +{ + + for (int i = 0; i < Num_OpClasses; ++i) { + opLatencies[i] = fu.opLatencies[i]; + issueLatencies[i] = fu.issueLatencies[i]; + } + + capabilityList = fu.capabilityList; +} + + +void +FuncUnit::addCapability(OpClass cap, unsigned oplat, unsigned issuelat) +{ + if (issuelat == 0 || oplat == 0) + panic("FuncUnit: you don't really want a zero-cycle latency do you?"); + + capabilityList.set(cap); + + opLatencies[cap] = oplat; + issueLatencies[cap] = issuelat; +} + +bool +FuncUnit::provides(OpClass capability) +{ + return capabilityList[capability]; +} + +bitset +FuncUnit::capabilities() +{ + return capabilityList; +} + +unsigned & +FuncUnit::opLatency(OpClass cap) +{ + return opLatencies[cap]; +} + +unsigned +FuncUnit::issueLatency(OpClass capability) +{ + return issueLatencies[capability]; +} + +//////////////////////////////////////////////////////////////////////////// +// +// The SimObjects we use to get the FU information into the simulator +// +//////////////////////////////////////////////////////////////////////////// + +// +// We use 2 objects to specify this data in the INI file: +// (1) OpDesc - Describes the operation class & latencies +// (multiple OpDesc objects can refer to the same +// operation classes) +// (2) FUDesc - Describes the operations available in the unit & +// the number of these units +// +// + + +// +// The operation-class description object +// + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(OpDesc) + + SimpleEnumParam opClass; + Param opLat; + Param issueLat; + +END_DECLARE_SIM_OBJECT_PARAMS(OpDesc) + +BEGIN_INIT_SIM_OBJECT_PARAMS(OpDesc) + + INIT_ENUM_PARAM(opClass,
"type of operation", opClassStrings), + INIT_PARAM(opLat, "cycles until result is available"), + INIT_PARAM(issueLat, "cycles until another can be issued") + +END_INIT_SIM_OBJECT_PARAMS(OpDesc) + + +CREATE_SIM_OBJECT(OpDesc) +{ + return new OpDesc(getInstanceName(), opClass, opLat, issueLat); +} + +REGISTER_SIM_OBJECT("OpDesc", OpDesc) + + +// +// The FuDesc object +// + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(FUDesc) + + SimObjectVectorParam opList; + Param count; + +END_DECLARE_SIM_OBJECT_PARAMS(FUDesc) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(FUDesc) + + INIT_PARAM(opList, "list of operation classes for this FU type"), + INIT_PARAM(count, "number of these FU's available") + +END_INIT_SIM_OBJECT_PARAMS(FUDesc) + + +CREATE_SIM_OBJECT(FUDesc) +{ + return new FUDesc(getInstanceName(), opList, count); +} + +REGISTER_SIM_OBJECT("FUDesc", FUDesc) + diff --git a/src/cpu/func_unit.hh b/src/cpu/func_unit.hh new file mode 100644 index 0000000000..7801430969 --- /dev/null +++ b/src/cpu/func_unit.hh @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2002-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Steve Raasch + */ + +#ifndef __CPU_FUNC_UNIT_HH__ +#define __CPU_FUNC_UNIT_HH__ + +#include +#include +#include + +#include "cpu/op_class.hh" +#include "sim/sim_object.hh" + +//////////////////////////////////////////////////////////////////////////// +// +// Structures used ONLY during the initialization phase... 
+// +// +// + +struct OpDesc : public SimObject +{ + OpClass opClass; + unsigned opLat; + unsigned issueLat; + + OpDesc(std::string name, OpClass c, unsigned o, unsigned i) + : SimObject(name), opClass(c), opLat(o), issueLat(i) {}; +}; + +struct FUDesc : public SimObject +{ + std::vector opDescList; + unsigned number; + + FUDesc(std::string name, std::vector l, unsigned n) + : SimObject(name), opDescList(l), number(n) {}; +}; + +typedef std::vector::iterator OPDDiterator; +typedef std::vector::iterator FUDDiterator; + + + + +//////////////////////////////////////////////////////////////////////////// +// +// The actual FU object +// +// +// +class FuncUnit +{ + private: + unsigned opLatencies[Num_OpClasses]; + unsigned issueLatencies[Num_OpClasses]; + std::bitset capabilityList; + + public: + FuncUnit(); + FuncUnit(const FuncUnit &fu); + + std::string name; + + void addCapability(OpClass cap, unsigned oplat, unsigned issuelat); + + bool provides(OpClass capability); + std::bitset capabilities(); + + unsigned &opLatency(OpClass capability); + unsigned issueLatency(OpClass capability); +}; + +#endif // __FU_POOL_HH__ diff --git a/src/cpu/o3/fu_pool.cc b/src/cpu/o3/fu_pool.cc index 545deea9b4..42e329acaa 100644 --- a/src/cpu/o3/fu_pool.cc +++ b/src/cpu/o3/fu_pool.cc @@ -31,7 +31,7 @@ #include #include "cpu/o3/fu_pool.hh" -#include "encumbered/cpu/full/fu_pool.hh" +#include "cpu/func_unit.hh" #include "sim/builder.hh" using namespace std; diff --git a/src/python/m5/objects/FuncUnit.py b/src/python/m5/objects/FuncUnit.py new file mode 100644 index 0000000000..f61590ae9c --- /dev/null +++ b/src/python/m5/objects/FuncUnit.py @@ -0,0 +1,17 @@ +from m5.config import * + +class OpType(Enum): + vals = ['(null)', 'IntAlu', 'IntMult', 'IntDiv', 'FloatAdd', + 'FloatCmp', 'FloatCvt', 'FloatMult', 'FloatDiv', 'FloatSqrt', + 'MemRead', 'MemWrite', 'IprAccess', 'InstPrefetch'] + +class OpDesc(SimObject): + type = 'OpDesc' + issueLat = Param.Int(1, "cycles until another can be 
issued") + opClass = Param.OpType("type of operation") + opLat = Param.Int(1, "cycles until result is available") + +class FUDesc(SimObject): + type = 'FUDesc' + count = Param.Int("number of these FU's available") + opList = VectorParam.OpDesc("operation classes for this FU type") From aa1efe3e72e40526e1db3f99c1fbb69d3c12d28c Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 16 Jun 2006 18:04:34 -0400 Subject: [PATCH 035/152] Update this with the same option as single_fs.py --HG-- extra : convert_revision : 778d654f515b6af7c45165b0a9bc5ef0d60f0d19 --- configs/test/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/test/test.py b/configs/test/test.py index 76791d9d78..2b5a6769f5 100644 --- a/configs/test/test.py +++ b/configs/test/test.py @@ -13,7 +13,7 @@ parser = optparse.OptionParser(option_list=m5.standardOptions) parser.add_option("-c", "--cmd", default="hello") parser.add_option("-t", "--timing", action="store_true") -parser.add_option("-f", "--full", action="store_true") +parser.add_option("-d", "--detailed", action="store_true") parser.add_option("-m", "--maxtick", type="int") (options, args) = parser.parse_args() @@ -34,7 +34,7 @@ mem = PhysicalMemory() if options.timing: cpu = TimingSimpleCPU() -elif options.full: +elif options.detailed: cpu = DetailedO3CPU() else: cpu = AtomicSimpleCPU() From e889b8242301b1123ffd4c05862f84826dd77806 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 16 Jun 2006 21:18:19 -0400 Subject: [PATCH 036/152] Add in some of the commonly used Trace/ExeTrace/Debug options. src/python/m5/__init__.py: Add in some of the commonly used Trace/ExeTrace/Debug options. Not terribly clean but it works. 
--HG-- extra : convert_revision : abb3cb4892512483a5031606baabf6540019233c --- src/python/m5/__init__.py | 93 ++++++++++++++++++++++++++++++++++----- 1 file changed, 83 insertions(+), 10 deletions(-) diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py index f849a899be..c0728120c5 100644 --- a/src/python/m5/__init__.py +++ b/src/python/m5/__init__.py @@ -67,15 +67,46 @@ def setTraceFlags(option, opt_str, value, parser): def setTraceStart(option, opt_str, value, parser): objects.Trace.start = value -def clearPCSymbol(option, opt_str, value, parser): - objects.ExecutionTrace.pc_symbol = False +def setTraceFile(option, opt_str, value, parser): + objects.Trace.file = value -def clearPrintCycle(option, opt_str, value, parser): - objects.ExecutionTrace.print_cycle = False +def usePCSymbol(option, opt_str, value, parser): + objects.ExecutionTrace.pc_symbol = value + +def printCycle(option, opt_str, value, parser): + objects.ExecutionTrace.print_cycle = value + +def printOp(option, opt_str, value, parser): + objects.ExecutionTrace.print_opclass = value + +def printThread(option, opt_str, value, parser): + objects.ExecutionTrace.print_thread = value + +def printEA(option, opt_str, value, parser): + objects.ExecutionTrace.print_effaddr = value + +def printData(option, opt_str, value, parser): + objects.ExecutionTrace.print_data = value + +def printFetchseq(option, opt_str, value, parser): + objects.ExecutionTrace.print_fetchseq = value + +def printCpseq(option, opt_str, value, parser): + objects.ExecutionTrace.print_cpseq = value + +def dumpOnExit(option, opt_str, value, parser): + objects.Trace.dump_on_exit = value + +def debugBreak(option, opt_str, value, parser): + objects.Debug.break_cycles = value def statsTextFile(option, opt_str, value, parser): objects.Statistics.text_file = value +# Extra list to help for options that are true or false +TrueOrFalse = ['True', 'False'] +TorF = "True | False" + # Standard optparse options. 
Need to be explicitly included by the # user script when it calls optparse.OptionParser(). standardOptions = [ @@ -83,12 +114,54 @@ standardOptions = [ callback=setTraceFlags), optparse.make_option("--tracestart", type="int", action="callback", callback=setTraceStart), - optparse.make_option("--nopcsymbol", action="callback", - callback=clearPCSymbol, - help="Turn off printing PC symbols in trace output"), - optparse.make_option("--noprintcycle", action="callback", - callback=clearPrintCycle, - help="Turn off printing cycles in trace output"), + optparse.make_option("--tracefile", type="string", action="callback", + callback=setTraceFile), + optparse.make_option("--pcsymbol", type="choice", choices=TrueOrFalse, + default="True", metavar=TorF, + action="callback", callback=usePCSymbol, + help="Use PC symbols in trace output"), + optparse.make_option("--printcycle", type="choice", choices=TrueOrFalse, + default="True", metavar=TorF, + action="callback", callback=printCycle, + help="Print cycle numbers in trace output"), + optparse.make_option("--printopclass", type="choice", + choices=TrueOrFalse, + default="True", metavar=TorF, + action="callback", callback=printOp, + help="Print cycle numbers in trace output"), + optparse.make_option("--printthread", type="choice", + choices=TrueOrFalse, + default="True", metavar=TorF, + action="callback", callback=printThread, + help="Print thread number in trace output"), + optparse.make_option("--printeffaddr", type="choice", + choices=TrueOrFalse, + default="True", metavar=TorF, + action="callback", callback=printEA, + help="Print effective address in trace output"), + optparse.make_option("--printdata", type="choice", + choices=TrueOrFalse, + default="True", metavar=TorF, + action="callback", callback=printData, + help="Print result data in trace output"), + optparse.make_option("--printfetchseq", type="choice", + choices=TrueOrFalse, + default="True", metavar=TorF, + action="callback", callback=printFetchseq, + help="Print 
fetch sequence numbers in trace output"), + optparse.make_option("--printcpseq", type="choice", + choices=TrueOrFalse, + default="True", metavar=TorF, + action="callback", callback=printCpseq, + help="Print correct path sequence numbers in trace output"), + optparse.make_option("--dumponexit", type="choice", + choices=TrueOrFalse, + default="True", metavar=TorF, + action="callback", callback=dumpOnExit, + help="Dump trace buffer on exit"), + optparse.make_option("--debugbreak", type="int", metavar="CYCLE", + action="callback", callback=debugBreak, + help="Cycle to create a breakpoint"), optparse.make_option("--statsfile", type="string", action="callback", callback=statsTextFile, metavar="FILE", help="Sets the output file for the statistics") From 7efd0eafd8e5bb7a9ff088d56f1de3bd871b5a2b Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 17 Jun 2006 09:26:08 -0400 Subject: [PATCH 037/152] Minor fixes in comments. SConstruct: Fix paths in comments and other minor comment edits. src/cpu/SConscript: Fix path in comment. --HG-- extra : convert_revision : c02aa9cefd8c5ad791ad2f1653c1554a4aa8ffbd --- SConstruct | 19 +++++++++++-------- src/cpu/SConscript | 2 +- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/SConstruct b/SConstruct index c9ba13679c..8ddd07c636 100644 --- a/SConstruct +++ b/SConstruct @@ -39,17 +39,20 @@ # # You can build M5 in a different directory as long as there is a # 'build/' somewhere along the target path. The build system -# expdects that all configs under the same build directory are being +# expects that all configs under the same build directory are being # built for the same host system. # # Examples: -# These two commands are equivalent. The '-u' option tells scons to -# search up the directory tree for this SConstruct file. +# +# The following two commands are equivalent. The '-u' option tells +# scons to search up the directory tree for this SConstruct file. 
# % cd /m5 ; scons build/ALPHA_FS/m5.debug # % cd /m5/build/ALPHA_FS; scons -u m5.debug -# These two commands are equivalent and demonstrate building in a -# directory outside of the source tree. The '-C' option tells scons -# to chdir to the specified directory to find this SConstruct file. +# +# The following two commands are equivalent and demonstrate building +# in a directory outside of the source tree. The '-C' option tells +# scons to chdir to the specified directory to find this SConstruct +# file. # % cd /m5 ; scons /local/foo/build/ALPHA_FS/m5.debug # % cd /local/foo/build/ALPHA_FS; scons -C /m5 m5.debug # @@ -302,7 +305,7 @@ nonsticky_opts.AddOptions( BoolOption('update_ref', 'Update test reference outputs', False) ) -# These options get exported to #defines in config/*.hh (see m5/SConscript). +# These options get exported to #defines in config/*.hh (see src/SConscript). env.ExportOptions = ['FULL_SYSTEM', 'ALPHA_TLASER', 'USE_FENV', \ 'USE_MYSQL', 'NO_FAST_ALLOC', 'SS_COMPATIBLE_FP', \ 'USE_CHECKER'] @@ -488,7 +491,7 @@ for build_path in build_paths: if env['USE_SSE2']: env.Append(CCFLAGS='-msse2') - # The m5/SConscript file sets up the build rules in 'env' according + # The src/SConscript file sets up the build rules in 'env' according # to the configured options. It returns a list of environments, # one for each variant build (debug, opt, etc.) envList = SConscript('src/SConscript', build_dir = build_path, diff --git a/src/cpu/SConscript b/src/cpu/SConscript index 608625ed4a..f855682a1c 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -184,7 +184,7 @@ if 'CheckerCPU' in env['CPU_MODELS']: Exit(1) -# FullCPU sources are included from m5/SConscript since they're not +# FullCPU sources are included from src/SConscript since they're not # below this point in the file hierarchy. # Convert file names to SCons File objects. 
This takes care of the From 4a9c0a7dfc8aa1fcd70ec2b194691adec9ce424e Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 17 Jun 2006 09:58:10 -0400 Subject: [PATCH 038/152] Add --outdir option. Didn't call it "-d" since that's already being used for "detailed cpu". Needed to add extra function for user script to pass parsed options back to m5 module. configs/test/fs.py: configs/test/test.py: Call setStandardOptions(). src/python/m5/__init__.py: Add --outdir option. Add setStandardOptions() so user script can pass parsed options back to m5 module. src/sim/main.cc: Add SWIG-wrappable function to set output dir. --HG-- extra : convert_revision : 1323bee69ca920c699a1cd1218e15b7b0875c1e5 --- configs/test/fs.py | 1 + configs/test/test.py | 1 + src/python/m5/__init__.py | 17 ++++++++++++++++- src/sim/main.cc | 8 ++++++++ 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/configs/test/fs.py b/configs/test/fs.py index c742e916ca..aa530dd55c 100644 --- a/configs/test/fs.py +++ b/configs/test/fs.py @@ -8,6 +8,7 @@ parser = optparse.OptionParser(option_list=m5.standardOptions) parser.add_option("-t", "--timing", action="store_true") (options, args) = parser.parse_args() +m5.setStandardOptions(options) if args: print "Error: script doesn't take any positional arguments" diff --git a/configs/test/test.py b/configs/test/test.py index 2b5a6769f5..a570c1a086 100644 --- a/configs/test/test.py +++ b/configs/test/test.py @@ -17,6 +17,7 @@ parser.add_option("-d", "--detailed", action="store_true") parser.add_option("-m", "--maxtick", type="int") (options, args) = parser.parse_args() +m5.setStandardOptions(options) if args: print "Error: script doesn't take any positional arguments" diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py index c0728120c5..19af24e6f6 100644 --- a/src/python/m5/__init__.py +++ b/src/python/m5/__init__.py @@ -58,6 +58,20 @@ def AddToPath(path): sys.path.insert(1, path) +# The m5 module's pointer to the parsed options object 
+options = None + + +# User should call this function after calling parse_args() to pass +# parsed standard option values back into the m5 module for +# processing. +def setStandardOptions(_options): + # Set module global var + global options + options = _options + # tell C++ about output directory + main.setOutputDir(options.outdir) + # Callback to set trace flags. Not necessarily the best way to do # things in the long run (particularly if we change how these global # options are handled). @@ -110,6 +124,7 @@ TorF = "True | False" # Standard optparse options. Need to be explicitly included by the # user script when it calls optparse.OptionParser(). standardOptions = [ + optparse.make_option("--outdir", type="string", default="."), optparse.make_option("--traceflags", type="string", action="callback", callback=setTraceFlags), optparse.make_option("--tracestart", type="int", action="callback", @@ -187,7 +202,7 @@ def resolveSimObject(name): def instantiate(root): config.ticks_per_sec = float(root.clock.frequency) # ugly temporary hack to get output to config.ini - sys.stdout = file('config.ini', 'w') + sys.stdout = file(os.path.join(options.outdir, 'config.ini'), 'w') root.print_ini() sys.stdout.close() # close config.ini sys.stdout = sys.__stdout__ # restore to original diff --git a/src/sim/main.cc b/src/sim/main.cc index f63aec9cc4..fc057ea6f5 100644 --- a/src/sim/main.cc +++ b/src/sim/main.cc @@ -297,6 +297,14 @@ main(int argc, char **argv) Py_Finalize(); } + +void +setOutputDir(const string &dir) +{ + simout.setDirectory(dir); +} + + IniFile inifile; SimObject * From d96d28e56d39eec0baa1377779119495cfbf4701 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 17 Jun 2006 12:08:19 -0400 Subject: [PATCH 039/152] Rename SWIG "main" module to "cc_main" so it's clear from the Python side that this is the interface to C++. 
src/SConscript: main_wrap.cc -> cc_main_wrap.cc src/python/SConscript: src/python/m5/__init__.py: src/sim/main.cc: s/main/cc_main/ src/python/m5/config.py: s/main/cc_main/ Also directly import cc_main so we don't need to put the "m5." in front all the time. --HG-- extra : convert_revision : 755552f70cf671881ff31e476c677b95ef12950d --- src/SConscript | 2 +- src/python/SConscript | 6 +++--- src/python/m5/__init__.py | 14 +++++++------- src/python/m5/config.py | 6 +++--- src/sim/main.cc | 6 +++--- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/SConscript b/src/SConscript index 933158f5fd..157a911edb 100644 --- a/src/SConscript +++ b/src/SConscript @@ -106,7 +106,7 @@ base_sources = Split(''' sim/eventq.cc sim/faults.cc sim/main.cc - python/swig/main_wrap.cc + python/swig/cc_main_wrap.cc sim/param.cc sim/profile.cc sim/root.cc diff --git a/src/python/SConscript b/src/python/SConscript index 7b0f591eb8..3a9def9a81 100644 --- a/src/python/SConscript +++ b/src/python/SConscript @@ -87,12 +87,12 @@ addPkg('m5') pyzip_files.append('m5/defines.py') pyzip_files.append(join(env['ROOT'], 'util/pbs/jobfile.py')) -env.Command(['swig/main_wrap.cc', 'm5/main.py'], - 'swig/main.i', +env.Command(['swig/cc_main_wrap.cc', 'm5/cc_main.py'], + 'swig/cc_main.i', '$SWIG $SWIGFLAGS -outdir ${TARGETS[1].dir} ' '-o ${TARGETS[0]} $SOURCES') -pyzip_dep_files.append('m5/main.py') +pyzip_dep_files.append('m5/cc_main.py') # Action function to build the zip archive. Uses the PyZipFile module # included in the standard Python library. 
diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py index 19af24e6f6..ac69042770 100644 --- a/src/python/m5/__init__.py +++ b/src/python/m5/__init__.py @@ -30,11 +30,11 @@ import sys, os, time, atexit, optparse # import the SWIG-wrapped main C++ functions -import main +import cc_main # import a few SWIG-wrapped items (those that are likely to be used # directly by user scripts) completely into this module for # convenience -from main import simulate, SimLoopExitEvent +from cc_main import simulate, SimLoopExitEvent # import the m5 compile options import defines @@ -70,7 +70,7 @@ def setStandardOptions(_options): global options options = _options # tell C++ about output directory - main.setOutputDir(options.outdir) + cc_main.setOutputDir(options.outdir) # Callback to set trace flags. Not necessarily the best way to do # things in the long run (particularly if we change how these global @@ -206,10 +206,10 @@ def instantiate(root): root.print_ini() sys.stdout.close() # close config.ini sys.stdout = sys.__stdout__ # restore to original - main.loadIniFile(resolveSimObject) # load config.ini into C++ + cc_main.loadIniFile(resolveSimObject) # load config.ini into C++ root.createCCObject() root.connectPorts() - main.finalInit() + cc_main.finalInit() noDot = True # temporary until we fix dot if not noDot: dot = pydot.Dot() @@ -223,10 +223,10 @@ def instantiate(root): # Export curTick to user script. def curTick(): - return main.cvar.curTick + return cc_main.cvar.curTick # register our C++ exit callback function with Python -atexit.register(main.doExitCleanup) +atexit.register(cc_main.doExitCleanup) # This import allows user scripts to reference 'm5.objects.Foo' after # just doing an 'import m5' (without an 'import m5.objects'). 
May not diff --git a/src/python/m5/config.py b/src/python/m5/config.py index 058e725782..c29477465b 100644 --- a/src/python/m5/config.py +++ b/src/python/m5/config.py @@ -30,7 +30,7 @@ import os, re, sys, types, inspect, copy import m5 -from m5 import panic +from m5 import panic, cc_main from convert import * from multidict import multidict @@ -529,7 +529,7 @@ class SimObject(object): def getCCObject(self): if not self._ccObject: self._ccObject = -1 # flag to catch cycles in recursion - self._ccObject = m5.main.createSimObject(self.path()) + self._ccObject = cc_main.createSimObject(self.path()) elif self._ccObject == -1: raise RuntimeError, "%s: recursive call to getCCObject()" \ % self.path() @@ -1443,7 +1443,7 @@ class PortRef(object): if self.ccConnected: # already done this return peer = self.peer - m5.main.connectPorts(self.simobj.getCCObject(), self.name, self.index, + cc_main.connectPorts(self.simobj.getCCObject(), self.name, self.index, peer.simobj.getCCObject(), peer.name, peer.index) self.ccConnected = True peer.ccConnected = True diff --git a/src/sim/main.cc b/src/sim/main.cc index fc057ea6f5..7a0fbfa614 100644 --- a/src/sim/main.cc +++ b/src/sim/main.cc @@ -166,7 +166,7 @@ sayHello(ostream &out) } -extern "C" { void init_main(); } +extern "C" { void init_cc_main(); } int main(int argc, char **argv) @@ -258,8 +258,8 @@ main(int argc, char **argv) Py_Initialize(); PySys_SetArgv(argc, argv); - // initialize SWIG 'main' module - init_main(); + // initialize SWIG 'cc_main' module + init_cc_main(); if (argc > 0) { // extra arg(s): first is script file, remaining ones are args From e76a12834b7badeba86300b419cb5bd5fc911495 Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Sat, 17 Jun 2006 17:49:38 -0400 Subject: [PATCH 040/152] Add myself to list of authors --HG-- extra : convert_revision : 0c9a892127476d586fb8bff1b0eb7342c6b1f166 --- src/arch/alpha/system.cc | 1 + src/arch/alpha/system.hh | 1 + src/base/timebuf.hh | 3 ++- src/dev/ns_gige.cc | 3 ++- 
src/dev/ns_gige.hh | 3 ++- src/dev/platform.hh | 1 + src/kern/linux/events.cc | 3 ++- src/kern/linux/events.hh | 3 ++- src/kern/linux/printk.cc | 3 ++- src/kern/linux/printk.hh | 3 ++- src/kern/system_events.hh | 3 ++- src/sim/serialize.cc | 3 ++- src/sim/serialize.hh | 3 ++- 13 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/arch/alpha/system.cc b/src/arch/alpha/system.cc index 3aaba7d58c..dce7365aa0 100644 --- a/src/arch/alpha/system.cc +++ b/src/arch/alpha/system.cc @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Ali Saidi + * Nathan Binkert */ #include "arch/alpha/ev5.hh" diff --git a/src/arch/alpha/system.hh b/src/arch/alpha/system.hh index b26a5e3018..0f4f64581f 100644 --- a/src/arch/alpha/system.hh +++ b/src/arch/alpha/system.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Ali Saidi + * Nathan Binkert */ #ifndef __ARCH_ALPHA_SYSTEM_HH__ diff --git a/src/base/timebuf.hh b/src/base/timebuf.hh index 6a326d25ad..160a97034d 100644 --- a/src/base/timebuf.hh +++ b/src/base/timebuf.hh @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Kevin Lim + * Authors: Nathan Binkert + * Kevin Lim */ #ifndef __BASE_TIMEBUF_HH__ diff --git a/src/dev/ns_gige.cc b/src/dev/ns_gige.cc index decffaf737..360fe8c9be 100644 --- a/src/dev/ns_gige.cc +++ b/src/dev/ns_gige.cc @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* - * Authors: Lisa Hsu + * Authors: Nathan Binkert + * Lisa Hsu */ /** @file diff --git a/src/dev/ns_gige.hh b/src/dev/ns_gige.hh index 2de11c9512..2f47026f38 100644 --- a/src/dev/ns_gige.hh +++ b/src/dev/ns_gige.hh @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Lisa Hsu + * Authors: Nathan Binkert + * Lisa Hsu */ /** @file diff --git a/src/dev/platform.hh b/src/dev/platform.hh index bfc229748f..0e6f4ba4a8 100644 --- a/src/dev/platform.hh +++ b/src/dev/platform.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Andrew Schultz + * Nathan Binkert */ /** diff --git a/src/kern/linux/events.cc b/src/kern/linux/events.cc index 5ff7e26dbd..289ece5cea 100644 --- a/src/kern/linux/events.cc +++ b/src/kern/linux/events.cc @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Ali Saidi + * Authors: Nathan Binkert + * Ali Saidi */ #include "arch/arguments.hh" diff --git a/src/kern/linux/events.hh b/src/kern/linux/events.hh index 65f794a9cb..b0510c18f5 100644 --- a/src/kern/linux/events.hh +++ b/src/kern/linux/events.hh @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Ali Saidi + * Authors: Nathan Binkert + * Ali Saidi */ #ifndef __KERN_LINUX_EVENTS_HH__ diff --git a/src/kern/linux/printk.cc b/src/kern/linux/printk.cc index e39a159822..004d1be2f7 100644 --- a/src/kern/linux/printk.cc +++ b/src/kern/linux/printk.cc @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* - * Authors: Ali Saidi + * Authors: Nathan Binkert + * Ali Saidi */ #include diff --git a/src/kern/linux/printk.hh b/src/kern/linux/printk.hh index f9203717ae..5ddf0a018d 100644 --- a/src/kern/linux/printk.hh +++ b/src/kern/linux/printk.hh @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Ali Saidi + * Authors: Nathan Binkert + * Ali Saidi */ #ifndef __PRINTK_HH__ diff --git a/src/kern/system_events.hh b/src/kern/system_events.hh index ccd6bd9a4e..93b5eb5287 100644 --- a/src/kern/system_events.hh +++ b/src/kern/system_events.hh @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Lisa Hsu + * Authors: Nathan Binkert + * Lisa Hsu * Ali Saidi */ diff --git a/src/sim/serialize.cc b/src/sim/serialize.cc index 07e3b8a56b..0e31391166 100644 --- a/src/sim/serialize.cc +++ b/src/sim/serialize.cc @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Erik Hallnor + * Authors: Nathan Binkert + * Erik Hallnor * Steve Reinhardt */ diff --git a/src/sim/serialize.hh b/src/sim/serialize.hh index 1bcb235e6d..64ed6142fd 100644 --- a/src/sim/serialize.hh +++ b/src/sim/serialize.hh @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* - * Authors: Erik Hallnor + * Authors: Nathan Binkert + * Erik Hallnor * Steve Reinhardt */ From 306043f62e06e8368733c2ee2a273c1ba5ea80b9 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Sat, 17 Jun 2006 17:50:11 -0400 Subject: [PATCH 041/152] remove profile.cc --HG-- extra : convert_revision : ac400789ee0cbd1cd01c28ffd149789dbd954613 --- src/SConscript | 1 - src/sim/main.cc | 5 ----- 2 files changed, 6 deletions(-) diff --git a/src/SConscript b/src/SConscript index 157a911edb..124f88708a 100644 --- a/src/SConscript +++ b/src/SConscript @@ -108,7 +108,6 @@ base_sources = Split(''' sim/main.cc python/swig/cc_main_wrap.cc sim/param.cc - sim/profile.cc sim/root.cc sim/serialize.cc sim/sim_events.cc diff --git a/src/sim/main.cc b/src/sim/main.cc index 5a99e15b46..bf844da7f3 100644 --- a/src/sim/main.cc +++ b/src/sim/main.cc @@ -427,11 +427,6 @@ finalInit() SimObject::regAllStats(); - // uncomment the following to get PC-based execution-time profile -#ifdef DO_PROFILE - init_profile((char *)&_init, (char *)&_fini); -#endif - // Check to make sure that the stats package is properly initialized Stats::check(); From 1edb2058929cf9b0d3cc7d9836549d199e592f9b Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Sat, 17 Jun 2006 17:56:33 -0400 Subject: [PATCH 042/152] remove byte_swap.hh since it's not used --HG-- extra : convert_revision : 20120d34ad2ab28d9dd5ac2907b974c40e511e9e --- src/cpu/ozone/front_end_impl.hh | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh index 467567c108..8082e01b9b 100644 --- a/src/cpu/ozone/front_end_impl.hh +++ b/src/cpu/ozone/front_end_impl.hh @@ -35,7 +35,6 @@ #include "cpu/exetrace.hh" #include "cpu/ozone/front_end.hh" #include "mem/mem_interface.hh" -#include "sim/byte_swap.hh" using namespace TheISA; From 78ea17ea30bbc67ffafcb0e23462b0968ebe188e Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Sat, 17 Jun 2006 18:14:16 -0400 Subject: [PATCH 043/152] Make the system paths more 
configurable configs/test/SysPaths.py: Make the paths more configurable --HG-- extra : convert_revision : c426b102dfe55e4b601a23e980e1b01140e0ee93 --- configs/test/SysPaths.py | 52 ++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/configs/test/SysPaths.py b/configs/test/SysPaths.py index c7c7db4e7f..9acfedc8b1 100644 --- a/configs/test/SysPaths.py +++ b/configs/test/SysPaths.py @@ -1,32 +1,42 @@ -from m5 import * +import os, sys +from os.path import isdir, join as joinpath +from os import environ as env -import os.path -import sys +systemdir = None +bindir = None +diskdir = None +scriptdir = None -# Edit the following list to include the possible paths to the binary -# and disk image directories. The first directory on the list that -# exists will be selected. -SYSTEMDIR_PATH = ['/n/poolfs/z/dist/m5/system'] +def load_defaults(): + global systemdir, bindir, diskdir, scriptdir + if not systemdir: + try: + path = env['M5_PATH'].split(':') + except KeyError: + path = [ '/dist/m5/system' ] -SYSTEMDIR = None -for d in SYSTEMDIR_PATH: - if os.path.exists(d): - SYSTEMDIR = d - break + for systemdir in path: + if os.path.isdir(systemdir): + break + else: + raise ImportError, "Can't find a path to system files." -if not SYSTEMDIR: - print >>sys.stderr, "Can't find a path to system files." 
- sys.exit(1) - -BINDIR = SYSTEMDIR + '/binaries' -DISKDIR = SYSTEMDIR + '/disks' + if not bindir: + bindir = joinpath(systemdir, 'binaries') + if not diskdir: + diskdir = joinpath(systemdir, 'disks') + if not scriptdir: + scriptdir = joinpath(systemdir, 'boot') def disk(file): - return os.path.join(DISKDIR, file) + load_defaults() + return joinpath(diskdir, file) def binary(file): - return os.path.join(BINDIR, file) + load_defaults() + return joinpath(bindir, file) def script(file): - return os.path.join(SYSTEMDIR, 'boot', file) + load_defaults() + return joinpath(scriptdir, file) From c698c03705f295459498a9a95ae8461f2867ebb0 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 17 Jun 2006 18:27:28 -0400 Subject: [PATCH 044/152] Fixes to compile under Cygwin. src/kern/linux/linux.hh: src/kern/solaris/solaris.hh: Rename BSD_HOST to the more specific NO_STAT64. src/sim/byteswap.hh: Replace set of swap_byte functions with a single templated version. Hope this fixes compiler issues with e.g. int32_t vs int disambiguation. src/sim/syscall_emul.hh: Rename BSD_HOST to the more specific NO_STAT64. Set this for __CYGWIN__. --HG-- extra : convert_revision : 86a63b4b60d2445a566321333381d79ba8ab63c4 --- src/kern/linux/linux.hh | 4 ++-- src/kern/solaris/solaris.hh | 4 ++-- src/sim/byteswap.hh | 36 +++++++++++++++++------------------- src/sim/syscall_emul.hh | 8 ++++---- 4 files changed, 25 insertions(+), 27 deletions(-) diff --git a/src/kern/linux/linux.hh b/src/kern/linux/linux.hh index af5e23b95b..e3f554a22f 100644 --- a/src/kern/linux/linux.hh +++ b/src/kern/linux/linux.hh @@ -69,7 +69,7 @@ class Linux { typedef uint32_t gid_t; //@} -#if BSD_HOST +#if NO_STAT64 typedef struct stat hst_stat; typedef struct stat hst_stat64; #else @@ -176,7 +176,7 @@ class Linux { /// Helper function to convert a host stat buffer to a target stat /// buffer. Also copies the target buffer out to the simulated /// memory space. Used by stat(), fstat(), and lstat(). 
-#if !BSD_HOST +#if !NO_STAT64 static void copyOutStatBuf(TranslatingPort *mem, Addr addr, hst_stat *host) { diff --git a/src/kern/solaris/solaris.hh b/src/kern/solaris/solaris.hh index 0fec0bcce0..b819fb6d26 100644 --- a/src/kern/solaris/solaris.hh +++ b/src/kern/solaris/solaris.hh @@ -74,7 +74,7 @@ class Solaris { typedef uint32_t nlink_t; //@} -#if BSD_HOST +#if NO_STAT64 typedef struct stat hst_stat; typedef struct stat hst_stat64; #else @@ -177,7 +177,7 @@ class Solaris { /// Helper function to convert a host stat buffer to a target stat /// buffer. Also copies the target buffer out to the simulated /// memory space. Used by stat(), fstat(), and lstat(). -#if !BSD_HOST +#if !NO_STAT64 static void copyOutStatBuf(TranslatingPort *mem, Addr addr, hst_stat *host) { diff --git a/src/sim/byteswap.hh b/src/sim/byteswap.hh index a3138a25e9..03bfad954c 100644 --- a/src/sim/byteswap.hh +++ b/src/sim/byteswap.hh @@ -90,25 +90,23 @@ swap_byte16(uint16_t x) #endif } -//This lets the compiler figure out how to call the swap_byte functions above -//for different data types. -static inline uint64_t swap_byte(uint64_t x) {return swap_byte64(x);} -static inline int64_t swap_byte(int64_t x) {return swap_byte64((uint64_t)x);} -static inline uint32_t swap_byte(uint32_t x) {return swap_byte32(x);} -static inline int32_t swap_byte(int32_t x) {return swap_byte32((uint32_t)x);} -//This is to prevent the following two functions from compiling on -//64bit machines. It won't detect everything, so it should be changed. 
-#ifndef __x86_64__ -static inline long swap_byte(long x) {return swap_byte32((long)x);} -static inline unsigned long swap_byte(unsigned long x) - { return swap_byte32((unsigned long)x);} -#endif -static inline uint16_t swap_byte(uint16_t x) {return swap_byte32(x);} -static inline int16_t swap_byte(int16_t x) {return swap_byte16((uint16_t)x);} -static inline uint8_t swap_byte(uint8_t x) {return x;} -static inline int8_t swap_byte(int8_t x) {return x;} -static inline double swap_byte(double x) {return swap_byte64((uint64_t)x);} -static inline float swap_byte(float x) {return swap_byte32((uint32_t)x);} +// This function lets the compiler figure out how to call the +// swap_byte functions above for different data types. Since the +// sizeof() values are known at compiel time, it should inline to a +// direct call to the right swap_byteNN() function. +template +static inline T swap_byte(T x) { + if (sizeof(T) == 8) + return swap_byte64((uint64_t)x); + else if (sizeof(T) == 4) + return swap_byte32((uint32_t)x); + else if (sizeof(T) == 2) + return swap_byte16((uint16_t)x); + else if (sizeof(T) == 1) + return x; + else + panic("Can't byte-swap values larger than 64 bits"); +} //The conversion functions with fixed endianness on both ends don't need to //be in a namespace diff --git a/src/sim/syscall_emul.hh b/src/sim/syscall_emul.hh index f027dbf244..a3990e2fd1 100644 --- a/src/sim/syscall_emul.hh +++ b/src/sim/syscall_emul.hh @@ -33,8 +33,8 @@ #ifndef __SIM_SYSCALL_EMUL_HH__ #define __SIM_SYSCALL_EMUL_HH__ -#define BSD_HOST (defined(__APPLE__) || defined(__OpenBSD__) || \ - defined(__FreeBSD__)) +#define NO_STAT64 (defined(__APPLE__) || defined(__OpenBSD__) || \ + defined(__FreeBSD__) || defined(__CYGWIN__)) /// /// @file syscall_emul.hh @@ -507,7 +507,7 @@ fstat64Func(SyscallDesc *desc, int callnum, Process *process, return -EBADF; } -#if BSD_HOST +#if NO_STAT64 struct stat hostBuf; int result = fstat(process->sim_fd(fd), &hostBuf); #else @@ -557,7 +557,7 @@ 
lstat64Func(SyscallDesc *desc, int callnum, Process *process, if (!tc->getMemPort()->tryReadString(path, tc->getSyscallArg(0))) return -EFAULT; -#if BSD_HOST +#if NO_STAT64 struct stat hostBuf; int result = lstat(path.c_str(), &hostBuf); #else From 174f7753ae8b5d5c856f262e770e3184f37077ea Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Sat, 17 Jun 2006 18:40:40 -0400 Subject: [PATCH 045/152] minor header cleanups src/dev/alpha_console.cc: Remove my name twice from header src/dev/ide_disk.cc: Spell my full name correctly src/mem/bus.hh: I think I edited much of this src/sim/byteswap.hh: I believe most of this code is mine or nate's --HG-- extra : convert_revision : b672b5de5492e04d2880fb51e7d63bc5587f2954 --- src/mem/bus.hh | 1 + src/sim/byteswap.hh | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mem/bus.hh b/src/mem/bus.hh index c2b78c31fd..9c7054b949 100644 --- a/src/mem/bus.hh +++ b/src/mem/bus.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Ron Dreslinski + * Ali Saidi */ /** diff --git a/src/sim/byteswap.hh b/src/sim/byteswap.hh index a3138a25e9..c7abc4bff3 100644 --- a/src/sim/byteswap.hh +++ b/src/sim/byteswap.hh @@ -25,7 +25,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* - * Authors: Gabe Black + * Authors: Ali Saidi + * Nathan Binkert */ //The purpose of this file is to provide endainness conversion utility From 8dfc475a9d91eb17da5191e018c3991a3ff99c5a Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Sat, 17 Jun 2006 19:06:28 -0400 Subject: [PATCH 046/152] add mac os x fast byte swap code --HG-- extra : convert_revision : 591e5adbf86feb894fceea982b9303da70a41955 --- src/sim/byteswap.hh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/sim/byteswap.hh b/src/sim/byteswap.hh index c78c77364c..c1d846aa8e 100644 --- a/src/sim/byteswap.hh +++ b/src/sim/byteswap.hh @@ -49,6 +49,10 @@ #include #endif +#if defined(__APPLE__) +#include +#endif + //These functions actually perform the swapping for parameters //of various bit lengths static inline uint64_t @@ -56,6 +60,8 @@ swap_byte64(uint64_t x) { #if defined(linux) return bswap_64(x); +#elif defined(__APPLE__) + return OSSwapInt64(x); #else return (uint64_t)((((uint64_t)(x) & 0xff) << 56) | ((uint64_t)(x) & 0xff00ULL) << 40 | @@ -73,6 +79,8 @@ swap_byte32(uint32_t x) { #if defined(linux) return bswap_32(x); +#elif defined(__APPLE__) + return OSSwapInt32(x); #else return (uint32_t)(((uint32_t)(x) & 0xff) << 24 | ((uint32_t)(x) & 0xff00) << 8 | ((uint32_t)(x) & 0xff0000) >> 8 | @@ -85,6 +93,8 @@ swap_byte16(uint16_t x) { #if defined(linux) return bswap_16(x); +#elif defined(__APPLE__) + return OSSwapInt16(x); #else return (uint16_t)(((uint16_t)(x) & 0xff) << 8 | ((uint16_t)(x) & 0xff00) >> 8); From d91f4d172a9c9f82348899018c296eea186b2d74 Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Sat, 17 Jun 2006 19:18:53 -0400 Subject: [PATCH 047/152] include misc.hh for panic --HG-- extra : convert_revision : 05e59f45b98e862f9d61bec223871b314eb2195e --- src/sim/byteswap.hh | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sim/byteswap.hh b/src/sim/byteswap.hh index c1d846aa8e..f1f244150f 100644 --- a/src/sim/byteswap.hh +++ b/src/sim/byteswap.hh @@ -36,6 +36,7 @@ #ifndef 
__SIM_BYTE_SWAP_HH__ #define __SIM_BYTE_SWAP_HH__ +#include "base/misc.hh" #include "sim/host.hh" // This lets us figure out what the byte order of the host system is From f3d74759ca2c21b45e4cb9255ba4c3cd699b90d5 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Sat, 17 Jun 2006 21:39:25 -0400 Subject: [PATCH 048/152] Split off instantiation into separate CC files for each of the models. This makes it easier to be able to specify only certain CPU models. src/cpu/SConscript: Split off instantiations into separate CC files. This makes it easier to split them per CPU model. src/cpu/base_dyn_inst_impl.hh: Move instantiations out of impl.hh file and into a cc file. src/cpu/checker/cpu_impl.hh: Move instantiations over to .cc files inside each CPU's directory. Makes it easier to only use what's actually included. src/cpu/o3/bpred_unit.cc: Pull Ozone instantiations out of this .cc file; put them into the ozone's CC file. src/cpu/o3/checker_builder.cc: Instantiate Checker for O3 CPU. src/cpu/ozone/checker_builder.cc: Instantiate Checker for Ozone CPU.
--HG-- rename : src/cpu/base_dyn_inst.cc => src/cpu/base_dyn_inst_impl.hh rename : src/cpu/checker/cpu.cc => src/cpu/checker/cpu_impl.hh rename : src/cpu/checker/o3_builder.cc => src/cpu/o3/checker_builder.cc rename : src/cpu/checker/ozone_builder.cc => src/cpu/ozone/checker_builder.cc extra : convert_revision : 4e5f928b165379c06d31071c544ea46cf0b8fa71 --- src/cpu/SConscript | 11 ++--- ...base_dyn_inst.cc => base_dyn_inst_impl.hh} | 26 ------------ src/cpu/checker/{cpu.cc => cpu_impl.hh} | 14 +------ src/cpu/o3/base_dyn_inst.cc | 40 +++++++++++++++++++ src/cpu/o3/bpred_unit.cc | 4 -- .../o3_builder.cc => o3/checker_builder.cc} | 5 ++- src/cpu/ozone/bpred_unit.cc | 36 +++++++++++++++++ .../checker_builder.cc} | 5 ++- src/cpu/ozone/ozone_base_dyn_inst.cc | 39 ++++++++++++++++++ src/cpu/ozone/simple_base_dyn_inst.cc | 39 ++++++++++++++++++ 10 files changed, 169 insertions(+), 50 deletions(-) rename src/cpu/{base_dyn_inst.cc => base_dyn_inst_impl.hh} (93%) rename src/cpu/checker/{cpu.cc => cpu_impl.hh} (98%) create mode 100644 src/cpu/o3/base_dyn_inst.cc rename src/cpu/{checker/o3_builder.cc => o3/checker_builder.cc} (97%) create mode 100644 src/cpu/ozone/bpred_unit.cc rename src/cpu/{checker/ozone_builder.cc => ozone/checker_builder.cc} (98%) create mode 100644 src/cpu/ozone/ozone_base_dyn_inst.cc create mode 100644 src/cpu/ozone/simple_base_dyn_inst.cc diff --git a/src/cpu/SConscript b/src/cpu/SConscript index 608625ed4a..ab3c1f1d28 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -122,11 +122,11 @@ if 'FastCPU' in env['CPU_MODELS']: if 'AlphaO3CPU' in env['CPU_MODELS']: sources += Split(''' - base_dyn_inst.cc o3/2bit_local_pred.cc o3/alpha_dyn_inst.cc o3/alpha_cpu.cc o3/alpha_cpu_builder.cc + o3/base_dyn_inst.cc o3/bpred_unit.cc o3/btb.cc o3/commit.cc @@ -149,7 +149,7 @@ if 'AlphaO3CPU' in env['CPU_MODELS']: o3/tournament_pred.cc ''') if 'CheckerCPU' in env['CPU_MODELS']: - sources += Split('checker/o3_builder.cc') + sources += 
Split('o3/checker_builder.cc') if 'OzoneSimpleCPU' in env['CPU_MODELS']: sources += Split(''' @@ -161,18 +161,19 @@ if 'OzoneSimpleCPU' in env['CPU_MODELS']: ozone/inst_queue.cc ozone/rename_table.cc ''') - if 'CheckerCPU' in env['CPU_MODELS']: - sources += Split('checker/ozone_builder.cc') if 'OzoneCPU' in env['CPU_MODELS']: sources += Split(''' + ozone/base_dyn_inst.cc + ozone/bpred_unit.cc ozone/lsq_unit.cc ozone/lw_back_end.cc ozone/lw_lsq.cc ''') + if 'CheckerCPU' in env['CPU_MODELS']: + sources += Split('ozone/checker_builder.cc') if 'CheckerCPU' in env['CPU_MODELS']: - sources += Split('checker/cpu.cc') checker_supports = False for i in CheckerSupportedCPUList: if i in env['CPU_MODELS']: diff --git a/src/cpu/base_dyn_inst.cc b/src/cpu/base_dyn_inst_impl.hh similarity index 93% rename from src/cpu/base_dyn_inst.cc rename to src/cpu/base_dyn_inst_impl.hh index 5e2a6392a7..91424faad5 100644 --- a/src/cpu/base_dyn_inst.cc +++ b/src/cpu/base_dyn_inst_impl.hh @@ -41,10 +41,6 @@ #include "mem/request.hh" #include "cpu/base_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" -#include "cpu/o3/alpha_cpu.hh" -//#include "cpu/ozone/simple_impl.hh" -//#include "cpu/ozone/ozone_impl.hh" using namespace std; using namespace TheISA; @@ -301,25 +297,3 @@ BaseDynInst::eaSrcsReady() return true; } - -// Forward declaration -template class BaseDynInst; - -template <> -int -BaseDynInst::instcount = 0; -/* -// Forward declaration -template class BaseDynInst; - -template <> -int -BaseDynInst::instcount = 0; - -// Forward declaration -template class BaseDynInst; - -template <> -int -BaseDynInst::instcount = 0; -*/ diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu_impl.hh similarity index 98% rename from src/cpu/checker/cpu.cc rename to src/cpu/checker/cpu_impl.hh index 45c57d276a..5091c7c1ad 100644 --- a/src/cpu/checker/cpu.cc +++ b/src/cpu/checker/cpu_impl.hh @@ -43,16 +43,10 @@ #include "sim/sim_object.hh" #include "sim/stats.hh" -#include "cpu/o3/alpha_dyn_inst.hh" -#include 
"cpu/o3/alpha_impl.hh" - -//#include "cpu/ozone/dyn_inst.hh" -//#include "cpu/ozone/ozone_impl.hh" -//#include "cpu/ozone/simple_impl.hh" - #if FULL_SYSTEM #include "sim/system.hh" #include "arch/vtophys.hh" +#include "kern/kernel_stats.hh" #endif // FULL_SYSTEM using namespace std; @@ -826,9 +820,3 @@ Checker::dumpInsts() } } - -//template -//class Checker > >; -// Manually instantiate checker -template -class Checker > >; diff --git a/src/cpu/o3/base_dyn_inst.cc b/src/cpu/o3/base_dyn_inst.cc new file mode 100644 index 0000000000..1f7540d6a9 --- /dev/null +++ b/src/cpu/o3/base_dyn_inst.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "cpu/base_dyn_inst_impl.hh" +#include "cpu/o3/alpha_cpu.hh" +#include "cpu/o3/alpha_impl.hh" + +// Explicit instantiation +template class BaseDynInst; + +template <> +int +BaseDynInst::instcount = 0; diff --git a/src/cpu/o3/bpred_unit.cc b/src/cpu/o3/bpred_unit.cc index 2944387045..c35c0a0aa9 100644 --- a/src/cpu/o3/bpred_unit.cc +++ b/src/cpu/o3/bpred_unit.cc @@ -31,9 +31,5 @@ #include "cpu/o3/bpred_unit_impl.hh" #include "cpu/o3/alpha_impl.hh" #include "cpu/o3/alpha_dyn_inst.hh" -//#include "cpu/ozone/ozone_impl.hh" -//#include "cpu/ozone/simple_impl.hh" template class BPredUnit; -//template class BPredUnit; -//template class BPredUnit; diff --git a/src/cpu/checker/o3_builder.cc b/src/cpu/o3/checker_builder.cc similarity index 97% rename from src/cpu/checker/o3_builder.cc rename to src/cpu/o3/checker_builder.cc index 534a5e28c3..58c40d00c9 100644 --- a/src/cpu/checker/o3_builder.cc +++ b/src/cpu/o3/checker_builder.cc @@ -30,7 +30,7 @@ #include -#include "cpu/checker/cpu.hh" +#include "cpu/checker/cpu_impl.hh" #include "cpu/inst_seq.hh" #include "cpu/o3/alpha_dyn_inst.hh" #include "cpu/o3/alpha_impl.hh" @@ -40,6 +40,9 @@ class MemObject; +template +class Checker > >; + /** * Specific non-templated derived class used for SimObject configuration. 
*/ diff --git a/src/cpu/ozone/bpred_unit.cc b/src/cpu/ozone/bpred_unit.cc new file mode 100644 index 0000000000..835324ce1b --- /dev/null +++ b/src/cpu/ozone/bpred_unit.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Kevin Lim + */ + +#include "cpu/o3/bpred_unit_impl.hh" +#include "cpu/ozone/ozone_impl.hh" +#include "cpu/ozone/simple_impl.hh" + +template class BPredUnit; +template class BPredUnit; diff --git a/src/cpu/checker/ozone_builder.cc b/src/cpu/ozone/checker_builder.cc similarity index 98% rename from src/cpu/checker/ozone_builder.cc rename to src/cpu/ozone/checker_builder.cc index 3c43ab503f..f6786f24b9 100644 --- a/src/cpu/checker/ozone_builder.cc +++ b/src/cpu/ozone/checker_builder.cc @@ -30,7 +30,7 @@ #include -#include "cpu/checker/cpu.hh" +#include "cpu/checker/cpu_impl.hh" #include "cpu/inst_seq.hh" #include "cpu/ozone/dyn_inst.hh" #include "cpu/ozone/ozone_impl.hh" @@ -39,6 +39,9 @@ #include "sim/process.hh" #include "sim/sim_object.hh" +template +class Checker > >; + /** * Specific non-templated derived class used for SimObject configuration. */ diff --git a/src/cpu/ozone/ozone_base_dyn_inst.cc b/src/cpu/ozone/ozone_base_dyn_inst.cc new file mode 100644 index 0000000000..5a3a69dff8 --- /dev/null +++ b/src/cpu/ozone/ozone_base_dyn_inst.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "cpu/base_dyn_inst_impl.hh" +#include "cpu/ozone/ozone_impl.hh" + +// Explicit instantiation +template class BaseDynInst; + +template <> +int +BaseDynInst::instcount = 0; diff --git a/src/cpu/ozone/simple_base_dyn_inst.cc b/src/cpu/ozone/simple_base_dyn_inst.cc new file mode 100644 index 0000000000..fdaeaf57e5 --- /dev/null +++ b/src/cpu/ozone/simple_base_dyn_inst.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "cpu/base_dyn_inst_impl.hh" +#include "cpu/ozone/simple_impl.hh" + +// Explicit instantiation +template class BaseDynInst; + +template <> +int +BaseDynInst::instcount = 0; From dfe0ea6cba9c0765c0217a835ed2a1b56837dbb2 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Sat, 17 Jun 2006 22:01:30 -0400 Subject: [PATCH 049/152] Fix up code to be able to use the Checker. 
SConstruct: Remove check for Checker from this SConstruct src/arch/SConscript: Specific check if CheckerCPU is being used. Not the cleanest, but works for now. src/cpu/SConscript: Code to handle using the CheckerCPU a little better. Allows -c to be used normally. --HG-- extra : convert_revision : 0a82f16db0f38e5ce114d08368477bd211331fa3 --- SConstruct | 4 ---- src/arch/SConscript | 5 +++++ src/cpu/SConscript | 21 +++++++++++++++------ 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/SConstruct b/SConstruct index c9ba13679c..ca87842bae 100644 --- a/SConstruct +++ b/SConstruct @@ -472,10 +472,6 @@ for build_path in build_paths: env.ParseConfig(mysql_config_libs) env.ParseConfig(mysql_config_include) - # Check if the Checker is being used. If so append it to env['CPU_MODELS'] - if env['USE_CHECKER']: - env['CPU_MODELS'].append('CheckerCPU') - # Save sticky option settings back to current options file sticky_opts.Save(current_opts_file, env) diff --git a/src/arch/SConscript b/src/arch/SConscript index ff460dafd5..bc517341a4 100644 --- a/src/arch/SConscript +++ b/src/arch/SConscript @@ -128,6 +128,11 @@ isa_desc_gen_files = Split('decoder.cc decoder.hh') isa_desc_gen_files += [CpuModel.dict[cpu].filename for cpu in env['CPU_MODELS']] +# Also include the CheckerCPU as one of the models if it is being +# enabled via command line. +if env['USE_CHECKER']: + isa_desc_gen_files += [CpuModel.dict['CheckerCPU'].filename] + # The emitter patches up the sources & targets to include the # autogenerated files as targets and isa parser itself as a source. 
def isa_desc_emitter(target, source, env): diff --git a/src/cpu/SConscript b/src/cpu/SConscript index ab3c1f1d28..1bb8e8a9f6 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -68,6 +68,13 @@ mem_comp_sig_template = ''' virtual Fault completeAcc(uint8_t *data, %s *xc, Trace::InstRecord *traceData) const { panic("Not defined!"); return NoFault; }; ''' +# Generate a temporary CPU list, including the CheckerCPU if +# it's enabled. This isn't used for anything else other than StaticInst +# headers. +temp_cpu_list = env['CPU_MODELS'] +if env['USE_CHECKER']: + temp_cpu_list.append('CheckerCPU') + # Generate header. def gen_cpu_exec_signatures(target, source, env): f = open(str(target[0]), 'w') @@ -75,7 +82,7 @@ def gen_cpu_exec_signatures(target, source, env): #ifndef __CPU_STATIC_INST_EXEC_SIGS_HH__ #define __CPU_STATIC_INST_EXEC_SIGS_HH__ ''' - for cpu in env['CPU_MODELS']: + for cpu in temp_cpu_list: xc_type = CpuModel.dict[cpu].strings['CPU_exec_context'] print >> f, exec_sig_template % (xc_type, xc_type, xc_type) print >> f, ''' @@ -85,12 +92,14 @@ def gen_cpu_exec_signatures(target, source, env): # Generate string that gets printed when header is rebuilt def gen_sigs_string(target, source, env): return "Generating static_inst_exec_sigs.hh: " \ - + ', '.join(env['CPU_MODELS']) + + ', '.join(temp_cpu_list) # Add command to generate header to environment. env.Command('static_inst_exec_sigs.hh', models_db, Action(gen_cpu_exec_signatures, gen_sigs_string, - varlist = ['CPU_MODELS'])) + varlist = temp_cpu_list)) + +env.Depends('static_inst_exec_sigs.hh', Value(env['USE_CHECKER'])) # List of suppported CPUs by the Checker. Errors out if USE_CHECKER=True # and one of these are not being used. 
@@ -148,7 +157,7 @@ if 'AlphaO3CPU' in env['CPU_MODELS']: o3/store_set.cc o3/tournament_pred.cc ''') - if 'CheckerCPU' in env['CPU_MODELS']: + if env['USE_CHECKER']: sources += Split('o3/checker_builder.cc') if 'OzoneSimpleCPU' in env['CPU_MODELS']: @@ -170,10 +179,10 @@ if 'OzoneCPU' in env['CPU_MODELS']: ozone/lw_back_end.cc ozone/lw_lsq.cc ''') - if 'CheckerCPU' in env['CPU_MODELS']: + if env['USE_CHECKER']: sources += Split('ozone/checker_builder.cc') -if 'CheckerCPU' in env['CPU_MODELS']: +if env['USE_CHECKER']: checker_supports = False for i in CheckerSupportedCPUList: if i in env['CPU_MODELS']: From 393e77fbe94ccbcc422d2575c500d1590ca87d00 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Sat, 17 Jun 2006 22:04:48 -0400 Subject: [PATCH 050/152] Change options back to just being flags instead of taking in a True/False value. src/python/m5/__init__.py: Change up options. Now setting the flag enables/disables, each of which is the opposite of the default values found in the Python class. 
--HG-- extra : convert_revision : 23889b89e6105a437a74906587d90ab6ba885c97 --- src/python/m5/__init__.py | 88 ++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 52 deletions(-) diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py index c0728120c5..d1e443b641 100644 --- a/src/python/m5/__init__.py +++ b/src/python/m5/__init__.py @@ -70,32 +70,32 @@ def setTraceStart(option, opt_str, value, parser): def setTraceFile(option, opt_str, value, parser): objects.Trace.file = value -def usePCSymbol(option, opt_str, value, parser): - objects.ExecutionTrace.pc_symbol = value +def noPCSymbol(option, opt_str, value, parser): + objects.ExecutionTrace.pc_symbol = False -def printCycle(option, opt_str, value, parser): - objects.ExecutionTrace.print_cycle = value +def noPrintCycle(option, opt_str, value, parser): + objects.ExecutionTrace.print_cycle = False -def printOp(option, opt_str, value, parser): - objects.ExecutionTrace.print_opclass = value +def noPrintOpclass(option, opt_str, value, parser): + objects.ExecutionTrace.print_opclass = False -def printThread(option, opt_str, value, parser): - objects.ExecutionTrace.print_thread = value +def noPrintThread(option, opt_str, value, parser): + objects.ExecutionTrace.print_thread = False -def printEA(option, opt_str, value, parser): - objects.ExecutionTrace.print_effaddr = value +def noPrintEA(option, opt_str, value, parser): + objects.ExecutionTrace.print_effaddr = False -def printData(option, opt_str, value, parser): - objects.ExecutionTrace.print_data = value +def noPrintData(option, opt_str, value, parser): + objects.ExecutionTrace.print_data = False def printFetchseq(option, opt_str, value, parser): - objects.ExecutionTrace.print_fetchseq = value + objects.ExecutionTrace.print_fetchseq = True def printCpseq(option, opt_str, value, parser): - objects.ExecutionTrace.print_cpseq = value + objects.ExecutionTrace.print_cpseq = True def dumpOnExit(option, opt_str, value, parser): - 
objects.Trace.dump_on_exit = value + objects.Trace.dump_on_exit = True def debugBreak(option, opt_str, value, parser): objects.Debug.break_cycles = value @@ -116,47 +116,31 @@ standardOptions = [ callback=setTraceStart), optparse.make_option("--tracefile", type="string", action="callback", callback=setTraceFile), - optparse.make_option("--pcsymbol", type="choice", choices=TrueOrFalse, - default="True", metavar=TorF, - action="callback", callback=usePCSymbol, - help="Use PC symbols in trace output"), - optparse.make_option("--printcycle", type="choice", choices=TrueOrFalse, - default="True", metavar=TorF, - action="callback", callback=printCycle, - help="Print cycle numbers in trace output"), - optparse.make_option("--printopclass", type="choice", - choices=TrueOrFalse, - default="True", metavar=TorF, - action="callback", callback=printOp, - help="Print cycle numbers in trace output"), - optparse.make_option("--printthread", type="choice", - choices=TrueOrFalse, - default="True", metavar=TorF, - action="callback", callback=printThread, - help="Print thread number in trace output"), - optparse.make_option("--printeffaddr", type="choice", - choices=TrueOrFalse, - default="True", metavar=TorF, - action="callback", callback=printEA, - help="Print effective address in trace output"), - optparse.make_option("--printdata", type="choice", - choices=TrueOrFalse, - default="True", metavar=TorF, - action="callback", callback=printData, - help="Print result data in trace output"), - optparse.make_option("--printfetchseq", type="choice", - choices=TrueOrFalse, - default="True", metavar=TorF, + optparse.make_option("--nopcsymbol", + action="callback", callback=noPCSymbol, + help="Disable PC symbols in trace output"), + optparse.make_option("--noprintcycle", + action="callback", callback=noPrintCycle, + help="Don't print cycle numbers in trace output"), + optparse.make_option("--noprintopclass", + action="callback", callback=noPrintOpclass, + help="Don't print op class type in 
trace output"), + optparse.make_option("--noprintthread", + action="callback", callback=noPrintThread, + help="Don't print thread number in trace output"), + optparse.make_option("--noprinteffaddr", + action="callback", callback=noPrintEA, + help="Don't print effective address in trace output"), + optparse.make_option("--noprintdata", + action="callback", callback=noPrintData, + help="Don't print result data in trace output"), + optparse.make_option("--printfetchseq", action="callback", callback=printFetchseq, help="Print fetch sequence numbers in trace output"), - optparse.make_option("--printcpseq", type="choice", - choices=TrueOrFalse, - default="True", metavar=TorF, + optparse.make_option("--printcpseq", action="callback", callback=printCpseq, help="Print correct path sequence numbers in trace output"), - optparse.make_option("--dumponexit", type="choice", - choices=TrueOrFalse, - default="True", metavar=TorF, + optparse.make_option("--dumponexit", action="callback", callback=dumpOnExit, help="Dump trace buffer on exit"), optparse.make_option("--debugbreak", type="int", metavar="CYCLE", From 8f0d04cd10b4c09b336ea2c2fc5e73af1b3ff0b3 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sat, 17 Jun 2006 22:28:40 -0400 Subject: [PATCH 051/152] Delete old doxygen docs... now on wiki. Update release scripts for new tree structure. --HG-- extra : convert_revision : 35603f5476abd296625f777718c1245593a5dfc4 From 14b9cda9f62cb5c7f580b08aefbf7797d0bc8a12 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Sat, 17 Jun 2006 22:55:00 -0400 Subject: [PATCH 052/152] Minor updates. src/cpu/o3/alpha_cpu.hh: Fix #define in header. util/rundiff: Fix file comments to be more correct. util/tracediff: Update comments to be more correct. 
--HG-- extra : convert_revision : a28030ce8979de3d9361191c6af23743460dc53e --- src/cpu/o3/alpha_cpu.hh | 6 +++--- util/rundiff | 2 +- util/tracediff | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/cpu/o3/alpha_cpu.hh b/src/cpu/o3/alpha_cpu.hh index 4daa8b3ba9..55b9751420 100644 --- a/src/cpu/o3/alpha_cpu.hh +++ b/src/cpu/o3/alpha_cpu.hh @@ -28,8 +28,8 @@ * Authors: Kevin Lim */ -#ifndef __CPU_O3_ALPHA_FULL_CPU_HH__ -#define __CPU_O3_ALPHA_FULL_CPU_HH__ +#ifndef __CPU_O3_ALPHA_CPU_HH__ +#define __CPU_O3_ALPHA_CPU_HH__ #include "arch/isa_traits.hh" #include "cpu/thread_context.hh" @@ -431,4 +431,4 @@ class AlphaO3CPU : public FullO3CPU bool lockFlag; }; -#endif // __CPU_O3_ALPHA_FULL_CPU_HH__ +#endif // __CPU_O3_ALPHA_CPU_HH__ diff --git a/util/rundiff b/util/rundiff index 533f448b10..c34bb53a3f 100755 --- a/util/rundiff +++ b/util/rundiff @@ -39,7 +39,7 @@ # "filename" is a pipe (|). Thus to compare the instruction traces # from two versions of m5 (m5a and m5b), you can do this: # -# rundiff 'm5a --trace:flags=InstExec |' 'm5b --trace:flags=InstExec |' +# rundiff 'm5a --traceflags=InstExec |' 'm5b --traceflags=InstExec |' # use strict; diff --git a/util/tracediff b/util/tracediff index f11431293d..a7efc260d5 100755 --- a/util/tracediff +++ b/util/tracediff @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright (c) 2003-2005 The Regents of The University of Michigan +# Copyright (c) 2003-2006 The Regents of The University of Michigan # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -36,7 +36,7 @@ # If you want to pass different arguments to the two instances of m5, # you can embed them in the simulator arguments like this: # -# % tracediff "m5.opt --foo.bar=1" "m5.opt --foo.bar=2" [common args] +# % tracediff "m5.opt --option1" "m5.opt --option2" [common args] # if (@ARGV < 2) { From 69c34554e5d621d45ce68cbd643baf9b0d049240 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Sun, 18 Jun 2006 11:10:08 -0400 Subject: [PATCH 053/152] minor device fixups configs/test/SysPaths.py: remove some tabs and add /n/poolfs/z/dist/m5/system src/dev/io_device.cc: fix since pio timing dma packts colud be nacked too src/dev/io_device.hh: move DmaReqState into DmaDevie --HG-- extra : convert_revision : 2b5300d85ab33b3753afc54bc6a04a47b6e00d20 --- configs/test/SysPaths.py | 2 +- src/dev/io_device.cc | 27 ++++++++++++++++++-------- src/dev/io_device.hh | 41 ++++++++++++++++++++-------------------- 3 files changed, 40 insertions(+), 30 deletions(-) diff --git a/configs/test/SysPaths.py b/configs/test/SysPaths.py index 9acfedc8b1..e458d52253 100644 --- a/configs/test/SysPaths.py +++ b/configs/test/SysPaths.py @@ -13,7 +13,7 @@ def load_defaults(): try: path = env['M5_PATH'].split(':') except KeyError: - path = [ '/dist/m5/system' ] + path = [ '/dist/m5/system', '/n/poolfs/z/dist/m5/system' ] for systemdir in path: if os.path.isdir(systemdir): diff --git a/src/dev/io_device.cc b/src/dev/io_device.cc index 485216874d..e769ef0372 100644 --- a/src/dev/io_device.cc +++ b/src/dev/io_device.cc @@ -62,13 +62,14 @@ PioPort::getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) void PioPort::recvRetry() { - Packet* pkt = transmitList.front(); - if (Port::sendTiming(pkt)) { - transmitList.pop_front(); + bool result = true; + while (result && transmitList.size()) { + result = Port::sendTiming(transmitList.front()); + if (result) + transmitList.pop_front(); } } - void PioPort::SendEvent::process() 
{ @@ -83,10 +84,20 @@ PioPort::SendEvent::process() bool PioPort::recvTiming(Packet *pkt) { - Tick latency = device->recvAtomic(pkt); - // turn packet around to go back to requester - pkt->makeTimingResponse(); - sendTiming(pkt, latency); + if (pkt->result == Packet::Nacked) { + pkt->reinitNacked(); + if (transmitList.size()) { + transmitList.push_front(pkt); + } else { + if (!Port::sendTiming(pkt)) + transmitList.push_front(pkt); + } + } else { + Tick latency = device->recvAtomic(pkt); + // turn packet around to go back to requester + pkt->makeTimingResponse(); + sendTiming(pkt, latency); + } return true; } diff --git a/src/dev/io_device.hh b/src/dev/io_device.hh index cd2c25eeb7..a2b61c7f43 100644 --- a/src/dev/io_device.hh +++ b/src/dev/io_device.hh @@ -119,30 +119,29 @@ class PioPort : public Port }; -struct DmaReqState : public Packet::SenderState -{ - /** Event to call on the device when this transaction (all packets) - * complete. */ - Event *completionEvent; - - /** Where we came from for some sanity checking. */ - Port *outPort; - - /** Total number of bytes that this transaction involves. */ - Addr totBytes; - - /** Number of bytes that have been acked for this transaction. */ - Addr numBytes; - - bool final; - DmaReqState(Event *ce, Port *p, Addr tb) - : completionEvent(ce), outPort(p), totBytes(tb), numBytes(0) - {} -}; - class DmaPort : public Port { protected: + struct DmaReqState : public Packet::SenderState + { + /** Event to call on the device when this transaction (all packets) + * complete. */ + Event *completionEvent; + + /** Where we came from for some sanity checking. */ + Port *outPort; + + /** Total number of bytes that this transaction involves. */ + Addr totBytes; + + /** Number of bytes that have been acked for this transaction. 
*/ + Addr numBytes; + + DmaReqState(Event *ce, Port *p, Addr tb) + : completionEvent(ce), outPort(p), totBytes(tb), numBytes(0) + {} + }; + DmaDevice *device; std::list transmitList; From 7171811cad27f4b62ec30ec41e087f74a479d10e Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sun, 18 Jun 2006 12:30:24 -0400 Subject: [PATCH 054/152] Put sconsign in bulid dir. --HG-- extra : convert_revision : e1be318e99037842501306e7c35cf4d6690ebdce --- SConstruct | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SConstruct b/SConstruct index 8ddd07c636..1c9beea1d3 100644 --- a/SConstruct +++ b/SConstruct @@ -159,7 +159,7 @@ env = Environment(ENV = os.environ, # inherit user's environment vars ROOT = ROOT, SRCDIR = SRCDIR) -env.SConsignFile("sconsign") +env.SConsignFile(os.path.join(build_root,"sconsign")) # Default duplicate option is to use hard links, but this messes up # when you use emacs to edit a file in the target dir, as emacs moves From ca25e709077b8407fa378e2e19d273023fa0afa9 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 18 Jun 2006 15:58:14 -0400 Subject: [PATCH 055/152] use 'tick' instead of 'cycle' --HG-- extra : convert_revision : e7119d20ef95deab16081743c885979b0fa85548 --- configs/test/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/test/test.py b/configs/test/test.py index a570c1a086..48d43cee93 100644 --- a/configs/test/test.py +++ b/configs/test/test.py @@ -55,5 +55,5 @@ if options.maxtick: else: exit_event = m5.simulate() -print 'Exiting @ cycle', m5.curTick(), 'because', exit_event.getCause() +print 'Exiting @ tick', m5.curTick(), 'because', exit_event.getCause() From 0fc3055e964c96d3f975f791a7ad36db3d0e4e09 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 22 Jun 2006 18:03:08 -0400 Subject: [PATCH 056/152] Fix to have the static inst exec sigs also dependent on the CPU models used. 
--HG-- extra : convert_revision : 65d978d638dd9a57a641ca52adcf2c0ef48edf1c --- src/cpu/SConscript | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cpu/SConscript b/src/cpu/SConscript index eea9ba64b9..3ca4a14716 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -100,6 +100,7 @@ env.Command('static_inst_exec_sigs.hh', models_db, varlist = temp_cpu_list)) env.Depends('static_inst_exec_sigs.hh', Value(env['USE_CHECKER'])) +env.Depends('static_inst_exec_sigs.hh', Value(env['CPU_MODELS'])) # List of suppported CPUs by the Checker. Errors out if USE_CHECKER=True # and one of these are not being used. From 9ca5427c0367d9aa391f491b8a1a4309f3b24385 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 22 Jun 2006 18:05:12 -0400 Subject: [PATCH 057/152] Split Checker up properly into templated and non-templated definitions. --HG-- extra : convert_revision : 3ead18e42f4a536f2f868da07cb81a8940a7fa2f --- src/cpu/SConscript | 1 + src/cpu/checker/cpu.cc | 408 ++++++++++++++++++++++++++++++++++++ src/cpu/checker/cpu_impl.hh | 362 -------------------------------- 3 files changed, 409 insertions(+), 362 deletions(-) create mode 100644 src/cpu/checker/cpu.cc diff --git a/src/cpu/SConscript b/src/cpu/SConscript index 3ca4a14716..0b0bf46921 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -184,6 +184,7 @@ if 'OzoneCPU' in env['CPU_MODELS']: sources += Split('ozone/checker_builder.cc') if env['USE_CHECKER']: + sources += Split('checker/cpu.cc') checker_supports = False for i in CheckerSupportedCPUList: if i in env['CPU_MODELS']: diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc new file mode 100644 index 0000000000..1540a6b949 --- /dev/null +++ b/src/cpu/checker/cpu.cc @@ -0,0 +1,408 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Kevin Lim + */ + +#include +#include + +#include "cpu/base.hh" +#include "cpu/checker/cpu.hh" +#include "cpu/simple_thread.hh" +#include "cpu/thread_context.hh" +#include "cpu/static_inst.hh" +#include "mem/packet_impl.hh" +#include "sim/byteswap.hh" + +#if FULL_SYSTEM +#include "arch/vtophys.hh" +#include "kern/kernel_stats.hh" +#endif // FULL_SYSTEM + +using namespace std; +//The CheckerCPU does alpha only +using namespace AlphaISA; + +void +CheckerCPU::init() +{ +} + +CheckerCPU::CheckerCPU(Params *p) + : BaseCPU(p), thread(NULL), tc(NULL) +{ + memReq = NULL; + + numInst = 0; + startNumInst = 0; + numLoad = 0; + startNumLoad = 0; + youngestSN = 0; + + changedPC = willChangePC = changedNextPC = false; + + exitOnError = p->exitOnError; + warnOnlyOnLoadError = p->warnOnlyOnLoadError; +#if FULL_SYSTEM + itb = p->itb; + dtb = p->dtb; + systemPtr = NULL; +#else + process = p->process; +#endif + + result.integer = 0; +} + +CheckerCPU::~CheckerCPU() +{ +} + +void +CheckerCPU::setMemory(MemObject *mem) +{ +#if !FULL_SYSTEM + memPtr = mem; + thread = new SimpleThread(this, /* thread_num */ 0, process, + /* asid */ 0, mem); + + thread->setStatus(ThreadContext::Suspended); + tc = thread->getTC(); + threadContexts.push_back(tc); +#endif +} + +void +CheckerCPU::setSystem(System *system) +{ +#if FULL_SYSTEM + systemPtr = system; + + thread = new SimpleThread(this, 0, systemPtr, itb, dtb, false); + + thread->setStatus(ThreadContext::Suspended); + tc = thread->getTC(); + threadContexts.push_back(tc); + delete thread->kernelStats; + thread->kernelStats = NULL; +#endif +} + +void +CheckerCPU::setIcachePort(Port *icache_port) +{ + icachePort = icache_port; +} + +void +CheckerCPU::setDcachePort(Port *dcache_port) +{ + dcachePort = dcache_port; +} + +void +CheckerCPU::serialize(ostream &os) +{ +/* + BaseCPU::serialize(os); + SERIALIZE_SCALAR(inst); + nameOut(os, csprintf("%s.xc", name())); + thread->serialize(os); + cacheCompletionEvent.serialize(os); +*/ +} + +void 
+CheckerCPU::unserialize(Checkpoint *cp, const string &section) +{ +/* + BaseCPU::unserialize(cp, section); + UNSERIALIZE_SCALAR(inst); + thread->unserialize(cp, csprintf("%s.xc", section)); +*/ +} + +Fault +CheckerCPU::copySrcTranslate(Addr src) +{ + panic("Unimplemented!"); +} + +Fault +CheckerCPU::copy(Addr dest) +{ + panic("Unimplemented!"); +} + +template +Fault +CheckerCPU::read(Addr addr, T &data, unsigned flags) +{ + // need to fill in CPU & thread IDs here + memReq = new Request(); + + memReq->setVirt(0, addr, sizeof(T), flags, thread->readPC()); + + // translate to physical address + translateDataReadReq(memReq); + + Packet *pkt = new Packet(memReq, Packet::ReadReq, Packet::Broadcast); + + pkt->dataStatic(&data); + + if (!(memReq->getFlags() & UNCACHEABLE)) { + // Access memory to see if we have the same data + dcachePort->sendFunctional(pkt); + } else { + // Assume the data is correct if it's an uncached access + memcpy(&data, &unverifiedResult.integer, sizeof(T)); + } + + delete pkt; + + return NoFault; +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +template +Fault +CheckerCPU::read(Addr addr, uint64_t &data, unsigned flags); + +template +Fault +CheckerCPU::read(Addr addr, uint32_t &data, unsigned flags); + +template +Fault +CheckerCPU::read(Addr addr, uint16_t &data, unsigned flags); + +template +Fault +CheckerCPU::read(Addr addr, uint8_t &data, unsigned flags); + +#endif //DOXYGEN_SHOULD_SKIP_THIS + +template<> +Fault +CheckerCPU::read(Addr addr, double &data, unsigned flags) +{ + return read(addr, *(uint64_t*)&data, flags); +} + +template<> +Fault +CheckerCPU::read(Addr addr, float &data, unsigned flags) +{ + return read(addr, *(uint32_t*)&data, flags); +} + +template<> +Fault +CheckerCPU::read(Addr addr, int32_t &data, unsigned flags) +{ + return read(addr, (uint32_t&)data, flags); +} + +template +Fault +CheckerCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ + // need to fill in CPU & thread IDs here + memReq = new Request(); + +
memReq->setVirt(0, addr, sizeof(T), flags, thread->readPC()); + + // translate to physical address + thread->translateDataWriteReq(memReq); + + // Can compare the write data and result only if it's cacheable, + // not a store conditional, or is a store conditional that + // succeeded. + // @todo: Verify that actual memory matches up with these values. + // Right now it only verifies that the instruction data is the + // same as what was in the request that got sent to memory; there + // is no verification that it is the same as what is in memory. + // This is because the LSQ would have to be snooped in the CPU to + // verify this data. + if (unverifiedReq && + !(unverifiedReq->getFlags() & UNCACHEABLE) && + (!(unverifiedReq->getFlags() & LOCKED) || + ((unverifiedReq->getFlags() & LOCKED) && + unverifiedReq->getScResult() == 1))) { + T inst_data; +/* + // This code would work if the LSQ allowed for snooping. + Packet *pkt = new Packet(memReq, Packet::ReadReq, Packet::Broadcast); + pkt.dataStatic(&inst_data); + + dcachePort->sendFunctional(pkt); + + delete pkt; +*/ + memcpy(&inst_data, unverifiedMemData, sizeof(T)); + + if (data != inst_data) { + warn("%lli: Store value does not match value in memory! " + "Instruction: %#x, memory: %#x", + curTick, inst_data, data); + handleError(); + } + } + + // Assume the result was the same as the one passed in. This checker + // doesn't check if the SC should succeed or fail, it just checks the + // value. 
+ if (res && unverifiedReq->scResultValid()) + *res = unverifiedReq->getScResult(); + + return NoFault; +} + + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +template +Fault +CheckerCPU::write(uint64_t data, Addr addr, unsigned flags, uint64_t *res); + +template +Fault +CheckerCPU::write(uint32_t data, Addr addr, unsigned flags, uint64_t *res); + +template +Fault +CheckerCPU::write(uint16_t data, Addr addr, unsigned flags, uint64_t *res); + +template +Fault +CheckerCPU::write(uint8_t data, Addr addr, unsigned flags, uint64_t *res); + +#endif //DOXYGEN_SHOULD_SKIP_THIS + +template<> +Fault +CheckerCPU::write(double data, Addr addr, unsigned flags, uint64_t *res) +{ + return write(*(uint64_t*)&data, addr, flags, res); +} + +template<> +Fault +CheckerCPU::write(float data, Addr addr, unsigned flags, uint64_t *res) +{ + return write(*(uint32_t*)&data, addr, flags, res); +} + +template<> +Fault +CheckerCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res) +{ + return write((uint32_t)data, addr, flags, res); +} + + +#if FULL_SYSTEM +Addr +CheckerCPU::dbg_vtophys(Addr addr) +{ + return vtophys(tc, addr); +} +#endif // FULL_SYSTEM + +bool +CheckerCPU::translateInstReq(Request *req) +{ +#if FULL_SYSTEM + return (thread->translateInstReq(req) == NoFault); +#else + thread->translateInstReq(req); + return true; +#endif +} + +void +CheckerCPU::translateDataReadReq(Request *req) +{ + thread->translateDataReadReq(req); + + if (req->getVaddr() != unverifiedReq->getVaddr()) { + warn("%lli: Request virtual addresses do not match! Inst: %#x, " + "checker: %#x", + curTick, unverifiedReq->getVaddr(), req->getVaddr()); + handleError(); + } + req->setPaddr(unverifiedReq->getPaddr()); + + if (checkFlags(req)) { + warn("%lli: Request flags do not match! 
Inst: %#x, checker: %#x", + curTick, unverifiedReq->getFlags(), req->getFlags()); + handleError(); + } +} + +void +CheckerCPU::translateDataWriteReq(Request *req) +{ + thread->translateDataWriteReq(req); + + if (req->getVaddr() != unverifiedReq->getVaddr()) { + warn("%lli: Request virtual addresses do not match! Inst: %#x, " + "checker: %#x", + curTick, unverifiedReq->getVaddr(), req->getVaddr()); + handleError(); + } + req->setPaddr(unverifiedReq->getPaddr()); + + if (checkFlags(req)) { + warn("%lli: Request flags do not match! Inst: %#x, checker: %#x", + curTick, unverifiedReq->getFlags(), req->getFlags()); + handleError(); + } +} + +bool +CheckerCPU::checkFlags(Request *req) +{ + // Remove any dynamic flags that don't have to do with the request itself. + unsigned flags = unverifiedReq->getFlags(); + unsigned mask = LOCKED | PHYSICAL | VPTE | ALTMODE | UNCACHEABLE | NO_FAULT; + flags = flags & (mask); + if (flags == req->getFlags()) { + return false; + } else { + return true; + } +} + +void +CheckerCPU::dumpAndExit() +{ + warn("%lli: Checker PC:%#x, next PC:%#x", + curTick, thread->readPC(), thread->readNextPC()); + panic("Checker found an error!"); +} diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh index 5091c7c1ad..137e1c46d3 100644 --- a/src/cpu/checker/cpu_impl.hh +++ b/src/cpu/checker/cpu_impl.hh @@ -32,7 +32,6 @@ #include #include "base/refcnt.hh" -#include "cpu/base.hh" #include "cpu/base_dyn_inst.hh" #include "cpu/checker/cpu.hh" #include "cpu/simple_thread.hh" @@ -44,374 +43,13 @@ #include "sim/stats.hh" #if FULL_SYSTEM -#include "sim/system.hh" #include "arch/vtophys.hh" -#include "kern/kernel_stats.hh" #endif // FULL_SYSTEM using namespace std; //The CheckerCPU does alpha only using namespace AlphaISA; -void -CheckerCPU::init() -{ -} - -CheckerCPU::CheckerCPU(Params *p) - : BaseCPU(p), thread(NULL), tc(NULL) -{ - memReq = NULL; - - numInst = 0; - startNumInst = 0; - numLoad = 0; - startNumLoad = 0; - youngestSN = 0; - - 
changedPC = willChangePC = changedNextPC = false; - - exitOnError = p->exitOnError; - warnOnlyOnLoadError = p->warnOnlyOnLoadError; -#if FULL_SYSTEM - itb = p->itb; - dtb = p->dtb; - systemPtr = NULL; -#else - process = p->process; -#endif - - result.integer = 0; -} - -CheckerCPU::~CheckerCPU() -{ -} - -void -CheckerCPU::setMemory(MemObject *mem) -{ -#if !FULL_SYSTEM - memPtr = mem; - thread = new SimpleThread(this, /* thread_num */ 0, process, - /* asid */ 0, mem); - - thread->setStatus(ThreadContext::Suspended); - tc = thread->getTC(); - threadContexts.push_back(tc); -#endif -} - -void -CheckerCPU::setSystem(System *system) -{ -#if FULL_SYSTEM - systemPtr = system; - - thread = new SimpleThread(this, 0, systemPtr, itb, dtb, false); - - thread->setStatus(ThreadContext::Suspended); - tc = thread->getTC(); - threadContexts.push_back(tc); - delete thread->kernelStats; - thread->kernelStats = NULL; -#endif -} - -void -CheckerCPU::setIcachePort(Port *icache_port) -{ - icachePort = icache_port; -} - -void -CheckerCPU::setDcachePort(Port *dcache_port) -{ - dcachePort = dcache_port; -} - -void -CheckerCPU::serialize(ostream &os) -{ -/* - BaseCPU::serialize(os); - SERIALIZE_SCALAR(inst); - nameOut(os, csprintf("%s.xc", name())); - thread->serialize(os); - cacheCompletionEvent.serialize(os); -*/ -} - -void -CheckerCPU::unserialize(Checkpoint *cp, const string &section) -{ -/* - BaseCPU::unserialize(cp, section); - UNSERIALIZE_SCALAR(inst); - thread->unserialize(cp, csprintf("%s.xc", section)); -*/ -} - -Fault -CheckerCPU::copySrcTranslate(Addr src) -{ - panic("Unimplemented!"); -} - -Fault -CheckerCPU::copy(Addr dest) -{ - panic("Unimplemented!"); -} - -template -Fault -CheckerCPU::read(Addr addr, T &data, unsigned flags) -{ - // need to fill in CPU & thread IDs here - memReq = new Request(); - - memReq->setVirt(0, addr, sizeof(T), flags, thread->readPC()); - - // translate to physical address - translateDataReadReq(memReq); - - Packet *pkt = new Packet(memReq, Packet::ReadReq,
Packet::Broadcast); - - pkt->dataStatic(&data); - - if (!(memReq->getFlags() & UNCACHEABLE)) { - // Access memory to see if we have the same data - dcachePort->sendFunctional(pkt); - } else { - // Assume the data is correct if it's an uncached access - memcpy(&data, &unverifiedResult.integer, sizeof(T)); - } - - delete pkt; - - return NoFault; -} - -#ifndef DOXYGEN_SHOULD_SKIP_THIS - -template -Fault -CheckerCPU::read(Addr addr, uint64_t &data, unsigned flags); - -template -Fault -CheckerCPU::read(Addr addr, uint32_t &data, unsigned flags); - -template -Fault -CheckerCPU::read(Addr addr, uint16_t &data, unsigned flags); - -template -Fault -CheckerCPU::read(Addr addr, uint8_t &data, unsigned flags); - -#endif //DOXYGEN_SHOULD_SKIP_THIS - -template<> -Fault -CheckerCPU::read(Addr addr, double &data, unsigned flags) -{ - return read(addr, *(uint64_t*)&data, flags); -} - -template<> -Fault -CheckerCPU::read(Addr addr, float &data, unsigned flags) -{ - return read(addr, *(uint32_t*)&data, flags); -} - -template<> -Fault -CheckerCPU::read(Addr addr, int32_t &data, unsigned flags) -{ - return read(addr, (uint32_t&)data, flags); -} - -template -Fault -CheckerCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) -{ - // need to fill in CPU & thread IDs here - memReq = new Request(); - - memReq->setVirt(0, addr, sizeof(T), flags, thread->readPC()); - - // translate to physical address - thread->translateDataWriteReq(memReq); - - // Can compare the write data and result only if it's cacheable, - // not a store conditional, or is a store conditional that - // succeeded. - // @todo: Verify that actual memory matches up with these values. - // Right now it only verifies that the instruction data is the - // same as what was in the request that got sent to memory; there - // is no verification that it is the same as what is in memory. - // This is because the LSQ would have to be snooped in the CPU to - // verify this data. 
- if (unverifiedReq && - !(unverifiedReq->getFlags() & UNCACHEABLE) && - (!(unverifiedReq->getFlags() & LOCKED) || - ((unverifiedReq->getFlags() & LOCKED) && - unverifiedReq->getScResult() == 1))) { - T inst_data; -/* - // This code would work if the LSQ allowed for snooping. - Packet *pkt = new Packet(memReq, Packet::ReadReq, Packet::Broadcast); - pkt.dataStatic(&inst_data); - - dcachePort->sendFunctional(pkt); - - delete pkt; -*/ - memcpy(&inst_data, unverifiedMemData, sizeof(T)); - - if (data != inst_data) { - warn("%lli: Store value does not match value in memory! " - "Instruction: %#x, memory: %#x", - curTick, inst_data, data); - handleError(); - } - } - - // Assume the result was the same as the one passed in. This checker - // doesn't check if the SC should succeed or fail, it just checks the - // value. - if (res && unverifiedReq->scResultValid()) - *res = unverifiedReq->getScResult(); - - return NoFault; -} - - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -template -Fault -CheckerCPU::write(uint64_t data, Addr addr, unsigned flags, uint64_t *res); - -template -Fault -CheckerCPU::write(uint32_t data, Addr addr, unsigned flags, uint64_t *res); - -template -Fault -CheckerCPU::write(uint16_t data, Addr addr, unsigned flags, uint64_t *res); - -template -Fault -CheckerCPU::write(uint8_t data, Addr addr, unsigned flags, uint64_t *res); - -#endif //DOXYGEN_SHOULD_SKIP_THIS - -template<> -Fault -CheckerCPU::write(double data, Addr addr, unsigned flags, uint64_t *res) -{ - return write(*(uint64_t*)&data, addr, flags, res); -} - -template<> -Fault -CheckerCPU::write(float data, Addr addr, unsigned flags, uint64_t *res) -{ - return write(*(uint32_t*)&data, addr, flags, res); -} - -template<> -Fault -CheckerCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res) -{ - return write((uint32_t)data, addr, flags, res); -} - - -#if FULL_SYSTEM -Addr -CheckerCPU::dbg_vtophys(Addr addr) -{ - return vtophys(tc, addr); -} -#endif // FULL_SYSTEM - -bool 
-CheckerCPU::translateInstReq(Request *req) -{ -#if FULL_SYSTEM - return (thread->translateInstReq(req) == NoFault); -#else - thread->translateInstReq(req); - return true; -#endif -} - -void -CheckerCPU::translateDataReadReq(Request *req) -{ - thread->translateDataReadReq(req); - - if (req->getVaddr() != unverifiedReq->getVaddr()) { - warn("%lli: Request virtual addresses do not match! Inst: %#x, " - "checker: %#x", - curTick, unverifiedReq->getVaddr(), req->getVaddr()); - handleError(); - } - req->setPaddr(unverifiedReq->getPaddr()); - - if (checkFlags(req)) { - warn("%lli: Request flags do not match! Inst: %#x, checker: %#x", - curTick, unverifiedReq->getFlags(), req->getFlags()); - handleError(); - } -} - -void -CheckerCPU::translateDataWriteReq(Request *req) -{ - thread->translateDataWriteReq(req); - - if (req->getVaddr() != unverifiedReq->getVaddr()) { - warn("%lli: Request virtual addresses do not match! Inst: %#x, " - "checker: %#x", - curTick, unverifiedReq->getVaddr(), req->getVaddr()); - handleError(); - } - req->setPaddr(unverifiedReq->getPaddr()); - - if (checkFlags(req)) { - warn("%lli: Request flags do not match! Inst: %#x, checker: %#x", - curTick, unverifiedReq->getFlags(), req->getFlags()); - handleError(); - } -} - -bool -CheckerCPU::checkFlags(Request *req) -{ - // Remove any dynamic flags that don't have to do with the request itself. 
- unsigned flags = unverifiedReq->getFlags(); - unsigned mask = LOCKED | PHYSICAL | VPTE | ALTMODE | UNCACHEABLE | NO_FAULT; - flags = flags & (mask); - if (flags == req->getFlags()) { - return false; - } else { - return true; - } -} - -void -CheckerCPU::dumpAndExit() -{ - warn("%lli: Checker PC:%#x, next PC:%#x", - curTick, thread->readPC(), thread->readNextPC()); - panic("Checker found an error!"); -} - template void Checker::verify(DynInstPtr &completed_inst) From 6e95bcd333784e1292293980cb2c2fba9f2ac467 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 22 Jun 2006 18:09:31 -0400 Subject: [PATCH 058/152] Misc fixes. src/cpu/o3/alpha_dyn_inst_impl.hh: Consolidate these calls into one. src/cpu/o3/commit_impl.hh: Include checker only if it's being used. src/cpu/o3/fetch_impl.hh: Do not deallocate request if it's a squashed response that was received. src/cpu/o3/lsq_unit.hh: Add in comment. src/cpu/o3/lsq_unit_impl.hh: Only include checker if it's being used. --HG-- extra : convert_revision : aae0bf1e19baae90f1e61d41191548612bbb3be6 --- src/cpu/o3/alpha_dyn_inst_impl.hh | 10 +--------- src/cpu/o3/commit_impl.hh | 5 ++++- src/cpu/o3/fetch_impl.hh | 1 - src/cpu/o3/lsq_unit.hh | 2 ++ src/cpu/o3/lsq_unit_impl.hh | 5 ++++- 5 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/cpu/o3/alpha_dyn_inst_impl.hh b/src/cpu/o3/alpha_dyn_inst_impl.hh index 6183a755e5..855ee99634 100644 --- a/src/cpu/o3/alpha_dyn_inst_impl.hh +++ b/src/cpu/o3/alpha_dyn_inst_impl.hh @@ -102,15 +102,7 @@ template Fault AlphaDynInst::completeAcc(Packet *pkt) { - if (this->isLoad()) { - this->fault = this->staticInst->completeAcc(pkt, this, - this->traceData); - } else if (this->isStore()) { - this->fault = this->staticInst->completeAcc(pkt, this, - this->traceData); - } else { - panic("Unknown type!"); - } + this->fault = this->staticInst->completeAcc(pkt, this, this->traceData); return this->fault; } diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 
566324b69a..176f832468 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -36,11 +36,14 @@ #include "base/loader/symtab.hh" #include "base/timebuf.hh" -#include "cpu/checker/cpu.hh" #include "cpu/exetrace.hh" #include "cpu/o3/commit.hh" #include "cpu/o3/thread_state.hh" +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif + using namespace std; template diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 7cbf0ab026..e570dbb18d 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -357,7 +357,6 @@ DefaultFetch::processCacheCompletion(PacketPtr pkt) ++fetchIcacheSquashes; delete pkt->req; delete pkt; - memReq[tid] = NULL; return; } diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index cef6e0a2e7..9b67e61f2f 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -128,6 +128,8 @@ class LSQUnit { /** Writes back stores. */ void writebackStores(); + /** Completes the data access that has been returned from the + * memory system. */ void completeDataAccess(PacketPtr pkt); /** Clears all the entries in the LQ. */ diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index f4a656aa19..714acb2ef7 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -31,12 +31,15 @@ #include "config/use_checker.hh" -#include "cpu/checker/cpu.hh" #include "cpu/o3/lsq_unit.hh" #include "base/str.hh" #include "mem/packet.hh" #include "mem/request.hh" +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif + template LSQUnit::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt, LSQUnit *lsq_ptr) From e6c04b1584998ed2ea532da4070b356c75906f63 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 22 Jun 2006 18:10:17 -0400 Subject: [PATCH 059/152] Change ThreadState constructor ordering to match the rest of the ThreadStates. 
--HG-- extra : convert_revision : 63d98aa8b6a694c285d95a2a57e1b3aaef4cee3b --- src/cpu/o3/thread_state.hh | 2 +- src/cpu/simple_thread.cc | 4 ++-- src/cpu/thread_state.cc | 4 ++-- src/cpu/thread_state.hh | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/cpu/o3/thread_state.hh b/src/cpu/o3/thread_state.hh index 19cbffb444..1c81052042 100644 --- a/src/cpu/o3/thread_state.hh +++ b/src/cpu/o3/thread_state.hh @@ -82,7 +82,7 @@ struct O3ThreadState : public ThreadState { #else O3ThreadState(O3CPU *_cpu, int _thread_num, Process *_process, int _asid, MemObject *mem) - : ThreadState(-1, _thread_num, mem, _process, _asid), + : ThreadState(-1, _thread_num, _process, _asid, mem), cpu(_cpu), inSyscall(0), trapPending(0) { } #endif diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc index 48383ca930..db28b23e85 100644 --- a/src/cpu/simple_thread.cc +++ b/src/cpu/simple_thread.cc @@ -107,7 +107,7 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys, #else SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, Process *_process, int _asid, MemObject* memobj) - : ThreadState(-1, _thread_num, memobj, _process, _asid), + : ThreadState(-1, _thread_num, _process, _asid, memobj), cpu(_cpu) { /* Use this port to for syscall emulation writes to memory. 
*/ @@ -124,7 +124,7 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, } SimpleThread::SimpleThread(RegFile *regFile) - : ThreadState(-1, -1, NULL, NULL, -1), cpu(NULL) + : ThreadState(-1, -1, NULL, -1, NULL), cpu(NULL) { regs = *regFile; tc = new ProxyThreadContext(this); diff --git a/src/cpu/thread_state.cc b/src/cpu/thread_state.cc index dcfa93c3e9..872678a41c 100644 --- a/src/cpu/thread_state.cc +++ b/src/cpu/thread_state.cc @@ -38,8 +38,8 @@ ThreadState::ThreadState(int _cpuId, int _tid) profile(NULL), profileNode(NULL), profilePC(0), quiesceEvent(NULL), funcExeInst(0), storeCondFailures(0) #else -ThreadState::ThreadState(int _cpuId, int _tid, MemObject *mem, - Process *_process, short _asid) +ThreadState::ThreadState(int _cpuId, int _tid, Process *_process, + short _asid, MemObject *mem) : cpuId(_cpuId), tid(_tid), lastActivate(0), lastSuspend(0), process(_process), asid(_asid), funcExeInst(0), storeCondFailures(0) diff --git a/src/cpu/thread_state.hh b/src/cpu/thread_state.hh index de9b2f14e4..cb1449ac5c 100644 --- a/src/cpu/thread_state.hh +++ b/src/cpu/thread_state.hh @@ -61,8 +61,8 @@ struct ThreadState { #if FULL_SYSTEM ThreadState(int _cpuId, int _tid); #else - ThreadState(int _cpuId, int _tid, MemObject *mem, - Process *_process, short _asid); + ThreadState(int _cpuId, int _tid, Process *_process, + short _asid, MemObject *mem); #endif void setCpuId(int id) { cpuId = id; } From 17f870f6d813df787baea116afb6f6af3897bc57 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 22 Jun 2006 23:33:26 -0400 Subject: [PATCH 060/152] Changes to get OzoneCPU to compile once more. The changes largely are fixing up the memory accesses to use ports/Requests/Packets, supporting the splitting off of instantiation of template classes, and handling some of the reorganization that happened. OzoneCPU is untested for now but at least compiles. Fixes will be coming shortly. SConstruct: Remove OzoneSimpleCPU from list of CPUs. 
src/cpu/SConscript: Leave out OzoneSimpleCPU. src/cpu/ozone/bpred_unit.cc: Fixes to get OzoneCPU to compile. src/cpu/ozone/checker_builder.cc: src/cpu/ozone/cpu.cc: src/cpu/ozone/cpu.hh: src/cpu/ozone/cpu_builder.cc: src/cpu/ozone/cpu_impl.hh: src/cpu/ozone/dyn_inst.hh: src/cpu/ozone/dyn_inst_impl.hh: src/cpu/ozone/front_end.cc: src/cpu/ozone/front_end.hh: src/cpu/ozone/front_end_impl.hh: src/cpu/ozone/lw_back_end.hh: src/cpu/ozone/lw_back_end_impl.hh: src/cpu/ozone/lw_lsq.hh: src/cpu/ozone/lw_lsq_impl.hh: src/cpu/ozone/ozone_impl.hh: src/cpu/ozone/rename_table.cc: src/cpu/ozone/simple_params.hh: src/cpu/ozone/thread_state.hh: Fixes to get OzoneCPU back to compiling. --HG-- extra : convert_revision : 90ffb397263bcf9fea3987317272c64f2b20f7e6 --- SConstruct | 2 +- src/cpu/SConscript | 17 +- src/cpu/ozone/base_dyn_inst.cc | 39 +++ src/cpu/ozone/bpred_unit.cc | 4 +- src/cpu/ozone/checker_builder.cc | 15 +- src/cpu/ozone/cpu.cc | 4 +- src/cpu/ozone/cpu.hh | 65 ++-- src/cpu/ozone/cpu_builder.cc | 426 +------------------------- src/cpu/ozone/cpu_impl.hh | 99 +++--- src/cpu/ozone/dyn_inst.hh | 56 ++-- src/cpu/ozone/dyn_inst_impl.hh | 24 +- src/cpu/ozone/front_end.cc | 4 +- src/cpu/ozone/front_end.hh | 92 +++--- src/cpu/ozone/front_end_impl.hh | 177 ++++++----- src/cpu/ozone/lw_back_end.hh | 48 +-- src/cpu/ozone/lw_back_end_impl.hh | 122 ++------ src/cpu/ozone/lw_lsq.hh | 141 ++++++--- src/cpu/ozone/lw_lsq_impl.hh | 363 +++++++++++++--------- src/cpu/ozone/ozone_impl.hh | 2 +- src/cpu/ozone/rename_table.cc | 4 +- src/cpu/ozone/simple_cpu_builder.cc | 452 ++++++++++++++++++++++++++++ src/cpu/ozone/simple_params.hh | 9 +- src/cpu/ozone/thread_state.hh | 67 +---- 23 files changed, 1173 insertions(+), 1059 deletions(-) create mode 100644 src/cpu/ozone/base_dyn_inst.cc create mode 100644 src/cpu/ozone/simple_cpu_builder.cc diff --git a/SConstruct b/SConstruct index 0a7b6c6dcc..2dc53f7cb3 100644 --- a/SConstruct +++ b/SConstruct @@ -264,7 +264,7 @@ env['ALL_ISA_LIST'] = ['alpha', 
'sparc', 'mips'] # Define the universe of supported CPU models env['ALL_CPU_LIST'] = ['AtomicSimpleCPU', 'TimingSimpleCPU', 'FullCPU', 'AlphaO3CPU', - 'OzoneSimpleCPU', 'OzoneCPU'] + 'OzoneCPU'] # Sticky options get saved in the options file so they persist from # one invocation to the next (unless overridden, in which case the new diff --git a/src/cpu/SConscript b/src/cpu/SConscript index 0b0bf46921..baa5d531e4 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -161,24 +161,17 @@ if 'AlphaO3CPU' in env['CPU_MODELS']: if env['USE_CHECKER']: sources += Split('o3/checker_builder.cc') -if 'OzoneSimpleCPU' in env['CPU_MODELS']: - sources += Split(''' - ozone/cpu.cc - ozone/cpu_builder.cc - ozone/dyn_inst.cc - ozone/front_end.cc - ozone/inorder_back_end.cc - ozone/inst_queue.cc - ozone/rename_table.cc - ''') - if 'OzoneCPU' in env['CPU_MODELS']: sources += Split(''' ozone/base_dyn_inst.cc ozone/bpred_unit.cc - ozone/lsq_unit.cc + ozone/cpu.cc + ozone/cpu_builder.cc + ozone/dyn_inst.cc + ozone/front_end.cc ozone/lw_back_end.cc ozone/lw_lsq.cc + ozone/rename_table.cc ''') if env['USE_CHECKER']: sources += Split('ozone/checker_builder.cc') diff --git a/src/cpu/ozone/base_dyn_inst.cc b/src/cpu/ozone/base_dyn_inst.cc new file mode 100644 index 0000000000..5a3a69dff8 --- /dev/null +++ b/src/cpu/ozone/base_dyn_inst.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Kevin Lim + */ + +#include "cpu/base_dyn_inst_impl.hh" +#include "cpu/ozone/ozone_impl.hh" + +// Explicit instantiation +template class BaseDynInst; + +template <> +int +BaseDynInst::instcount = 0; diff --git a/src/cpu/ozone/bpred_unit.cc b/src/cpu/ozone/bpred_unit.cc index 835324ce1b..c823f5e80c 100644 --- a/src/cpu/ozone/bpred_unit.cc +++ b/src/cpu/ozone/bpred_unit.cc @@ -30,7 +30,7 @@ #include "cpu/o3/bpred_unit_impl.hh" #include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" +//#include "cpu/ozone/simple_impl.hh" template class BPredUnit; -template class BPredUnit; +//template class BPredUnit; diff --git a/src/cpu/ozone/checker_builder.cc b/src/cpu/ozone/checker_builder.cc index f6786f24b9..c372e51d69 100644 --- a/src/cpu/ozone/checker_builder.cc +++ b/src/cpu/ozone/checker_builder.cc @@ -34,18 +34,20 @@ #include "cpu/inst_seq.hh" #include "cpu/ozone/dyn_inst.hh" #include "cpu/ozone/ozone_impl.hh" -#include "mem/base_mem.hh" #include "sim/builder.hh" #include "sim/process.hh" #include "sim/sim_object.hh" +class MemObject; + template class Checker > >; /** * Specific non-templated derived class used for SimObject configuration. 
*/ -class OzoneChecker : public Checker > > +class OzoneChecker : + public Checker > > { public: OzoneChecker(Params *p) @@ -67,7 +69,6 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker) #if FULL_SYSTEM SimObjectParam itb; SimObjectParam dtb; - SimObjectParam mem; SimObjectParam system; Param cpu_id; Param profile; @@ -75,8 +76,6 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker) SimObjectParam workload; #endif // FULL_SYSTEM Param clock; - SimObjectParam icache; - SimObjectParam dcache; Param defer_registration; Param exitOnError; @@ -100,7 +99,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(OzoneChecker) #if FULL_SYSTEM INIT_PARAM(itb, "Instruction TLB"), INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(mem, "memory"), INIT_PARAM(system, "system object"), INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(profile, ""), @@ -109,8 +107,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(OzoneChecker) #endif // FULL_SYSTEM INIT_PARAM(clock, "clock speed"), - INIT_PARAM(icache, "L1 instruction cache object"), - INIT_PARAM(dcache, "L1 data cache object"), INIT_PARAM(defer_registration, "defer system registration (for sampling)"), INIT_PARAM(exitOnError, "exit on error"), @@ -144,13 +140,10 @@ CREATE_SIM_OBJECT(OzoneChecker) temp = max_insts_all_threads; temp = max_loads_any_thread; temp = max_loads_all_threads; - BaseMem *cache = icache; - cache = dcache; #if FULL_SYSTEM params->itb = itb; params->dtb = dtb; - params->mem = mem; params->system = system; params->cpu_id = cpu_id; params->profile = profile; diff --git a/src/cpu/ozone/cpu.cc b/src/cpu/ozone/cpu.cc index 303c78eea2..eb6ac37bd3 100644 --- a/src/cpu/ozone/cpu.cc +++ b/src/cpu/ozone/cpu.cc @@ -31,7 +31,7 @@ #include "cpu/ozone/cpu_impl.hh" #include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" +//#include "cpu/ozone/simple_impl.hh" -template class OzoneCPU; +//template class OzoneCPU; template class OzoneCPU; diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh index e9550c39b8..cacc84786a 100644 --- a/src/cpu/ozone/cpu.hh +++ 
b/src/cpu/ozone/cpu.hh @@ -43,6 +43,7 @@ #include "cpu/ozone/thread_state.hh" #include "cpu/pc_event.hh" #include "cpu/static_inst.hh" +#include "mem/page_table.hh" #include "sim/eventq.hh" // forward declarations @@ -70,6 +71,7 @@ class Process; class Checkpoint; class EndQuiesceEvent; +class MemObject; class Request; namespace Trace { @@ -111,7 +113,7 @@ class OzoneCPU : public BaseCPU void setCpuId(int id); - int readCpuId() { return thread->cpuId; } + int readCpuId() { return thread->readCpuId(); } #if FULL_SYSTEM System *getSystemPtr() { return cpu->system; } @@ -122,22 +124,22 @@ class OzoneCPU : public BaseCPU AlphaDTB * getDTBPtr() { return cpu->dtb; } - Kernel::Statistics *getKernelStats() { return thread->kernelStats; } + Kernel::Statistics *getKernelStats() + { return thread->getKernelStats(); } FunctionalPort *getPhysPort() { return thread->getPhysPort(); } VirtualPort *getVirtPort(ThreadContext *tc = NULL) { return thread->getVirtPort(tc); } - void delVirtPort(VirtualPort *vp) - { thread->delVirtPort(vp); } + void delVirtPort(VirtualPort *vp); #else - TranslatingPort *getMemPort() { return thread->port; } + TranslatingPort *getMemPort() { return thread->getMemPort(); } - Process *getProcessPtr() { return thread->process; } + Process *getProcessPtr() { return thread->getProcessPtr(); } #endif - Status status() const { return thread->_status; } + Status status() const { return thread->status(); } void setStatus(Status new_status); @@ -250,7 +252,7 @@ class OzoneCPU : public BaseCPU { thread->renameTable[TheISA::ArgumentReg0 + i]->setIntResult(i); } void setSyscallReturn(SyscallReturn return_value) - { cpu->setSyscallReturn(return_value, thread->tid); } + { cpu->setSyscallReturn(return_value, thread->readTid()); } Counter readFuncExeInst() { return thread->funcExeInst; } @@ -374,6 +376,8 @@ class OzoneCPU : public BaseCPU PhysicalMemory *physmem; #endif + MemObject *mem; + FrontEnd *frontEnd; BackEnd *backEnd; @@ -415,50 +419,41 @@ class OzoneCPU : public 
BaseCPU #if FULL_SYSTEM - bool validInstAddr(Addr addr) { return true; } - bool validDataAddr(Addr addr) { return true; } - - Fault translateInstReq(Request *req) + /** Translates instruction requestion. */ + Fault translateInstReq(RequestPtr &req, OzoneThreadState *thread) { - return itb->translate(req, tc); + return itb->translate(req, thread->getTC()); } - Fault translateDataReadReq(Request *req) + /** Translates data read request. */ + Fault translateDataReadReq(RequestPtr &req, OzoneThreadState *thread) { - return dtb->translate(req, tc, false); + return dtb->translate(req, thread->getTC(), false); } - Fault translateDataWriteReq(Request *req) + /** Translates data write request. */ + Fault translateDataWriteReq(RequestPtr &req, OzoneThreadState *thread) { - return dtb->translate(req, tc, true); + return dtb->translate(req, thread->getTC(), true); } #else - bool validInstAddr(Addr addr) - { return true; } - - bool validDataAddr(Addr addr) - { return true; } - - int getInstAsid() { return thread.asid; } - int getDataAsid() { return thread.asid; } - /** Translates instruction requestion in syscall emulation mode. */ - Fault translateInstReq(Request *req) + Fault translateInstReq(RequestPtr &req, OzoneThreadState *thread) { - return thread.translateInstReq(req); + return thread->getProcessPtr()->pTable->translate(req); } /** Translates data read request in syscall emulation mode. */ - Fault translateDataReadReq(Request *req) + Fault translateDataReadReq(RequestPtr &req, OzoneThreadState *thread) { - return thread.translateDataReadReq(req); + return thread->getProcessPtr()->pTable->translate(req); } /** Translates data write request in syscall emulation mode. 
*/ - Fault translateDataWriteReq(Request *req) + Fault translateDataWriteReq(RequestPtr &req, OzoneThreadState *thread) { - return thread.translateDataWriteReq(req); + return thread->getProcessPtr()->pTable->translate(req); } #endif @@ -599,14 +594,14 @@ class OzoneCPU : public BaseCPU #if FULL_SYSTEM Fault hwrei(); - int readIntrFlag() { return thread.regs.intrflag; } - void setIntrFlag(int val) { thread.regs.intrflag = val; } + int readIntrFlag() { return thread.intrflag; } + void setIntrFlag(int val) { thread.intrflag = val; } bool inPalMode() { return AlphaISA::PcPAL(thread.PC); } bool inPalMode(Addr pc) { return AlphaISA::PcPAL(pc); } bool simPalCheck(int palFunc); void processInterrupts(); #else - void syscall(); + void syscall(uint64_t &callnum); void setSyscallReturn(SyscallReturn return_value, int tid); #endif diff --git a/src/cpu/ozone/cpu_builder.cc b/src/cpu/ozone/cpu_builder.cc index 18f257a25f..e239b7a94b 100644 --- a/src/cpu/ozone/cpu_builder.cc +++ b/src/cpu/ozone/cpu_builder.cc @@ -34,9 +34,7 @@ #include "cpu/inst_seq.hh" #include "cpu/ozone/cpu.hh" #include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" #include "cpu/ozone/simple_params.hh" -#include "mem/cache/base_cache.hh" #include "sim/builder.hh" #include "sim/process.hh" #include "sim/sim_object.hh" @@ -49,14 +47,6 @@ class DerivOzoneCPU : public OzoneCPU { } }; -class SimpleOzoneCPU : public OzoneCPU -{ - public: - SimpleOzoneCPU(SimpleParams *p) - : OzoneCPU(p) - { } -}; - //////////////////////////////////////////////////////////////////////// // @@ -78,7 +68,7 @@ SimObjectVectorParam workload; //SimObjectParam page_table; #endif // FULL_SYSTEM -SimObjectParam mem; +SimObjectParam mem; SimObjectParam checker; @@ -87,8 +77,8 @@ Param max_insts_all_threads; Param max_loads_any_thread; Param max_loads_all_threads; -SimObjectParam icache; -SimObjectParam dcache; +//SimObjectParam icache; +//SimObjectParam dcache; Param cachePorts; Param width; @@ -215,8 +205,8 @@ 
BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU) "count", 0), - INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), - INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), +// INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), +// INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), INIT_PARAM_DFLT(width, "Width", 1), @@ -361,8 +351,8 @@ CREATE_SIM_OBJECT(DerivOzoneCPU) // // Caches // - params->icacheInterface = icache ? icache->getInterface() : NULL; - params->dcacheInterface = dcache ? dcache->getInterface() : NULL; +// params->icacheInterface = icache ? icache->getInterface() : NULL; +// params->dcacheInterface = dcache ? dcache->getInterface() : NULL; params->cachePorts = cachePorts; params->width = width; @@ -459,405 +449,3 @@ CREATE_SIM_OBJECT(DerivOzoneCPU) } REGISTER_SIM_OBJECT("DerivOzoneCPU", DerivOzoneCPU) - - - -//////////////////////////////////////////////////////////////////////// -// -// OzoneCPU Simulation Object -// - -BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU) - - Param clock; - Param numThreads; - -#if FULL_SYSTEM -SimObjectParam system; -Param cpu_id; -SimObjectParam itb; -SimObjectParam dtb; -#else -SimObjectVectorParam workload; -//SimObjectParam page_table; -#endif // FULL_SYSTEM - -SimObjectParam mem; - -SimObjectParam checker; - -Param max_insts_any_thread; -Param max_insts_all_threads; -Param max_loads_any_thread; -Param max_loads_all_threads; - -SimObjectParam icache; -SimObjectParam dcache; - -Param cachePorts; -Param width; -Param frontEndWidth; -Param backEndWidth; -Param backEndSquashLatency; -Param backEndLatency; -Param maxInstBufferSize; -Param numPhysicalRegs; - -Param decodeToFetchDelay; -Param renameToFetchDelay; -Param iewToFetchDelay; -Param commitToFetchDelay; -Param fetchWidth; - -Param renameToDecodeDelay; -Param iewToDecodeDelay; -Param commitToDecodeDelay; -Param fetchToDecodeDelay; -Param decodeWidth; - -Param iewToRenameDelay; -Param commitToRenameDelay; -Param 
decodeToRenameDelay; -Param renameWidth; - -Param commitToIEWDelay; -Param renameToIEWDelay; -Param issueToExecuteDelay; -Param issueWidth; -Param executeWidth; -Param executeIntWidth; -Param executeFloatWidth; -Param executeBranchWidth; -Param executeMemoryWidth; - -Param iewToCommitDelay; -Param renameToROBDelay; -Param commitWidth; -Param squashWidth; - -Param predType; -Param localPredictorSize; -Param localCtrBits; -Param localHistoryTableSize; -Param localHistoryBits; -Param globalPredictorSize; -Param globalCtrBits; -Param globalHistoryBits; -Param choicePredictorSize; -Param choiceCtrBits; - -Param BTBEntries; -Param BTBTagSize; - -Param RASSize; - -Param LQEntries; -Param SQEntries; -Param LFSTSize; -Param SSITSize; - -Param numPhysIntRegs; -Param numPhysFloatRegs; -Param numIQEntries; -Param numROBEntries; - -Param decoupledFrontEnd; -Param dispatchWidth; -Param wbWidth; - -Param smtNumFetchingThreads; -Param smtFetchPolicy; -Param smtLSQPolicy; -Param smtLSQThreshold; -Param smtIQPolicy; -Param smtIQThreshold; -Param smtROBPolicy; -Param smtROBThreshold; -Param smtCommitPolicy; - -Param instShiftAmt; - -Param defer_registration; - -Param function_trace; -Param function_trace_start; - -END_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU) - -BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) - - INIT_PARAM(clock, "clock speed"), - INIT_PARAM(numThreads, "number of HW thread contexts"), - -#if FULL_SYSTEM - INIT_PARAM(system, "System object"), - INIT_PARAM(cpu_id, "processor ID"), - INIT_PARAM(itb, "Instruction translation buffer"), - INIT_PARAM(dtb, "Data translation buffer"), -#else - INIT_PARAM(workload, "Processes to run"), -// INIT_PARAM(page_table, "Page table"), -#endif // FULL_SYSTEM - - INIT_PARAM_DFLT(mem, "Memory", NULL), - - INIT_PARAM_DFLT(checker, "Checker CPU", NULL), - - INIT_PARAM_DFLT(max_insts_any_thread, - "Terminate when any thread reaches this inst count", - 0), - INIT_PARAM_DFLT(max_insts_all_threads, - "Terminate when all threads have reached" - 
"this inst count", - 0), - INIT_PARAM_DFLT(max_loads_any_thread, - "Terminate when any thread reaches this load count", - 0), - INIT_PARAM_DFLT(max_loads_all_threads, - "Terminate when all threads have reached this load" - "count", - 0), - - INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), - INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), - - INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), - INIT_PARAM_DFLT(width, "Width", 1), - INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1), - INIT_PARAM_DFLT(backEndWidth, "Back end width", 1), - INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1), - INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1), - INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16), - INIT_PARAM(numPhysicalRegs, "Number of physical registers"), - - INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), - INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), - INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" - "delay"), - INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), - INIT_PARAM(fetchWidth, "Fetch width"), - INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), - INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" - "delay"), - INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), - INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), - INIT_PARAM(decodeWidth, "Decode width"), - - INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" - "delay"), - INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), - INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), - INIT_PARAM(renameWidth, "Rename width"), - - INIT_PARAM(commitToIEWDelay, "Commit to " - "Issue/Execute/Writeback delay"), - INIT_PARAM(renameToIEWDelay, "Rename to " - "Issue/Execute/Writeback delay"), - INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" - "to the IEW stage)"), - INIT_PARAM(issueWidth, "Issue width"), - INIT_PARAM(executeWidth, "Execute 
width"), - INIT_PARAM(executeIntWidth, "Integer execute width"), - INIT_PARAM(executeFloatWidth, "Floating point execute width"), - INIT_PARAM(executeBranchWidth, "Branch execute width"), - INIT_PARAM(executeMemoryWidth, "Memory execute width"), - - INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " - "delay"), - INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), - INIT_PARAM(commitWidth, "Commit width"), - INIT_PARAM(squashWidth, "Squash width"), - - INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), - INIT_PARAM(localPredictorSize, "Size of local predictor"), - INIT_PARAM(localCtrBits, "Bits per counter"), - INIT_PARAM(localHistoryTableSize, "Size of local history table"), - INIT_PARAM(localHistoryBits, "Bits for the local history"), - INIT_PARAM(globalPredictorSize, "Size of global predictor"), - INIT_PARAM(globalCtrBits, "Bits per counter"), - INIT_PARAM(globalHistoryBits, "Bits of history"), - INIT_PARAM(choicePredictorSize, "Size of choice predictor"), - INIT_PARAM(choiceCtrBits, "Bits of choice counters"), - - INIT_PARAM(BTBEntries, "Number of BTB entries"), - INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), - - INIT_PARAM(RASSize, "RAS size"), - - INIT_PARAM(LQEntries, "Number of load queue entries"), - INIT_PARAM(SQEntries, "Number of store queue entries"), - INIT_PARAM(LFSTSize, "Last fetched store table size"), - INIT_PARAM(SSITSize, "Store set ID table size"), - - INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), - INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " - "registers"), - INIT_PARAM(numIQEntries, "Number of instruction queue entries"), - INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), - - INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true), - INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0), - INIT_PARAM_DFLT(wbWidth, "Writeback width", 0), - - INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1), - 
INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"), - INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"), - INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100), - INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"), - INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100), - INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"), - INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100), - INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"), - - INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), - INIT_PARAM(defer_registration, "defer system registration (for sampling)"), - - INIT_PARAM(function_trace, "Enable function trace"), - INIT_PARAM(function_trace_start, "Cycle to start function trace") - -END_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) - -CREATE_SIM_OBJECT(SimpleOzoneCPU) -{ - SimpleOzoneCPU *cpu; - -#if FULL_SYSTEM - // Full-system only supports a single thread for the moment. - int actual_num_threads = 1; -#else - // In non-full-system mode, we infer the number of threads from - // the workload if it's not explicitly specified. - int actual_num_threads = - numThreads.isValid() ? 
numThreads : workload.size(); - - if (workload.size() == 0) { - fatal("Must specify at least one workload!"); - } - -#endif - - SimpleParams *params = new SimpleParams; - - params->clock = clock; - - params->name = getInstanceName(); - params->numberOfThreads = actual_num_threads; - -#if FULL_SYSTEM - params->system = system; - params->cpu_id = cpu_id; - params->itb = itb; - params->dtb = dtb; -#else - params->workload = workload; -// params->pTable = page_table; -#endif // FULL_SYSTEM - - params->mem = mem; - params->checker = checker; - params->max_insts_any_thread = max_insts_any_thread; - params->max_insts_all_threads = max_insts_all_threads; - params->max_loads_any_thread = max_loads_any_thread; - params->max_loads_all_threads = max_loads_all_threads; - - // - // Caches - // - params->icacheInterface = icache ? icache->getInterface() : NULL; - params->dcacheInterface = dcache ? dcache->getInterface() : NULL; - params->cachePorts = cachePorts; - - params->width = width; - params->frontEndWidth = frontEndWidth; - params->backEndWidth = backEndWidth; - params->backEndSquashLatency = backEndSquashLatency; - params->backEndLatency = backEndLatency; - params->maxInstBufferSize = maxInstBufferSize; - params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs; - - params->decodeToFetchDelay = decodeToFetchDelay; - params->renameToFetchDelay = renameToFetchDelay; - params->iewToFetchDelay = iewToFetchDelay; - params->commitToFetchDelay = commitToFetchDelay; - params->fetchWidth = fetchWidth; - - params->renameToDecodeDelay = renameToDecodeDelay; - params->iewToDecodeDelay = iewToDecodeDelay; - params->commitToDecodeDelay = commitToDecodeDelay; - params->fetchToDecodeDelay = fetchToDecodeDelay; - params->decodeWidth = decodeWidth; - - params->iewToRenameDelay = iewToRenameDelay; - params->commitToRenameDelay = commitToRenameDelay; - params->decodeToRenameDelay = decodeToRenameDelay; - params->renameWidth = renameWidth; - - params->commitToIEWDelay = commitToIEWDelay; - 
params->renameToIEWDelay = renameToIEWDelay; - params->issueToExecuteDelay = issueToExecuteDelay; - params->issueWidth = issueWidth; - params->executeWidth = executeWidth; - params->executeIntWidth = executeIntWidth; - params->executeFloatWidth = executeFloatWidth; - params->executeBranchWidth = executeBranchWidth; - params->executeMemoryWidth = executeMemoryWidth; - - params->iewToCommitDelay = iewToCommitDelay; - params->renameToROBDelay = renameToROBDelay; - params->commitWidth = commitWidth; - params->squashWidth = squashWidth; - - params->predType = predType; - params->localPredictorSize = localPredictorSize; - params->localCtrBits = localCtrBits; - params->localHistoryTableSize = localHistoryTableSize; - params->localHistoryBits = localHistoryBits; - params->globalPredictorSize = globalPredictorSize; - params->globalCtrBits = globalCtrBits; - params->globalHistoryBits = globalHistoryBits; - params->choicePredictorSize = choicePredictorSize; - params->choiceCtrBits = choiceCtrBits; - - params->BTBEntries = BTBEntries; - params->BTBTagSize = BTBTagSize; - - params->RASSize = RASSize; - - params->LQEntries = LQEntries; - params->SQEntries = SQEntries; - - params->SSITSize = SSITSize; - params->LFSTSize = LFSTSize; - - params->numPhysIntRegs = numPhysIntRegs; - params->numPhysFloatRegs = numPhysFloatRegs; - params->numIQEntries = numIQEntries; - params->numROBEntries = numROBEntries; - - params->decoupledFrontEnd = decoupledFrontEnd; - params->dispatchWidth = dispatchWidth; - params->wbWidth = wbWidth; - - params->smtNumFetchingThreads = smtNumFetchingThreads; - params->smtFetchPolicy = smtFetchPolicy; - params->smtIQPolicy = smtIQPolicy; - params->smtLSQPolicy = smtLSQPolicy; - params->smtLSQThreshold = smtLSQThreshold; - params->smtROBPolicy = smtROBPolicy; - params->smtROBThreshold = smtROBThreshold; - params->smtCommitPolicy = smtCommitPolicy; - - params->instShiftAmt = 2; - - params->deferRegistration = defer_registration; - - params->functionTrace = 
function_trace; - params->functionTraceStart = function_trace_start; - - cpu = new SimpleOzoneCPU(params); - - return cpu; -} - -REGISTER_SIM_OBJECT("SimpleOzoneCPU", SimpleOzoneCPU) - diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh index 76e2318aa7..2b25ad124d 100644 --- a/src/cpu/ozone/cpu_impl.hh +++ b/src/cpu/ozone/cpu_impl.hh @@ -29,21 +29,17 @@ * Nathan Binkert */ -//#include -//#include +#include "config/full_system.hh" +#include "config/use_checker.hh" #include "arch/isa_traits.hh" // For MachInst #include "base/trace.hh" -#include "config/full_system.hh" #include "cpu/base.hh" -#include "cpu/checker/thread_context.hh" #include "cpu/thread_context.hh" #include "cpu/exetrace.hh" #include "cpu/ozone/cpu.hh" #include "cpu/quiesce_event.hh" #include "cpu/static_inst.hh" -//#include "mem/base_mem.hh" -#include "mem/mem_interface.hh" #include "sim/sim_object.hh" #include "sim/stats.hh" @@ -56,17 +52,18 @@ //#include "base/remote_gdb.hh" #include "cpu/profile.hh" #include "kern/kernel_stats.hh" -#include "mem/functional/memory_control.hh" -#include "mem/functional/physical.hh" #include "sim/faults.hh" #include "sim/sim_events.hh" #include "sim/sim_exit.hh" #include "sim/system.hh" #else // !FULL_SYSTEM -#include "mem/functional/functional.hh" #include "sim/process.hh" #endif // FULL_SYSTEM +#if USE_CHECKER +#include "cpu/checker/thread_context.hh" +#endif + using namespace TheISA; template @@ -101,13 +98,12 @@ OzoneCPU::TickEvent::description() template OzoneCPU::OzoneCPU(Params *p) #if FULL_SYSTEM - : BaseCPU(p), thread(this, 0, p->mem), tickEvent(this, p->width), - mem(p->mem), + : BaseCPU(p), thread(this, 0), tickEvent(this, p->width), #else - : BaseCPU(p), thread(this, 0, p->workload[0], 0), tickEvent(this, p->width), - mem(p->workload[0]->getMemory()), + : BaseCPU(p), thread(this, 0, p->workload[0], 0, p->mem), + tickEvent(this, p->width), #endif - comm(5, 5) + mem(p->mem), comm(5, 5) { frontEnd = new FrontEnd(p); backEnd = new BackEnd(p); 
@@ -115,6 +111,7 @@ OzoneCPU::OzoneCPU(Params *p) _status = Idle; if (p->checker) { +#if USE_CHECKER BaseCPU *temp_checker = p->checker; checker = dynamic_cast *>(temp_checker); checker->setMemory(mem); @@ -123,7 +120,10 @@ OzoneCPU::OzoneCPU(Params *p) #endif checkerTC = new CheckerThreadContext(&ozoneTC, checker); thread.tc = checkerTC; - tc = checkerXC; + tc = checkerTC; +#else + panic("Checker enabled but not compiled in!"); +#endif } else { checker = NULL; thread.tc = &ozoneTC; @@ -139,15 +139,13 @@ OzoneCPU::OzoneCPU(Params *p) #if FULL_SYSTEM /***** All thread state stuff *****/ thread.cpu = this; - thread.tid = 0; - thread.mem = p->mem; + thread.setTid(0); thread.quiesceEvent = new EndQuiesceEvent(tc); system = p->system; itb = p->itb; dtb = p->dtb; - memctrl = p->system->memctrl; physmem = p->system->physmem; if (p->profile) { @@ -166,9 +164,6 @@ OzoneCPU::OzoneCPU(Params *p) thread.profilePC = 3; #else thread.cpu = this; - thread.tid = 0; - thread.process = p->workload[0]; - thread.asid = 0; #endif // !FULL_SYSTEM numInst = 0; @@ -237,8 +232,11 @@ OzoneCPU::signalSwitched() if (++switchCount == 2) { backEnd->doSwitchOut(); frontEnd->doSwitchOut(); +#if USE_CHECKER if (checker) checker->switchOut(sampler); +#endif + _status = SwitchedOut; if (tickEvent.scheduled()) tickEvent.squash(); @@ -291,7 +289,7 @@ OzoneCPU::activateContext(int thread_num, int delay) notIdleFraction++; scheduleTickEvent(delay); _status = Running; - thread._status = ThreadContext::Active; + thread.setStatus(ThreadContext::Active); frontEnd->wakeFromQuiesce(); } @@ -510,7 +508,7 @@ template Addr OzoneCPU::dbg_vtophys(Addr addr) { - return vtophys(tcProxy, addr); + return vtophys(tc, addr); } #endif // FULL_SYSTEM @@ -526,7 +524,7 @@ OzoneCPU::post_interrupt(int int_num, int index) // thread.activate(); // Hack for now. Otherwise might have to go through the tc, or // I need to figure out what's the right thing to call. 
- activateContext(thread.tid, 1); + activateContext(thread.readTid(), 1); } } #endif // FULL_SYSTEM @@ -565,7 +563,7 @@ OzoneCPU::squashFromTC() #if !FULL_SYSTEM template void -OzoneCPU::syscall() +OzoneCPU::syscall(uint64_t &callnum) { // Not sure this copy is needed, depending on how the TC proxy is made. thread.renameTable.copyFrom(backEnd->renameTable); @@ -576,7 +574,7 @@ OzoneCPU::syscall() DPRINTF(OzoneCPU, "FuncExeInst: %i\n", thread.funcExeInst); - thread.process->syscall(yc); + thread.process->syscall(callnum, tc); thread.funcExeInst--; @@ -690,9 +688,9 @@ OzoneCPU::simPalCheck(int palFunc) switch (palFunc) { case PAL::halt: - haltContext(thread.tid); + haltContext(thread.readTid()); if (--System::numSystemsRunning == 0) - new SimExitEvent("all cpus halted"); + exitSimLoop("all cpus halted"); break; case PAL::bpt: @@ -718,21 +716,31 @@ void OzoneCPU::OzoneTC::setCpuId(int id) { cpu->cpuId = id; - thread->cpuId = id; + thread->setCpuId(id); } +#if FULL_SYSTEM +template +void +OzoneCPU::OzoneTC::delVirtPort(VirtualPort *vp) +{ + delete vp->getPeer(); + delete vp; +} +#endif + template void OzoneCPU::OzoneTC::setStatus(Status new_status) { - thread->_status = new_status; + thread->setStatus(new_status); } template void OzoneCPU::OzoneTC::activate(int delay) { - cpu->activateContext(thread->tid, delay); + cpu->activateContext(thread->readTid(), delay); } /// Set the status to Suspended. @@ -740,7 +748,7 @@ template void OzoneCPU::OzoneTC::suspend() { - cpu->suspendContext(thread->tid); + cpu->suspendContext(thread->readTid()); } /// Set the status to Unallocated. @@ -748,7 +756,7 @@ template void OzoneCPU::OzoneTC::deallocate() { - cpu->deallocateContext(thread->tid); + cpu->deallocateContext(thread->readTid()); } /// Set the status to Halted. 
@@ -756,7 +764,7 @@ template void OzoneCPU::OzoneTC::halt() { - cpu->haltContext(thread->tid); + cpu->haltContext(thread->readTid()); } #if FULL_SYSTEM @@ -771,7 +779,6 @@ void OzoneCPU::OzoneTC::takeOverFrom(ThreadContext *old_context) { // some things should already be set up - assert(getMemPtr() == old_context->getMemPtr()); #if FULL_SYSTEM assert(getSystemPtr() == old_context->getSystemPtr()); #else @@ -867,7 +874,7 @@ template int OzoneCPU::OzoneTC::getThreadNum() { - return thread->tid; + return thread->readTid(); } // Also somewhat obnoxious. Really only used for the TLB fault. @@ -875,7 +882,7 @@ template TheISA::MachInst OzoneCPU::OzoneTC::getInst() { - return thread->inst; + return thread->getInst(); } template @@ -894,7 +901,7 @@ OzoneCPU::OzoneTC::copyArchRegs(ThreadContext *tc) } else if (i < (TheISA::FP_Base_DepTag + TheISA::NumFloatRegs)) { int fp_idx = i - TheISA::FP_Base_DepTag; thread->renameTable[i]->setDoubleResult( - tc->readFloatRegDouble(fp_idx)); + tc->readFloatReg(fp_idx, 64)); } } @@ -904,7 +911,7 @@ OzoneCPU::OzoneTC::copyArchRegs(ThreadContext *tc) // Need to copy the TC values into the current rename table, // copy the misc regs. 
- thread->regs.miscRegs.copyMiscRegs(tc); + copyMiscRegs(tc, this); } template @@ -922,7 +929,7 @@ OzoneCPU::OzoneTC::readIntReg(int reg_idx) } template -float +TheISA::FloatReg OzoneCPU::OzoneTC::readFloatReg(int reg_idx, int width) { int idx = reg_idx + TheISA::FP_Base_DepTag; @@ -1049,15 +1056,15 @@ template TheISA::MiscReg OzoneCPU::OzoneTC::readMiscReg(int misc_reg) { - return thread->regs.miscRegs.readReg(misc_reg); + return thread->miscRegFile.readReg(misc_reg); } template TheISA::MiscReg OzoneCPU::OzoneTC::readMiscRegWithEffect(int misc_reg, Fault &fault) { - return thread->regs.miscRegs.readRegWithEffect(misc_reg, - fault, this); + return thread->miscRegFile.readRegWithEffect(misc_reg, + fault, this); } template @@ -1065,7 +1072,7 @@ Fault OzoneCPU::OzoneTC::setMiscReg(int misc_reg, const MiscReg &val) { // Needs to setup a squash event unless we're in syscall mode - Fault ret_fault = thread->regs.miscRegs.setReg(misc_reg, val); + Fault ret_fault = thread->miscRegFile.setReg(misc_reg, val); if (!thread->inSyscall) { cpu->squashFromTC(); @@ -1079,8 +1086,8 @@ Fault OzoneCPU::OzoneTC::setMiscRegWithEffect(int misc_reg, const MiscReg &val) { // Needs to setup a squash event unless we're in syscall mode - Fault ret_fault = thread->regs.miscRegs.setRegWithEffect(misc_reg, val, - this); + Fault ret_fault = thread->miscRegFile.setRegWithEffect(misc_reg, val, + this); if (!thread->inSyscall) { cpu->squashFromTC(); diff --git a/src/cpu/ozone/dyn_inst.hh b/src/cpu/ozone/dyn_inst.hh index 0bb50bd69f..67691d416c 100644 --- a/src/cpu/ozone/dyn_inst.hh +++ b/src/cpu/ozone/dyn_inst.hh @@ -34,9 +34,8 @@ #include "arch/isa_traits.hh" #include "config/full_system.hh" #include "cpu/base_dyn_inst.hh" -#include "cpu/ozone/cpu.hh" // MUST include this #include "cpu/inst_seq.hh" -//#include "cpu/ozone/simple_impl.hh" // Would be nice to not have to include this +#include "cpu/ozone/cpu.hh" // MUST include this #include "cpu/ozone/ozone_impl.hh" #include @@ -47,15 +46,17 @@ class 
OzoneDynInst : public BaseDynInst { public: // Typedefs - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::OzoneCPU OzoneCPU; - typedef typename FullCPU::ImplState ImplState; + typedef typename OzoneCPU::ImplState ImplState; // Typedef for DynInstPtr. This is really just a RefCountingPtr. typedef typename Impl::DynInstPtr DynInstPtr; typedef TheISA::ExtMachInst ExtMachInst; typedef TheISA::MachInst MachInst; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::MiscReg MiscReg; typedef typename std::list::iterator ListIt; @@ -67,10 +68,10 @@ class OzoneDynInst : public BaseDynInst MaxInstDestRegs = TheISA::MaxInstDestRegs }; - OzoneDynInst(FullCPU *cpu); + OzoneDynInst(OzoneCPU *cpu); OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, - InstSeqNum seq_num, FullCPU *cpu); + InstSeqNum seq_num, OzoneCPU *cpu); OzoneDynInst(StaticInstPtr inst); @@ -131,7 +132,7 @@ class OzoneDynInst : public BaseDynInst Fault initiateAcc(); - Fault completeAcc(); + Fault completeAcc(Packet *pkt); // The register accessor methods provide the index of the // instruction's operand (e.g., 0 or 1), not the architectural @@ -149,17 +150,30 @@ class OzoneDynInst : public BaseDynInst return srcInsts[idx]->readIntResult(); } - float readFloatRegSingle(const StaticInst *si, int idx) + FloatReg readFloatReg(const StaticInst *si, int idx, int width) + { + switch(width) { + case 32: + return srcInsts[idx]->readFloatResult(); + case 64: + return srcInsts[idx]->readDoubleResult(); + default: + panic("Width not supported"); + return 0; + } + } + + FloatReg readFloatReg(const StaticInst *si, int idx) { return srcInsts[idx]->readFloatResult(); } - double readFloatRegDouble(const StaticInst *si, int idx) + FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width) { - return srcInsts[idx]->readDoubleResult(); + return srcInsts[idx]->readIntResult(); } - uint64_t readFloatRegInt(const StaticInst *si, int idx) + FloatRegBits 
readFloatRegBits(const StaticInst *si, int idx) { return srcInsts[idx]->readIntResult(); } @@ -172,19 +186,25 @@ class OzoneDynInst : public BaseDynInst BaseDynInst::setIntReg(si, idx, val); } - void setFloatRegSingle(const StaticInst *si, int idx, float val) + void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width) { - BaseDynInst::setFloatRegSingle(si, idx, val); + BaseDynInst::setFloatReg(si, idx, val, width); } - void setFloatRegDouble(const StaticInst *si, int idx, double val) + void setFloatReg(const StaticInst *si, int idx, FloatReg val) { - BaseDynInst::setFloatRegDouble(si, idx, val); + BaseDynInst::setFloatReg(si, idx, val); } - void setFloatRegInt(const StaticInst *si, int idx, uint64_t val) + void setFloatRegBits(const StaticInst *si, int idx, + FloatRegBits val, int width) { - BaseDynInst::setFloatRegInt(si, idx, val); + BaseDynInst::setFloatRegBits(si, idx, val); + } + + void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val) + { + BaseDynInst::setFloatRegBits(si, idx, val); } void setIntResult(uint64_t result) { this->instResult.integer = result; } @@ -223,7 +243,7 @@ class OzoneDynInst : public BaseDynInst void trap(Fault fault); bool simPalCheck(int palFunc); #else - void syscall(); + void syscall(uint64_t &callnum); #endif ListIt iqIt; diff --git a/src/cpu/ozone/dyn_inst_impl.hh b/src/cpu/ozone/dyn_inst_impl.hh index 4149bf144b..bad902c2aa 100644 --- a/src/cpu/ozone/dyn_inst_impl.hh +++ b/src/cpu/ozone/dyn_inst_impl.hh @@ -37,7 +37,7 @@ using namespace TheISA; template -OzoneDynInst::OzoneDynInst(FullCPU *cpu) +OzoneDynInst::OzoneDynInst(OzoneCPU *cpu) : BaseDynInst(0, 0, 0, 0, cpu) { this->setResultReady(); @@ -47,7 +47,7 @@ OzoneDynInst::OzoneDynInst(FullCPU *cpu) template OzoneDynInst::OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, - InstSeqNum seq_num, FullCPU *cpu) + InstSeqNum seq_num, OzoneCPU *cpu) : BaseDynInst(inst, PC, Pred_PC, seq_num, cpu) { initInstPtrs(); @@ -111,19 +111,9 @@ 
OzoneDynInst::initiateAcc() template Fault -OzoneDynInst::completeAcc() +OzoneDynInst::completeAcc(Packet *pkt) { - if (this->isLoad()) { - this->fault = this->staticInst->completeAcc(this->req->data, - this, - this->traceData); - } else if (this->isStore()) { - this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result, - this, - this->traceData); - } else { - panic("Unknown type!"); - } + this->fault = this->staticInst->completeAcc(pkt, this, this->traceData); return this->fault; } @@ -298,7 +288,7 @@ template void OzoneDynInst::trap(Fault fault) { - fault->invoke(this->thread->getXCProxy()); + fault->invoke(this->thread->getTC()); } template @@ -310,8 +300,8 @@ OzoneDynInst::simPalCheck(int palFunc) #else template void -OzoneDynInst::syscall() +OzoneDynInst::syscall(uint64_t &callnum) { - this->cpu->syscall(); + this->cpu->syscall(callnum); } #endif diff --git a/src/cpu/ozone/front_end.cc b/src/cpu/ozone/front_end.cc index f0ea8eae15..cfd0335649 100644 --- a/src/cpu/ozone/front_end.cc +++ b/src/cpu/ozone/front_end.cc @@ -30,7 +30,7 @@ #include "cpu/ozone/front_end_impl.hh" #include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" +//#include "cpu/ozone/simple_impl.hh" template class FrontEnd; -template class FrontEnd; +//template class FrontEnd; diff --git a/src/cpu/ozone/front_end.hh b/src/cpu/ozone/front_end.hh index af190008c7..af310efc32 100644 --- a/src/cpu/ozone/front_end.hh +++ b/src/cpu/ozone/front_end.hh @@ -33,9 +33,11 @@ #include +#include "arch/utility.hh" #include "cpu/inst_seq.hh" #include "cpu/o3/bpred_unit.hh" #include "cpu/ozone/rename_table.hh" +#include "mem/port.hh" #include "mem/request.hh" #include "sim/eventq.hh" #include "sim/stats.hh" @@ -55,17 +57,55 @@ class FrontEnd typedef typename Impl::Params Params; typedef typename Impl::DynInst DynInst; typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::CPUType CPUType; typedef typename Impl::BackEnd 
BackEnd; - typedef typename Impl::FullCPU::OzoneTC OzoneTC; - typedef typename Impl::FullCPU::CommStruct CommStruct; + typedef typename Impl::CPUType::OzoneTC OzoneTC; + typedef typename Impl::CPUType::CommStruct CommStruct; + + /** IcachePort class. Handles doing the communication with the + * cache/memory. + */ + class IcachePort : public Port + { + protected: + /** Pointer to FE. */ + FrontEnd *fe; + + public: + /** Default constructor. */ + IcachePort(FrontEnd *_fe) + : Port(_fe->name() + "-iport"), fe(_fe) + { } + + protected: + /** Atomic version of receive. Panics. */ + virtual Tick recvAtomic(PacketPtr pkt); + + /** Functional version of receive. Panics. */ + virtual void recvFunctional(PacketPtr pkt); + + /** Receives status change. Other than range changing, panics. */ + virtual void recvStatusChange(Status status); + + /** Returns the address ranges of this device. */ + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); } + + /** Timing version of receive. Handles setting fetch to the + * proper status to start fetching. */ + virtual bool recvTiming(PacketPtr pkt); + + /** Handles doing a retry of a failed fetch. 
*/ + virtual void recvRetry(); + }; FrontEnd(Params *params); std::string name() const; - void setCPU(FullCPU *cpu_ptr) + void setCPU(CPUType *cpu_ptr) { cpu = cpu_ptr; } void setBackEnd(BackEnd *back_end_ptr) @@ -104,6 +144,8 @@ class FrontEnd bool switchedOut; private: + void recvRetry(); + bool updateStatus(); void checkBE(); @@ -130,7 +172,7 @@ class FrontEnd { return cpu->globalSeqNum++; } public: - FullCPU *cpu; + CPUType *cpu; BackEnd *backEnd; @@ -141,8 +183,9 @@ class FrontEnd enum Status { Running, Idle, - IcacheMissStall, - IcacheMissComplete, + IcacheWaitResponse, + IcacheWaitRetry, + IcacheAccessComplete, SerializeBlocked, SerializeComplete, RenameBlocked, @@ -161,38 +204,8 @@ class FrontEnd BranchPred branchPred; - class IcachePort : public Port - { - protected: - FrontEnd *fe; - - public: - IcachePort(const std::string &_name, FrontEnd *_fe) - : Port(_name), fe(_fe) - { } - - protected: - virtual Tick recvAtomic(PacketPtr pkt); - - virtual void recvFunctional(PacketPtr pkt); - - virtual void recvStatusChange(Status status); - - virtual void getDeviceAddressRanges(AddrRangeList &resp, - AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } - - virtual bool recvTiming(PacketPtr pkt); - - virtual void recvRetry(); - }; - IcachePort icachePort; -#if !FULL_SYSTEM - PageTable *pTable; -#endif - RequestPtr memReq; /** Mask to get a cache block's address. */ @@ -209,6 +222,11 @@ class FrontEnd bool cacheBlkValid; + bool cacheBlocked; + + /** The packet that is waiting to be retried. 
*/ + PacketPtr retryPkt; + public: RenameTable renameTable; diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh index 8082e01b9b..b1bc325c73 100644 --- a/src/cpu/ozone/front_end_impl.hh +++ b/src/cpu/ozone/front_end_impl.hh @@ -34,14 +34,55 @@ #include "cpu/thread_context.hh" #include "cpu/exetrace.hh" #include "cpu/ozone/front_end.hh" -#include "mem/mem_interface.hh" +#include "mem/packet.hh" +#include "mem/request.hh" using namespace TheISA; +template +Tick +FrontEnd::IcachePort::recvAtomic(PacketPtr pkt) +{ + panic("FrontEnd doesn't expect recvAtomic callback!"); + return curTick; +} + +template +void +FrontEnd::IcachePort::recvFunctional(PacketPtr pkt) +{ + panic("FrontEnd doesn't expect recvFunctional callback!"); +} + +template +void +FrontEnd::IcachePort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("FrontEnd doesn't expect recvStatusChange callback!"); +} + +template +bool +FrontEnd::IcachePort::recvTiming(Packet *pkt) +{ + fe->processCacheCompletion(pkt); + return true; +} + +template +void +FrontEnd::IcachePort::recvRetry() +{ + fe->recvRetry(); +} + template FrontEnd::FrontEnd(Params *params) : branchPred(params), - icacheInterface(params->icacheInterface), + icachePort(this), instBufferSize(0), maxInstBufferSize(params->maxInstBufferSize), width(params->frontEndWidth), @@ -56,7 +97,7 @@ FrontEnd::FrontEnd(Params *params) memReq = NULL; // Size of cache block. - cacheBlkSize = icacheInterface ? 
icacheInterface->getBlockSize() : 64; + cacheBlkSize = 64; assert(isPowerOf2(cacheBlkSize)); @@ -68,11 +109,10 @@ FrontEnd::FrontEnd(Params *params) fetchCacheLineNextCycle = true; - cacheBlkValid = false; + cacheBlkValid = cacheBlocked = false; + + retryPkt = NULL; -#if !FULL_SYSTEM -// pTable = params->pTable; -#endif fetchFault = NoFault; } @@ -271,7 +311,7 @@ FrontEnd::tick() IFQFcount += instBufferSize == maxInstBufferSize; // Fetch cache line - if (status == IcacheMissComplete) { + if (status == IcacheAccessComplete) { cacheBlkValid = true; status = Running; @@ -280,8 +320,8 @@ FrontEnd::tick() if (freeRegs <= 0) status = RenameBlocked; checkBE(); - } else if (status == IcacheMissStall) { - DPRINTF(FE, "Still in Icache miss stall.\n"); + } else if (status == IcacheWaitResponse || status == IcacheWaitRetry) { + DPRINTF(FE, "Still in Icache wait.\n"); icacheStallCycles++; return; } @@ -302,7 +342,7 @@ FrontEnd::tick() } else if (status == QuiescePending) { DPRINTF(FE, "Waiting for quiesce to execute or get squashed.\n"); return; - } else if (status != IcacheMissComplete) { + } else if (status != IcacheAccessComplete) { if (fetchCacheLineNextCycle) { Fault fault = fetchCacheLine(); if (fault != NoFault) { @@ -313,7 +353,7 @@ FrontEnd::tick() fetchCacheLineNextCycle = false; } // If miss, stall until it returns. - if (status == IcacheMissStall) { + if (status == IcacheWaitResponse || status == IcacheWaitRetry) { // Tell CPU to not tick me for now. return; } @@ -403,22 +443,16 @@ FrontEnd::fetchCacheLine() // Setup the memReq to do a read of the first isntruction's address. // Set the appropriate read size and flags as well. - memReq = new MemReq(); - - memReq->asid = 0; - memReq->thread_num = 0; - memReq->data = new uint8_t[64]; - memReq->tc = tc; - memReq->cmd = Read; - memReq->reset(fetch_PC, cacheBlkSize, flags); + memReq = new Request(0, fetch_PC, cacheBlkSize, flags, + fetch_PC, cpu->readCpuId(), 0); // Translate the instruction request. 
- fault = cpu->translateInstReq(memReq); + fault = cpu->translateInstReq(memReq, thread); // Now do the timing access to see whether or not the instruction // exists within the cache. - if (icacheInterface && fault == NoFault) { -#if FULL_SYSTEM + if (fault == NoFault) { +#if 0 if (cpu->system->memctrl->badaddr(memReq->paddr) || memReq->flags & UNCACHEABLE) { DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a " @@ -428,30 +462,21 @@ FrontEnd::fetchCacheLine() } #endif - memReq->completionEvent = NULL; + // Build packet here. + PacketPtr data_pkt = new Packet(memReq, + Packet::ReadReq, Packet::Broadcast); + data_pkt->dataStatic(cacheData); - memReq->time = curTick; - fault = cpu->mem->read(memReq, cacheData); - - MemAccessResult res = icacheInterface->access(memReq); - - // If the cache missed then schedule an event to wake - // up this stage once the cache miss completes. - if (icacheInterface->doEvents() && res != MA_HIT) { - memReq->completionEvent = new ICacheCompletionEvent(memReq, this); - - status = IcacheMissStall; - - cacheBlkValid = false; - - DPRINTF(FE, "Cache miss.\n"); - } else { - DPRINTF(FE, "Cache hit.\n"); - - cacheBlkValid = true; - -// memcpy(cacheData, memReq->data, memReq->size); + if (!icachePort.sendTiming(data_pkt)) { + assert(retryPkt == NULL); + DPRINTF(Fetch, "Out of MSHRs!\n"); + status = IcacheWaitRetry; + retryPkt = data_pkt; + cacheBlocked = true; + return NoFault; } + + status = IcacheWaitResponse; } // Note that this will set the cache block PC a bit earlier than it should @@ -564,7 +589,7 @@ FrontEnd::handleFault(Fault &fault) // instruction->setASID(tid); - instruction->setState(thread); + instruction->setThreadState(thread); instruction->traceData = NULL; @@ -613,8 +638,8 @@ FrontEnd::squash(const InstSeqNum &squash_num, const Addr &next_PC, } // Clear the icache miss if it's outstanding. 
- if (status == IcacheMissStall && icacheInterface) { - DPRINTF(FE, "Squashing outstanding Icache miss.\n"); + if (status == IcacheWaitResponse) { + DPRINTF(FE, "Squashing outstanding Icache access.\n"); memReq = NULL; } @@ -651,20 +676,22 @@ FrontEnd::getInst() template void -FrontEnd::processCacheCompletion(MemReqPtr &req) +FrontEnd::processCacheCompletion(PacketPtr pkt) { DPRINTF(FE, "Processing cache completion\n"); // Do something here. - if (status != IcacheMissStall || - req != memReq || + if (status != IcacheWaitResponse || + pkt->req != memReq || switchedOut) { DPRINTF(FE, "Previous fetch was squashed.\n"); fetchIcacheSquashes++; + delete pkt->req; + delete pkt; return; } - status = IcacheMissComplete; + status = IcacheAccessComplete; /* if (checkStall(tid)) { fetchStatus[tid] = Blocked; @@ -676,6 +703,8 @@ FrontEnd::processCacheCompletion(MemReqPtr &req) // Reset the completion event to NULL. // memReq->completionEvent = NULL; + delete pkt->req; + delete pkt; memReq = NULL; } @@ -696,6 +725,27 @@ FrontEnd::addFreeRegs(int num_freed) freeRegs = numPhysRegs; } +template +void +FrontEnd::recvRetry() +{ + assert(cacheBlocked); + if (retryPkt != NULL) { + assert(status == IcacheWaitRetry); + + if (icachePort.sendTiming(retryPkt)) { + status = IcacheWaitResponse; + retryPkt = NULL; + cacheBlocked = false; + } + } else { + // Access has been squashed since it was sent out. Just clear + // the cache being blocked. 
+ cacheBlocked = false; + } + +} + template bool FrontEnd::updateStatus() @@ -774,7 +824,7 @@ FrontEnd::getInstFromCacheline() DynInstPtr instruction = new DynInst(decode_inst, PC, PC+sizeof(MachInst), inst_seq, cpu); - instruction->setState(thread); + instruction->setThreadState(thread); DPRINTF(FE, "Instruction [sn:%lli] created, with PC %#x\n%s\n", inst_seq, instruction->readPC(), @@ -898,24 +948,3 @@ FrontEnd::dumpInsts() buff_it++; } } - -template -FrontEnd::ICacheCompletionEvent::ICacheCompletionEvent(MemReqPtr &_req, FrontEnd *fe) - : Event(&mainEventQueue, Delayed_Writeback_Pri), req(_req), frontEnd(fe) -{ - this->setFlags(Event::AutoDelete); -} - -template -void -FrontEnd::ICacheCompletionEvent::process() -{ - frontEnd->processCacheCompletion(req); -} - -template -const char * -FrontEnd::ICacheCompletionEvent::description() -{ - return "ICache completion event"; -} diff --git a/src/cpu/ozone/lw_back_end.hh b/src/cpu/ozone/lw_back_end.hh index bb81f60c84..bb3ef3a72a 100644 --- a/src/cpu/ozone/lw_back_end.hh +++ b/src/cpu/ozone/lw_back_end.hh @@ -60,9 +60,9 @@ class LWBackEnd typedef typename Impl::Params Params; typedef typename Impl::DynInst DynInst; typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::OzoneCPU OzoneCPU; typedef typename Impl::FrontEnd FrontEnd; - typedef typename Impl::FullCPU::CommStruct CommStruct; + typedef typename Impl::OzoneCPU::CommStruct CommStruct; struct SizeStruct { int size; @@ -95,35 +95,13 @@ class LWBackEnd const char *description(); }; - /** LdWriteback event for a load completion. */ - class LdWritebackEvent : public Event { - private: - /** Instruction that is writing back data to the register file. */ - DynInstPtr inst; - /** Pointer to IEW stage. */ - LWBackEnd *be; - - bool dcacheMiss; - - public: - /** Constructs a load writeback event. */ - LdWritebackEvent(DynInstPtr &_inst, LWBackEnd *be); - - /** Processes writeback event. 
*/ - virtual void process(); - /** Returns the description of the writeback event. */ - virtual const char *description(); - - void setDcacheMiss() { dcacheMiss = true; be->addDcacheMiss(inst); } - }; - LWBackEnd(Params *params); std::string name() const; void regStats(); - void setCPU(FullCPU *cpu_ptr); + void setCPU(OzoneCPU *cpu_ptr); void setFrontEnd(FrontEnd *front_end_ptr) { frontEnd = front_end_ptr; } @@ -239,7 +217,7 @@ class LWBackEnd void updateComInstStats(DynInstPtr &inst); public: - FullCPU *cpu; + OzoneCPU *cpu; FrontEnd *frontEnd; @@ -273,24 +251,6 @@ class LWBackEnd RenameTable renameTable; private: - class DCacheCompletionEvent : public Event - { - private: - LWBackEnd *be; - - public: - DCacheCompletionEvent(LWBackEnd *_be); - - virtual void process(); - virtual const char *description(); - }; - - friend class DCacheCompletionEvent; - - DCacheCompletionEvent cacheCompletionEvent; - - MemInterface *dcacheInterface; - // General back end width. Used if the more specific isn't given. 
int width; diff --git a/src/cpu/ozone/lw_back_end_impl.hh b/src/cpu/ozone/lw_back_end_impl.hh index ed406d5a3d..dcd7a0d7ed 100644 --- a/src/cpu/ozone/lw_back_end_impl.hh +++ b/src/cpu/ozone/lw_back_end_impl.hh @@ -28,9 +28,14 @@ * Authors: Kevin Lim */ -#include "cpu/checker/cpu.hh" +#include "config/use_checker.hh" + #include "cpu/ozone/lw_back_end.hh" -#include "encumbered/cpu/full/op_class.hh" +#include "cpu/op_class.hh" + +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif template void @@ -134,86 +139,11 @@ LWBackEnd::replayMemInst(DynInstPtr &inst) assert(found_inst); } -template -LWBackEnd::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst, - LWBackEnd *_be) - : Event(&mainEventQueue), inst(_inst), be(_be), dcacheMiss(false) -{ - this->setFlags(Event::AutoDelete); -} - -template -void -LWBackEnd::LdWritebackEvent::process() -{ - DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum); -// DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum); - - //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); - -// iewStage->wakeCPU(); - - if (be->isSwitchedOut()) - return; - - if (dcacheMiss) { - be->removeDcacheMiss(inst); - } - - if (inst->isSquashed()) { - inst = NULL; - return; - } - - if (!inst->isExecuted()) { - inst->setExecuted(); - - // Execute again to copy data to proper place. 
- inst->completeAcc(); - } - - // Need to insert instruction into queue to commit - be->instToCommit(inst); - - //wroteToTimeBuffer = true; -// iewStage->activityThisCycle(); - - inst = NULL; -} - -template -const char * -LWBackEnd::LdWritebackEvent::description() -{ - return "Load writeback event"; -} - - -template -LWBackEnd::DCacheCompletionEvent::DCacheCompletionEvent(LWBackEnd *_be) - : Event(&mainEventQueue, CPU_Tick_Pri), be(_be) -{ -} - -template -void -LWBackEnd::DCacheCompletionEvent::process() -{ -} - -template -const char * -LWBackEnd::DCacheCompletionEvent::description() -{ - return "Cache completion event"; -} - template LWBackEnd::LWBackEnd(Params *params) : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5), - trapSquash(false), tcSquash(false), cacheCompletionEvent(this), - dcacheInterface(params->dcacheInterface), width(params->backEndWidth), - exactFullStall(true) + trapSquash(false), tcSquash(false), + width(params->backEndWidth), exactFullStall(true) { numROBEntries = params->numROBEntries; numInsts = 0; @@ -569,7 +499,7 @@ LWBackEnd::regStats() template void -LWBackEnd::setCPU(FullCPU *cpu_ptr) +LWBackEnd::setCPU(OzoneCPU *cpu_ptr) { cpu = cpu_ptr; LSQ.setCPU(cpu_ptr); @@ -639,7 +569,7 @@ LWBackEnd::handleFault(Fault &fault, Tick latency) // Consider holding onto the trap and waiting until the trap event // happens for this to be executed. - fault->invoke(thread->getTCProxy()); + fault->invoke(thread->getTC()); // Exit state update mode to avoid accidental updating. thread->inSyscall = false; @@ -929,11 +859,6 @@ LWBackEnd::executeInsts() // at the commit stage. if (inst->isMemRef() && (!inst->isDataPrefetch() && !inst->isInstPrefetch())) { - if (dcacheInterface->isBlocked()) { - // Should I move the instruction aside? 
- DPRINTF(BE, "Execute: dcache is blocked\n"); - break; - } DPRINTF(BE, "Execute: Initiating access for memory " "reference.\n"); @@ -941,7 +866,7 @@ LWBackEnd::executeInsts() LSQ.executeLoad(inst); } else if (inst->isStore()) { LSQ.executeStore(inst); - if (inst->req && !(inst->req->flags & LOCKED)) { + if (inst->req && !(inst->req->getFlags() & LOCKED)) { inst->setExecuted(); instToCommit(inst); @@ -1078,7 +1003,7 @@ LWBackEnd::commitInst(int inst_num) thread->setPC(inst->readPC()); thread->setNextPC(inst->readNextPC()); - inst->reachedCommit = true; + inst->setAtCommit(); // If the instruction is not executed yet, then it is a non-speculative // or store inst. Signal backwards that it should be executed. @@ -1183,9 +1108,11 @@ LWBackEnd::commitInst(int inst_num) // Use checker prior to updating anything due to traps or PC // based events. +#if USE_CHECKER if (checker) { - checker->tick(inst); + checker->verify(inst); } +#endif if (inst_fault != NoFault) { DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n", @@ -1200,9 +1127,12 @@ LWBackEnd::commitInst(int inst_num) } else if (inst_num != 0) { DPRINTF(BE, "Will wait until instruction is head of commit group.\n"); return false; - } else if (checker && inst->isStore()) { - checker->tick(inst); } +#if USE_CHECKER + else if (checker && inst->isStore()) { + checker->verify(inst); + } +#endif thread->setInst( static_cast(inst->staticInst->machInst)); @@ -1259,7 +1189,7 @@ LWBackEnd::commitInst(int inst_num) assert(!thread->inSyscall && !thread->trapPending); oldpc = thread->readPC(); cpu->system->pcEventQueue.service( - thread->getTCProxy()); + thread->getTC()); count++; } while (oldpc != thread->readPC()); if (count > 1) { @@ -1346,7 +1276,7 @@ LWBackEnd::squash(const InstSeqNum &sn) (*insts_it)->setCanCommit(); - (*insts_it)->removeInROB(); + (*insts_it)->clearInROB(); for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) { DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i); @@ -1497,10 +1427,10 @@ 
LWBackEnd::doSwitchOut() template void -LWBackEnd::takeOverFrom(ThreadContext *old_xc) +LWBackEnd::takeOverFrom(ThreadContext *old_tc) { switchedOut = false; - xcSquash = false; + tcSquash = false; trapSquash = false; numInsts = 0; @@ -1510,7 +1440,7 @@ LWBackEnd::takeOverFrom(ThreadContext *old_xc) switchedOut = false; dispatchStatus = Running; commitStatus = Running; - LSQ.takeOverFrom(old_xc); + LSQ.takeOverFrom(old_tc); } template diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh index b2924db54f..e0c1901345 100644 --- a/src/cpu/ozone/lw_lsq.hh +++ b/src/cpu/ozone/lw_lsq.hh @@ -47,7 +47,7 @@ #include "sim/debug.hh" #include "sim/sim_object.hh" -//class PageTable; +class MemObject; /** * Class that implements the actual LQ and SQ for each specific thread. @@ -64,7 +64,7 @@ template class OzoneLWLSQ { public: typedef typename Impl::Params Params; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::OzoneCPU OzoneCPU; typedef typename Impl::BackEnd BackEnd; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::IssueStruct IssueStruct; @@ -73,35 +73,6 @@ class OzoneLWLSQ { typedef typename std::map::iterator LdMapIt; - private: - class StoreCompletionEvent : public Event { - public: - /** Constructs a store completion event. */ - StoreCompletionEvent(DynInstPtr &inst, BackEnd *be, - Event *wb_event, OzoneLWLSQ *lsq_ptr); - - /** Processes the store completion event. */ - void process(); - - /** Returns the description of this event. */ - const char *description(); - - private: - /** The store index of the store being written back. */ - DynInstPtr inst; - - BackEnd *be; - /** The writeback event for the store. Needed for store - * conditionals. - */ - public: - Event *wbEvent; - bool miss; - private: - /** The pointer to the LSQ unit that issued the store. */ - OzoneLWLSQ *lsqPtr; - }; - public: /** Constructs an LSQ unit. init() must be called prior to use. 
*/ OzoneLWLSQ(); @@ -114,8 +85,7 @@ class OzoneLWLSQ { std::string name() const; /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr) - { cpu = cpu_ptr; } + void setCPU(OzoneCPU *cpu_ptr); /** Sets the back-end stage pointer. */ void setBE(BackEnd *be_ptr) @@ -155,6 +125,10 @@ class OzoneLWLSQ { /** Writes back stores. */ void writebackStores(); + /** Completes the data access that has been returned from the + * memory system. */ + void completeDataAccess(PacketPtr pkt); + // @todo: Include stats in the LSQ unit. //void regStats(); @@ -231,8 +205,8 @@ class OzoneLWLSQ { /** Returns if the LSQ unit will writeback on this cycle. */ bool willWB() { return storeQueue.back().canWB && - !storeQueue.back().completed/* && - !dcacheInterface->isBlocked()*/; } + !storeQueue.back().completed && + !isStoreBlocked; } void switchOut(); @@ -243,12 +217,21 @@ class OzoneLWLSQ { bool switchedOut; private: + /** Writes back the instruction, sending it to IEW. */ + void writeback(DynInstPtr &inst, PacketPtr pkt); + + /** Handles completing the send of a store to memory. */ + void storePostSend(Packet *pkt, DynInstPtr &inst); + /** Completes the store at the specified index. */ void completeStore(int store_idx); + /** Handles doing the retry. */ + void recvRetry(); + private: /** Pointer to the CPU. */ - FullCPU *cpu; + OzoneCPU *cpu; /** Pointer to the back-end stage. */ BackEnd *be; @@ -258,11 +241,13 @@ class OzoneLWLSQ { class DcachePort : public Port { protected: - FullCPU *cpu; + OzoneCPU *cpu; + + OzoneLWLSQ *lsq; public: - DcachePort(const std::string &_name, FullCPU *_cpu) - : Port(_name), cpu(_cpu) + DcachePort(OzoneCPU *_cpu, OzoneLWLSQ *_lsq) + : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq) { } protected: @@ -282,7 +267,7 @@ class OzoneLWLSQ { }; /** Pointer to the D-cache. */ - DcachePort dcachePort; + DcachePort *dcachePort; /** Pointer to the page table. 
*/ // PageTable *pTable; @@ -319,6 +304,48 @@ class OzoneLWLSQ { typename std::list::iterator lqIt; }; + /** Derived class to hold any sender state the LSQ needs. */ + class LSQSenderState : public Packet::SenderState + { + public: + /** Default constructor. */ + LSQSenderState() + : noWB(false) + { } + + /** Instruction who initiated the access to memory. */ + DynInstPtr inst; + /** Whether or not it is a load. */ + bool isLoad; + /** The LQ/SQ index of the instruction. */ + int idx; + /** Whether or not the instruction will need to writeback. */ + bool noWB; + }; + + /** Writeback event, specifically for when stores forward data to loads. */ + class WritebackEvent : public Event { + public: + /** Constructs a writeback event. */ + WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, OzoneLWLSQ *lsq_ptr); + + /** Processes the writeback event. */ + void process(); + + /** Returns the description of this event. */ + const char *description(); + + private: + /** Instruction whose results are being written back. */ + DynInstPtr inst; + + /** The packet that would have been sent to memory. */ + PacketPtr pkt; + + /** The pointer to the LSQ unit that issued the store. */ + OzoneLWLSQ *lsqPtr; + }; + enum Status { Running, Idle, @@ -395,6 +422,12 @@ class OzoneLWLSQ { /** The index of the above store. */ LQIt stallingLoad; + /** The packet that needs to be retried. */ + PacketPtr retryPkt; + + /** Whether or not a store is blocked due to the memory system. */ + bool isStoreBlocked; + /** Whether or not a load is blocked due to the memory system. It is * cleared when this value is checked via loadBlocked(). */ @@ -470,7 +503,7 @@ OzoneLWLSQ::read(RequestPtr req, T &data, int load_idx) // too). // @todo: Fix uncached accesses.
if (req->getFlags() & UNCACHEABLE && - (inst != loadQueue.back() || !inst->reachedCommit)) { + (inst != loadQueue.back() || !inst->isAtCommit())) { DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of " "commit/LSQ!\n", inst->seqNum); @@ -532,17 +565,19 @@ OzoneLWLSQ::read(RequestPtr req, T &data, int load_idx) DPRINTF(OzoneLSQ, "Forwarding from store [sn:%lli] to load to " "[sn:%lli] addr %#x, data %#x\n", - (*sq_it).inst->seqNum, inst->seqNum, req->vaddr, *(inst->memData)); -/* - typename BackEnd::LdWritebackEvent *wb = - new typename BackEnd::LdWritebackEvent(inst, - be); + (*sq_it).inst->seqNum, inst->seqNum, req->getVaddr(), + *(inst->memData)); + + PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); + data_pkt->dataStatic(inst->memData); + + WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this); // We'll say this has a 1 cycle load-store forwarding latency // for now. - // FIXME - Need to make this a parameter. + // @todo: Need to make this a parameter. wb->schedule(curTick); -*/ + // Should keep track of stat for forwarded data return NoFault; } else if ((store_has_lower_limit && lower_load_has_store_part) || @@ -575,7 +610,7 @@ OzoneLWLSQ::read(RequestPtr req, T &data, int load_idx) DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. " "Store [sn:%lli] to load addr %#x\n", - (*sq_it).inst->seqNum, req->vaddr); + (*sq_it).inst->seqNum, req->getVaddr()); return NoFault; } @@ -597,8 +632,14 @@ OzoneLWLSQ::read(RequestPtr req, T &data, int load_idx) PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); data_pkt->dataStatic(inst->memData); + LSQSenderState *state = new LSQSenderState; + state->isLoad = true; + state->idx = load_idx; + state->inst = inst; + data_pkt->senderState = state; + // if we have a cache, do cache access too - if (!dcachePort.sendTiming(data_pkt)) { + if (!dcachePort->sendTiming(data_pkt)) { // There's an older load that's already going to squash. 
if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum) return NoFault; diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh index 05db3028ac..effb21728c 100644 --- a/src/cpu/ozone/lw_lsq_impl.hh +++ b/src/cpu/ozone/lw_lsq_impl.hh @@ -28,58 +28,105 @@ * Authors: Kevin Lim */ +#include "config/use_checker.hh" + #include "arch/isa_traits.hh" #include "base/str.hh" #include "cpu/ozone/lw_lsq.hh" #include "cpu/checker/cpu.hh" -template -OzoneLWLSQ::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst, - BackEnd *_be, - Event *wb_event, - OzoneLWLSQ *lsq_ptr) - : Event(&mainEventQueue), - inst(_inst), - be(_be), - wbEvent(wb_event), - miss(false), - lsqPtr(lsq_ptr) +template +OzoneLWLSQ::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt, + OzoneLWLSQ *lsq_ptr) + : Event(&mainEventQueue), inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr) { this->setFlags(Event::AutoDelete); } -template +template void -OzoneLWLSQ::StoreCompletionEvent::process() +OzoneLWLSQ::WritebackEvent::process() { - DPRINTF(OzoneLSQ, "Cache miss complete for store [sn:%lli]\n", - inst->seqNum); - - //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); - -// lsqPtr->cpu->wakeCPU(); - if (lsqPtr->isSwitchedOut()) { - if (wbEvent) - delete wbEvent; - - return; + if (!lsqPtr->isSwitchedOut()) { + lsqPtr->writeback(inst, pkt); } + delete pkt; +} - if (wbEvent) { - wbEvent->process(); - delete wbEvent; - } - - lsqPtr->completeStore(inst->sqIdx); - if (miss) - be->removeDcacheMiss(inst); +template +const char * +OzoneLWLSQ::WritebackEvent::description() +{ + return "Store writeback event"; } template -const char * -OzoneLWLSQ::StoreCompletionEvent::description() +Tick +OzoneLWLSQ::DcachePort::recvAtomic(PacketPtr pkt) { - return "LSQ store completion event"; + panic("O3CPU model does not work with atomic mode!"); + return curTick; +} + +template +void +OzoneLWLSQ::DcachePort::recvFunctional(PacketPtr pkt) +{ + panic("O3CPU doesn't expect recvFunctional callback!"); 
+} + +template +void +OzoneLWLSQ::DcachePort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("O3CPU doesn't expect recvStatusChange callback!"); +} + +template +bool +OzoneLWLSQ::DcachePort::recvTiming(PacketPtr pkt) +{ + lsq->completeDataAccess(pkt); + return true; +} + +template +void +OzoneLWLSQ::DcachePort::recvRetry() +{ + lsq->recvRetry(); +} + +template +void +OzoneLWLSQ::completeDataAccess(PacketPtr pkt) +{ + LSQSenderState *state = dynamic_cast(pkt->senderState); + DynInstPtr inst = state->inst; + DPRINTF(IEW, "Writeback event [sn:%lli]\n", inst->seqNum); + DPRINTF(Activity, "Activity: Writeback event [sn:%lli]\n", inst->seqNum); + + //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); + + if (isSwitchedOut() || inst->isSquashed()) { + delete state; + delete pkt; + return; + } else { + if (!state->noWB) { + writeback(inst, pkt); + } + + if (inst->isStore()) { + completeStore(state->idx); + } + } + + delete state; + delete pkt; } template @@ -109,8 +156,6 @@ OzoneLWLSQ::init(Params *params, unsigned maxLQEntries, usedPorts = 0; cachePorts = params->cachePorts; - dcacheInterface = params->dcacheInterface; - loadFaultInst = storeFaultInst = memDepViolator = NULL; blockedLoadSeqNum = 0; @@ -123,6 +168,24 @@ OzoneLWLSQ::name() const return "lsqunit"; } +template +void +OzoneLWLSQ::setCPU(OzoneCPU *cpu_ptr) +{ + cpu = cpu_ptr; + dcachePort = new DcachePort(cpu, this); + + Port *mem_dport = mem->getPort(""); + dcachePort->setPeer(mem_dport); + mem_dport->setPeer(dcachePort); + +#if USE_CHECKER + if (cpu->checker) { + cpu->checker->setDcachePort(dcachePort); + } +#endif +} + template void OzoneLWLSQ::clearLQ() @@ -481,6 +544,12 @@ OzoneLWLSQ::writebackStores() (*sq_it).canWB && usedPorts < cachePorts) { + if (isStoreBlocked) { + DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache" + " is blocked!\n"); + break; + } + DynInstPtr inst = (*sq_it).inst; if ((*sq_it).size == 0 && !(*sq_it).completed) { @@ 
-495,48 +564,64 @@ OzoneLWLSQ::writebackStores() continue; } - if (dcacheInterface && dcacheInterface->isBlocked()) { - DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache" - " is blocked!\n"); - break; - } - ++usedPorts; assert((*sq_it).req); assert(!(*sq_it).committed); + Request *req = (*sq_it).req; (*sq_it).committed = true; - MemReqPtr req = (*sq_it).req; + assert(!inst->memData); + inst->memData = new uint8_t[64]; + memcpy(inst->memData, (uint8_t *)&(*sq_it).data, + req->getSize()); - req->cmd = Write; - req->completionEvent = NULL; - req->time = curTick; + PacketPtr data_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast); + data_pkt->dataStatic(inst->memData); - switch((*sq_it).size) { - case 1: - cpu->write(req, (uint8_t &)(*sq_it).data); - break; - case 2: - cpu->write(req, (uint16_t &)(*sq_it).data); - break; - case 4: - cpu->write(req, (uint32_t &)(*sq_it).data); - break; - case 8: - cpu->write(req, (uint64_t &)(*sq_it).data); - break; - default: - panic("Unexpected store size!\n"); - } - if (!(req->flags & LOCKED)) { - (*sq_it).inst->setCompleted(); - if (cpu->checker) { - cpu->checker->tick((*sq_it).inst); + LSQSenderState *state = new LSQSenderState; + state->isLoad = false; + state->idx = inst->sqIdx; + state->inst = inst; + data_pkt->senderState = state; + + DPRINTF(OzoneLSQ, "D-Cache: Writing back store PC:%#x " + "to Addr:%#x, data:%#x [sn:%lli]\n", + (*sq_it).inst->readPC(), + req->getPaddr(), *(inst->memData), + inst->seqNum); + + // @todo: Remove this SC hack once the memory system handles it. + if (req->getFlags() & LOCKED) { + if (req->getFlags() & UNCACHEABLE) { + req->setScResult(2); + } else { + if (cpu->lockFlag) { + req->setScResult(1); + } else { + req->setScResult(0); + // Hack: Instantly complete this store. + completeDataAccess(data_pkt); + --sq_it; + continue; + } } + } else { + // Non-store conditionals do not need a writeback. 
+ state->noWB = true; } + if (!dcachePort->sendTiming(data_pkt)) { + // Need to handle becoming blocked on a store. + isStoreBlocked = true; + assert(retryPkt == NULL); + retryPkt = data_pkt; + } else { + storePostSend(data_pkt, inst); + --sq_it; + } +/* DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x " "to Addr:%#x, data:%#x [sn:%lli]\n", inst->sqIdx,inst->readPC(), @@ -606,6 +691,7 @@ OzoneLWLSQ::writebackStores() } else { panic("Must HAVE DCACHE!!!!!\n"); } +*/ } // Not sure this should set it to 0. @@ -685,10 +771,6 @@ OzoneLWLSQ::squash(const InstSeqNum &squashed_num) SQIndices.push((*sq_it).inst->sqIdx); (*sq_it).inst = NULL; (*sq_it).canWB = 0; - - if ((*sq_it).req) { - assert(!(*sq_it).req->completionEvent); - } (*sq_it).req = NULL; --stores; storeQueue.erase(sq_it++); @@ -732,6 +814,72 @@ OzoneLWLSQ::dumpInsts() cprintf("\n"); } +template +void +OzoneLWLSQ::storePostSend(Packet *pkt, DynInstPtr &inst) +{ + if (isStalled() && + inst->seqNum == stallingStoreIsn) { + DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] " + "load [sn:%lli]\n", + stallingStoreIsn, (*stallingLoad)->seqNum); + stalled = false; + stallingStoreIsn = 0; + be->replayMemInst((*stallingLoad)); + } + + if (!inst->isStoreConditional()) { + // The store is basically completed at this time. This + // only works so long as the checker doesn't try to + // verify the value in memory for stores. + inst->setCompleted(); +#if USE_CHECKER + if (cpu->checker) { + cpu->checker->verify(inst); + } +#endif + } + + if (pkt->result != Packet::Success) { + DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n"); + + DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n", + inst->seqNum); + + //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum); + + //DPRINTF(OzoneLWLSQ, "Added MSHR. count = %i\n",mshrSeqNums.size()); + + // @todo: Increment stat here. 
+ } else { + DPRINTF(OzoneLSQ,"D-Cache: Write Hit!\n"); + + DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", + inst->seqNum); + } +} + +template +void +OzoneLWLSQ::writeback(DynInstPtr &inst, PacketPtr pkt) +{ + // Squashed instructions do not need to complete their access. + if (inst->isSquashed()) { + assert(!inst->isStore()); + return; + } + + if (!inst->isExecuted()) { + inst->setExecuted(); + + // Complete access to copy data to proper place. + inst->completeAcc(pkt); + } + + // Need to insert instruction into queue to commit + be->instToCommit(inst); +} + template void OzoneLWLSQ::completeStore(int store_idx) @@ -766,9 +914,18 @@ OzoneLWLSQ::completeStore(int store_idx) --stores; inst->setCompleted(); +#if USE_CHECKER if (cpu->checker) { - cpu->checker->tick(inst); + cpu->checker->verify(inst); } +#endif +} + +template +void +OzoneLWLSQ::recvRetry() +{ + panic("Unimplemented!"); } template @@ -777,68 +934,6 @@ OzoneLWLSQ::switchOut() { assert(storesToWB == 0); switchedOut = true; - SQIt sq_it = --(storeQueue.end()); - while (storesToWB > 0 && - sq_it != storeQueue.end() && - (*sq_it).inst && - (*sq_it).canWB) { - - DynInstPtr inst = (*sq_it).inst; - - if ((*sq_it).size == 0 && !(*sq_it).completed) { - sq_it--; - continue; - } - - // Store conditionals don't complete until *after* they have written - // back. If it's here and not yet sent to memory, then don't bother - // as it's not part of committed state. 
- if (inst->isDataPrefetch() || (*sq_it).committed) { - sq_it--; - continue; - } else if ((*sq_it).req->flags & LOCKED) { - sq_it--; - assert(!(*sq_it).canWB || - ((*sq_it).canWB && (*sq_it).req->flags & LOCKED)); - continue; - } - - assert((*sq_it).req); - assert(!(*sq_it).committed); - - MemReqPtr req = (*sq_it).req; - (*sq_it).committed = true; - - req->cmd = Write; - req->completionEvent = NULL; - req->time = curTick; - assert(!req->data); - req->data = new uint8_t[64]; - memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); - - DPRINTF(OzoneLSQ, "Switching out : Writing back store idx:%i PC:%#x " - "to Addr:%#x, data:%#x directly to memory [sn:%lli]\n", - inst->sqIdx,inst->readPC(), - req->paddr, *(req->data), - inst->seqNum); - - switch((*sq_it).size) { - case 1: - cpu->write(req, (uint8_t &)(*sq_it).data); - break; - case 2: - cpu->write(req, (uint16_t &)(*sq_it).data); - break; - case 4: - cpu->write(req, (uint32_t &)(*sq_it).data); - break; - case 8: - cpu->write(req, (uint64_t &)(*sq_it).data); - break; - default: - panic("Unexpected store size!\n"); - } - } // Clear the queue to free up resources storeQueue.clear(); diff --git a/src/cpu/ozone/ozone_impl.hh b/src/cpu/ozone/ozone_impl.hh index e977d06a9c..5036757385 100644 --- a/src/cpu/ozone/ozone_impl.hh +++ b/src/cpu/ozone/ozone_impl.hh @@ -50,7 +50,7 @@ class OzoneDynInst; struct OzoneImpl { typedef SimpleParams Params; typedef OzoneCPU OzoneCPU; - typedef OzoneCPU FullCPU; + typedef OzoneCPU CPUType; // Would like to put these into their own area. 
// typedef NullPredictor BranchPred; diff --git a/src/cpu/ozone/rename_table.cc b/src/cpu/ozone/rename_table.cc index b0a36afbeb..a44054b6e8 100644 --- a/src/cpu/ozone/rename_table.cc +++ b/src/cpu/ozone/rename_table.cc @@ -30,7 +30,7 @@ #include "cpu/ozone/rename_table_impl.hh" #include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" +//#include "cpu/ozone/simple_impl.hh" template class RenameTable; -template class RenameTable; +//template class RenameTable; diff --git a/src/cpu/ozone/simple_cpu_builder.cc b/src/cpu/ozone/simple_cpu_builder.cc new file mode 100644 index 0000000000..baaf7c7084 --- /dev/null +++ b/src/cpu/ozone/simple_cpu_builder.cc @@ -0,0 +1,452 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include + +#include "cpu/checker/cpu.hh" +#include "cpu/inst_seq.hh" +#include "cpu/ozone/cpu_impl.hh" +#include "cpu/ozone/simple_impl.hh" +#include "cpu/ozone/simple_params.hh" +#include "mem/cache/base_cache.hh" +#include "sim/builder.hh" +#include "sim/process.hh" +#include "sim/sim_object.hh" + +template +class OzoneCPU; + +class SimpleOzoneCPU : public OzoneCPU +{ + public: + SimpleOzoneCPU(SimpleParams *p) + : OzoneCPU(p) + { } +}; + +//////////////////////////////////////////////////////////////////////// +// +// OzoneCPU Simulation Object +// + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + + Param clock; + Param numThreads; + +#if FULL_SYSTEM +SimObjectParam system; +Param cpu_id; +SimObjectParam itb; +SimObjectParam dtb; +#else +SimObjectVectorParam workload; +//SimObjectParam page_table; +#endif // FULL_SYSTEM + +SimObjectParam mem; + +SimObjectParam checker; + +Param max_insts_any_thread; +Param max_insts_all_threads; +Param max_loads_any_thread; +Param max_loads_all_threads; + +SimObjectParam icache; +SimObjectParam dcache; + +Param cachePorts; +Param width; +Param frontEndWidth; +Param backEndWidth; +Param backEndSquashLatency; +Param backEndLatency; +Param maxInstBufferSize; +Param numPhysicalRegs; + +Param decodeToFetchDelay; +Param renameToFetchDelay; +Param iewToFetchDelay; +Param commitToFetchDelay; +Param fetchWidth; + +Param renameToDecodeDelay; +Param iewToDecodeDelay; 
+Param commitToDecodeDelay; +Param fetchToDecodeDelay; +Param decodeWidth; + +Param iewToRenameDelay; +Param commitToRenameDelay; +Param decodeToRenameDelay; +Param renameWidth; + +Param commitToIEWDelay; +Param renameToIEWDelay; +Param issueToExecuteDelay; +Param issueWidth; +Param executeWidth; +Param executeIntWidth; +Param executeFloatWidth; +Param executeBranchWidth; +Param executeMemoryWidth; + +Param iewToCommitDelay; +Param renameToROBDelay; +Param commitWidth; +Param squashWidth; + +Param predType; +Param localPredictorSize; +Param localCtrBits; +Param localHistoryTableSize; +Param localHistoryBits; +Param globalPredictorSize; +Param globalCtrBits; +Param globalHistoryBits; +Param choicePredictorSize; +Param choiceCtrBits; + +Param BTBEntries; +Param BTBTagSize; + +Param RASSize; + +Param LQEntries; +Param SQEntries; +Param LFSTSize; +Param SSITSize; + +Param numPhysIntRegs; +Param numPhysFloatRegs; +Param numIQEntries; +Param numROBEntries; + +Param decoupledFrontEnd; +Param dispatchWidth; +Param wbWidth; + +Param smtNumFetchingThreads; +Param smtFetchPolicy; +Param smtLSQPolicy; +Param smtLSQThreshold; +Param smtIQPolicy; +Param smtIQThreshold; +Param smtROBPolicy; +Param smtROBThreshold; +Param smtCommitPolicy; + +Param instShiftAmt; + +Param defer_registration; + +Param function_trace; +Param function_trace_start; + +END_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + + INIT_PARAM(clock, "clock speed"), + INIT_PARAM(numThreads, "number of HW thread contexts"), + +#if FULL_SYSTEM + INIT_PARAM(system, "System object"), + INIT_PARAM(cpu_id, "processor ID"), + INIT_PARAM(itb, "Instruction translation buffer"), + INIT_PARAM(dtb, "Data translation buffer"), +#else + INIT_PARAM(workload, "Processes to run"), +// INIT_PARAM(page_table, "Page table"), +#endif // FULL_SYSTEM + + INIT_PARAM_DFLT(mem, "Memory", NULL), + + INIT_PARAM_DFLT(checker, "Checker CPU", NULL), + + INIT_PARAM_DFLT(max_insts_any_thread, + 
"Terminate when any thread reaches this inst count", + 0), + INIT_PARAM_DFLT(max_insts_all_threads, + "Terminate when all threads have reached" + "this inst count", + 0), + INIT_PARAM_DFLT(max_loads_any_thread, + "Terminate when any thread reaches this load count", + 0), + INIT_PARAM_DFLT(max_loads_all_threads, + "Terminate when all threads have reached this load" + "count", + 0), + + INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), + INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), + + INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), + INIT_PARAM_DFLT(width, "Width", 1), + INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1), + INIT_PARAM_DFLT(backEndWidth, "Back end width", 1), + INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1), + INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1), + INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16), + INIT_PARAM(numPhysicalRegs, "Number of physical registers"), + + INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), + INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), + INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" + "delay"), + INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), + INIT_PARAM(fetchWidth, "Fetch width"), + INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), + INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" + "delay"), + INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), + INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), + INIT_PARAM(decodeWidth, "Decode width"), + + INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" + "delay"), + INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), + INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), + INIT_PARAM(renameWidth, "Rename width"), + + INIT_PARAM(commitToIEWDelay, "Commit to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(renameToIEWDelay, "Rename to " + "Issue/Execute/Writeback delay"), + 
INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" + "to the IEW stage)"), + INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(executeWidth, "Execute width"), + INIT_PARAM(executeIntWidth, "Integer execute width"), + INIT_PARAM(executeFloatWidth, "Floating point execute width"), + INIT_PARAM(executeBranchWidth, "Branch execute width"), + INIT_PARAM(executeMemoryWidth, "Memory execute width"), + + INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " + "delay"), + INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), + INIT_PARAM(commitWidth, "Commit width"), + INIT_PARAM(squashWidth, "Squash width"), + + INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), + INIT_PARAM(localPredictorSize, "Size of local predictor"), + INIT_PARAM(localCtrBits, "Bits per counter"), + INIT_PARAM(localHistoryTableSize, "Size of local history table"), + INIT_PARAM(localHistoryBits, "Bits for the local history"), + INIT_PARAM(globalPredictorSize, "Size of global predictor"), + INIT_PARAM(globalCtrBits, "Bits per counter"), + INIT_PARAM(globalHistoryBits, "Bits of history"), + INIT_PARAM(choicePredictorSize, "Size of choice predictor"), + INIT_PARAM(choiceCtrBits, "Bits of choice counters"), + + INIT_PARAM(BTBEntries, "Number of BTB entries"), + INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), + + INIT_PARAM(RASSize, "RAS size"), + + INIT_PARAM(LQEntries, "Number of load queue entries"), + INIT_PARAM(SQEntries, "Number of store queue entries"), + INIT_PARAM(LFSTSize, "Last fetched store table size"), + INIT_PARAM(SSITSize, "Store set ID table size"), + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of instruction queue entries"), + INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), + + INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true), + 
INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0), + INIT_PARAM_DFLT(wbWidth, "Writeback width", 0), + + INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1), + INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"), + INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100), + INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"), + INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100), + INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100), + INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"), + + INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace") + +END_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + +CREATE_SIM_OBJECT(SimpleOzoneCPU) +{ + SimpleOzoneCPU *cpu; + +#if FULL_SYSTEM + // Full-system only supports a single thread for the moment. + int actual_num_threads = 1; +#else + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. + int actual_num_threads = + numThreads.isValid() ? 
numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + +#endif + + SimpleParams *params = new SimpleParams; + + params->clock = clock; + + params->name = getInstanceName(); + params->numberOfThreads = actual_num_threads; + +#if FULL_SYSTEM + params->system = system; + params->cpu_id = cpu_id; + params->itb = itb; + params->dtb = dtb; +#else + params->workload = workload; +// params->pTable = page_table; +#endif // FULL_SYSTEM + + params->mem = mem; + params->checker = checker; + params->max_insts_any_thread = max_insts_any_thread; + params->max_insts_all_threads = max_insts_all_threads; + params->max_loads_any_thread = max_loads_any_thread; + params->max_loads_all_threads = max_loads_all_threads; + + // + // Caches + // + params->icacheInterface = icache ? icache->getInterface() : NULL; + params->dcacheInterface = dcache ? dcache->getInterface() : NULL; + params->cachePorts = cachePorts; + + params->width = width; + params->frontEndWidth = frontEndWidth; + params->backEndWidth = backEndWidth; + params->backEndSquashLatency = backEndSquashLatency; + params->backEndLatency = backEndLatency; + params->maxInstBufferSize = maxInstBufferSize; + params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs; + + params->decodeToFetchDelay = decodeToFetchDelay; + params->renameToFetchDelay = renameToFetchDelay; + params->iewToFetchDelay = iewToFetchDelay; + params->commitToFetchDelay = commitToFetchDelay; + params->fetchWidth = fetchWidth; + + params->renameToDecodeDelay = renameToDecodeDelay; + params->iewToDecodeDelay = iewToDecodeDelay; + params->commitToDecodeDelay = commitToDecodeDelay; + params->fetchToDecodeDelay = fetchToDecodeDelay; + params->decodeWidth = decodeWidth; + + params->iewToRenameDelay = iewToRenameDelay; + params->commitToRenameDelay = commitToRenameDelay; + params->decodeToRenameDelay = decodeToRenameDelay; + params->renameWidth = renameWidth; + + params->commitToIEWDelay = commitToIEWDelay; + 
params->renameToIEWDelay = renameToIEWDelay; + params->issueToExecuteDelay = issueToExecuteDelay; + params->issueWidth = issueWidth; + params->executeWidth = executeWidth; + params->executeIntWidth = executeIntWidth; + params->executeFloatWidth = executeFloatWidth; + params->executeBranchWidth = executeBranchWidth; + params->executeMemoryWidth = executeMemoryWidth; + + params->iewToCommitDelay = iewToCommitDelay; + params->renameToROBDelay = renameToROBDelay; + params->commitWidth = commitWidth; + params->squashWidth = squashWidth; + + params->predType = predType; + params->localPredictorSize = localPredictorSize; + params->localCtrBits = localCtrBits; + params->localHistoryTableSize = localHistoryTableSize; + params->localHistoryBits = localHistoryBits; + params->globalPredictorSize = globalPredictorSize; + params->globalCtrBits = globalCtrBits; + params->globalHistoryBits = globalHistoryBits; + params->choicePredictorSize = choicePredictorSize; + params->choiceCtrBits = choiceCtrBits; + + params->BTBEntries = BTBEntries; + params->BTBTagSize = BTBTagSize; + + params->RASSize = RASSize; + + params->LQEntries = LQEntries; + params->SQEntries = SQEntries; + + params->SSITSize = SSITSize; + params->LFSTSize = LFSTSize; + + params->numPhysIntRegs = numPhysIntRegs; + params->numPhysFloatRegs = numPhysFloatRegs; + params->numIQEntries = numIQEntries; + params->numROBEntries = numROBEntries; + + params->decoupledFrontEnd = decoupledFrontEnd; + params->dispatchWidth = dispatchWidth; + params->wbWidth = wbWidth; + + params->smtNumFetchingThreads = smtNumFetchingThreads; + params->smtFetchPolicy = smtFetchPolicy; + params->smtIQPolicy = smtIQPolicy; + params->smtLSQPolicy = smtLSQPolicy; + params->smtLSQThreshold = smtLSQThreshold; + params->smtROBPolicy = smtROBPolicy; + params->smtROBThreshold = smtROBThreshold; + params->smtCommitPolicy = smtCommitPolicy; + + params->instShiftAmt = 2; + + params->deferRegistration = defer_registration; + + params->functionTrace = 
function_trace; + params->functionTraceStart = function_trace_start; + + cpu = new SimpleOzoneCPU(params); + + return cpu; +} + +REGISTER_SIM_OBJECT("SimpleOzoneCPU", SimpleOzoneCPU) + diff --git a/src/cpu/ozone/simple_params.hh b/src/cpu/ozone/simple_params.hh index 13eb05e775..11cee716f1 100644 --- a/src/cpu/ozone/simple_params.hh +++ b/src/cpu/ozone/simple_params.hh @@ -37,8 +37,7 @@ class AlphaDTB; class AlphaITB; class FUPool; -class FunctionalMemory; -class MemInterface; +class MemObject; class PageTable; class Process; class System; @@ -62,13 +61,13 @@ class SimpleParams : public BaseCPU::Params //Page Table PageTable *pTable; - FunctionalMemory *mem; + MemObject *mem; // // Caches // - MemInterface *icacheInterface; - MemInterface *dcacheInterface; +// MemInterface *icacheInterface; +// MemInterface *dcacheInterface; unsigned cachePorts; unsigned width; diff --git a/src/cpu/ozone/thread_state.hh b/src/cpu/ozone/thread_state.hh index 299878c292..ef4b1429d2 100644 --- a/src/cpu/ozone/thread_state.hh +++ b/src/cpu/ozone/thread_state.hh @@ -58,30 +58,23 @@ class FunctionalMemory; template struct OzoneThreadState : public ThreadState { typedef typename ThreadContext::Status Status; - typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::CPUType CPUType; typedef TheISA::MiscReg MiscReg; #if FULL_SYSTEM - OzoneThreadState(FullCPU *_cpu, int _thread_num) + OzoneThreadState(CPUType *_cpu, int _thread_num) : ThreadState(-1, _thread_num), - inSyscall(0), trapPending(0) + intrflag(0), inSyscall(0), trapPending(0) { - memset(&regs, 0, sizeof(TheISA::RegFile)); + miscRegFile.clear(); } #else - OzoneThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid) - : ThreadState(-1, _thread_num, NULL, _process, _asid), + OzoneThreadState(CPUType *_cpu, int _thread_num, Process *_process, + int _asid, MemObject *mem) + : ThreadState(-1, _thread_num, _process, _asid, mem), cpu(_cpu), inSyscall(0), trapPending(0) { - memset(&regs, 0, sizeof(TheISA::RegFile)); - 
} - - OzoneThreadState(FullCPU *_cpu, int _thread_num, - int _asid) - : ThreadState(-1, _thread_num, NULL, NULL, _asid), - cpu(_cpu), inSyscall(0), trapPending(0) - { - memset(&regs, 0, sizeof(TheISA::RegFile)); + miscRegFile.clear(); } #endif @@ -91,9 +84,11 @@ struct OzoneThreadState : public ThreadState { Addr nextPC; - TheISA::RegFile regs; + TheISA::MiscRegFile miscRegFile; - typename Impl::FullCPU *cpu; + int intrflag; + + typename Impl::CPUType *cpu; bool inSyscall; @@ -103,54 +98,24 @@ struct OzoneThreadState : public ThreadState { ThreadContext *getTC() { return tc; } -#if !FULL_SYSTEM - Fault translateInstReq(Request *req) - { - return process->pTable->translate(req); - } - Fault translateDataReadReq(Request *req) - { - return process->pTable->translate(req); - } - Fault translateDataWriteReq(Request *req) - { - return process->pTable->translate(req); - } -#else - Fault translateInstReq(Request *req) - { - return cpu->itb->translate(req); - } - - Fault translateDataReadReq(Request *req) - { - return cpu->dtb->translate(req, false); - } - - Fault translateDataWriteReq(Request *req) - { - return cpu->dtb->translate(req, true); - } -#endif - MiscReg readMiscReg(int misc_reg) { - return regs.readMiscReg(misc_reg); + return miscRegFile.readReg(misc_reg); } MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) { - return regs.readMiscRegWithEffect(misc_reg, fault, tc); + return miscRegFile.readRegWithEffect(misc_reg, fault, tc); } Fault setMiscReg(int misc_reg, const MiscReg &val) { - return regs.setMiscReg(misc_reg, val); + return miscRegFile.setReg(misc_reg, val); } Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) { - return regs.setMiscRegWithEffect(misc_reg, val, tc); + return miscRegFile.setRegWithEffect(misc_reg, val, tc); } uint64_t readPC() From fa9c774421952f5f4b6aee240da693e5f94ba1ac Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 22 Jun 2006 23:34:37 -0400 Subject: [PATCH 061/152] Delete old unused files in the Ozone directory. 
--HG-- extra : convert_revision : 8f417b566e772d7a26d91fb66ff3d4484bd35c42 From 63bdaeedfae71aa9eab4716a884fad9d7c4ece54 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 22 Jun 2006 23:43:45 -0400 Subject: [PATCH 062/152] Checker related updates. src/cpu/o3/cpu.cc: Updates to make sure the checker is compiled in if enabled and also to include it only when it's used. --HG-- extra : convert_revision : c48ead5b2665dc858acd87c2ee99d39d80594a69 --- src/cpu/o3/cpu.cc | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index a411fe42e1..5533990481 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -39,7 +39,6 @@ #endif #include "cpu/activity.hh" -#include "cpu/checker/cpu.hh" #include "cpu/simple_thread.hh" #include "cpu/thread_context.hh" #include "cpu/o3/alpha_dyn_inst.hh" @@ -49,6 +48,10 @@ #include "sim/root.hh" #include "sim/stat_control.hh" +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif + using namespace std; using namespace TheISA; @@ -135,16 +138,18 @@ FullO3CPU::FullO3CPU(Params *params) checker = NULL; -#if USE_CHECKER if (params->checker) { +#if USE_CHECKER BaseCPU *temp_checker = params->checker; checker = dynamic_cast *>(temp_checker); checker->setMemory(mem); #if FULL_SYSTEM checker->setSystem(params->system); #endif +#else + panic("Checker enabled but not compiled in!"); +#endif // USE_CHECKER } -#endif #if !FULL_SYSTEM thread.resize(number_of_threads); @@ -688,8 +693,10 @@ FullO3CPU::signalSwitched() removeList.pop(); } +#if USE_CHECKER if (checker) checker->switchOut(sampler); +#endif if (tickEvent.scheduled()) tickEvent.squash(); From 4787d357d51811bf5f4c73583e038de3f60e6a72 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Sun, 25 Jun 2006 00:22:41 -0400 Subject: [PATCH 063/152] Make OzoneCPU work again in SE/FS. src/cpu/ozone/cpu.hh: Fixes to get OzoneCPU working in SE/FS again. src/cpu/ozone/cpu_impl.hh: Be sure to set up ports properly. 
src/cpu/ozone/front_end.hh: Allow port to be created without specifying its name at the beginning. src/cpu/ozone/front_end_impl.hh: Setup port properly, also only use checker if it's enabled. src/cpu/ozone/lw_back_end_impl.hh: Be sure to initialize variables. src/cpu/ozone/lw_lsq.hh: Handle locked flag for UP systems. src/cpu/ozone/lw_lsq_impl.hh: Initialize all variables. src/python/m5/objects/OzoneCPU.py: Fix up config. --HG-- extra : convert_revision : c99e7bf82fc0dd1099c7a82eaebd58ab6017764d --- src/cpu/ozone/cpu.hh | 3 +-- src/cpu/ozone/cpu_impl.hh | 30 +++++++++++++++++++++++++++++- src/cpu/ozone/front_end.hh | 9 +++++---- src/cpu/ozone/front_end_impl.hh | 26 ++++++++++++++++++++++++++ src/cpu/ozone/lw_back_end_impl.hh | 3 ++- src/cpu/ozone/lw_lsq.hh | 4 ++++ src/cpu/ozone/lw_lsq_impl.hh | 6 ++++-- src/python/m5/objects/OzoneCPU.py | 3 --- 8 files changed, 71 insertions(+), 13 deletions(-) diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh index cacc84786a..f726ac99b4 100644 --- a/src/cpu/ozone/cpu.hh +++ b/src/cpu/ozone/cpu.hh @@ -214,12 +214,11 @@ class OzoneCPU : public BaseCPU uint64_t readNextNPC() { - panic("Alpha has no NextNPC!"); return 0; } void setNextNPC(uint64_t val) - { panic("Alpha has no NextNPC!"); } + { } public: // ISA stuff: diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh index 2b25ad124d..2cdc8a3da9 100644 --- a/src/cpu/ozone/cpu_impl.hh +++ b/src/cpu/ozone/cpu_impl.hh @@ -201,7 +201,35 @@ OzoneCPU::OzoneCPU(Params *p) backEnd->renameTable.copyFrom(thread.renameTable); #if !FULL_SYSTEM -// pTable = p->pTable; + /* Use this port to for syscall emulation writes to memory. 
*/ + Port *mem_port; + TranslatingPort *trans_port; + trans_port = new TranslatingPort(csprintf("%s-%d-funcport", + name(), 0), + p->workload[0]->pTable, + false); + mem_port = p->mem->getPort("functional"); + mem_port->setPeer(trans_port); + trans_port->setPeer(mem_port); + thread.setMemPort(trans_port); +#else + Port *mem_port; + FunctionalPort *phys_port; + VirtualPort *virt_port; + phys_port = new FunctionalPort(csprintf("%s-%d-funcport", + name(), 0)); + mem_port = system->physmem->getPort("functional"); + mem_port->setPeer(phys_port); + phys_port->setPeer(mem_port); + + virt_port = new VirtualPort(csprintf("%s-%d-vport", + name(), 0)); + mem_port = system->physmem->getPort("functional"); + mem_port->setPeer(virt_port); + virt_port->setPeer(mem_port); + + thread.setPhysPort(phys_port); + thread.setVirtPort(virt_port); #endif lockFlag = 0; diff --git a/src/cpu/ozone/front_end.hh b/src/cpu/ozone/front_end.hh index af310efc32..181609098c 100644 --- a/src/cpu/ozone/front_end.hh +++ b/src/cpu/ozone/front_end.hh @@ -43,7 +43,7 @@ #include "sim/stats.hh" class ThreadContext; -class MemInterface; +class MemObject; template class OzoneThreadState; class PageTable; @@ -75,7 +75,7 @@ class FrontEnd public: /** Default constructor. */ IcachePort(FrontEnd *_fe) - : Port(_fe->name() + "-iport"), fe(_fe) + : fe(_fe) { } protected: @@ -105,8 +105,7 @@ class FrontEnd std::string name() const; - void setCPU(CPUType *cpu_ptr) - { cpu = cpu_ptr; } + void setCPU(CPUType *cpu_ptr); void setBackEnd(BackEnd *back_end_ptr) { backEnd = back_end_ptr; } @@ -206,6 +205,8 @@ class FrontEnd IcachePort icachePort; + MemObject *mem; + RequestPtr memReq; /** Mask to get a cache block's address. 
*/ diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh index b1bc325c73..40042489da 100644 --- a/src/cpu/ozone/front_end_impl.hh +++ b/src/cpu/ozone/front_end_impl.hh @@ -28,6 +28,8 @@ * Authors: Kevin Lim */ +#include "config/use_checker.hh" + #include "arch/faults.hh" #include "arch/isa_traits.hh" #include "base/statistics.hh" @@ -37,6 +39,10 @@ #include "mem/packet.hh" #include "mem/request.hh" +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif + using namespace TheISA; template @@ -83,6 +89,7 @@ template FrontEnd::FrontEnd(Params *params) : branchPred(params), icachePort(this), + mem(params->mem), instBufferSize(0), maxInstBufferSize(params->maxInstBufferSize), width(params->frontEndWidth), @@ -123,6 +130,25 @@ FrontEnd::name() const return cpu->name() + ".frontend"; } +template +void +FrontEnd::setCPU(CPUType *cpu_ptr) +{ + cpu = cpu_ptr; + + icachePort.setName(this->name() + "-iport"); + + Port *mem_dport = mem->getPort(""); + icachePort.setPeer(mem_dport); + mem_dport->setPeer(&icachePort); + +#if USE_CHECKER + if (cpu->checker) { + cpu->checker->setIcachePort(&icachePort); + } +#endif +} + template void FrontEnd::setCommBuffer(TimeBuffer *_comm) diff --git a/src/cpu/ozone/lw_back_end_impl.hh b/src/cpu/ozone/lw_back_end_impl.hh index dcd7a0d7ed..a73d3ee6ee 100644 --- a/src/cpu/ozone/lw_back_end_impl.hh +++ b/src/cpu/ozone/lw_back_end_impl.hh @@ -142,7 +142,7 @@ LWBackEnd::replayMemInst(DynInstPtr &inst) template LWBackEnd::LWBackEnd(Params *params) : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5), - trapSquash(false), tcSquash(false), + trapSquash(false), tcSquash(false), LSQ(params), width(params->backEndWidth), exactFullStall(true) { numROBEntries = params->numROBEntries; @@ -169,6 +169,7 @@ LWBackEnd::LWBackEnd(Params *params) LSQ.init(params, params->LQEntries, params->SQEntries, 0); dispatchStatus = Running; + commitStatus = Running; } template diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh index 
e0c1901345..c749e3aeea 100644 --- a/src/cpu/ozone/lw_lsq.hh +++ b/src/cpu/ozone/lw_lsq.hh @@ -654,6 +654,10 @@ OzoneLWLSQ::read(RequestPtr req, T &data, int load_idx) return NoFault; } + if (req->getFlags() & LOCKED) { + cpu->lockFlag = true; + } + if (data_pkt->result != Packet::Success) { DPRINTF(OzoneLSQ, "OzoneLSQ: D-cache miss!\n"); DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh index effb21728c..a65a2a4d37 100644 --- a/src/cpu/ozone/lw_lsq_impl.hh +++ b/src/cpu/ozone/lw_lsq_impl.hh @@ -131,8 +131,8 @@ OzoneLWLSQ::completeDataAccess(PacketPtr pkt) template OzoneLWLSQ::OzoneLWLSQ() - : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false), - loadBlockedHandled(false) + : switchedOut(false), loads(0), stores(0), storesToWB(0), stalled(false), + isStoreBlocked(false), isLoadBlocked(false), loadBlockedHandled(false) { } @@ -153,6 +153,8 @@ OzoneLWLSQ::init(Params *params, unsigned maxLQEntries, SQIndices.push(i); } + mem = params->mem; + usedPorts = 0; cachePorts = params->cachePorts; diff --git a/src/python/m5/objects/OzoneCPU.py b/src/python/m5/objects/OzoneCPU.py index f2d9aea849..8aff89203f 100644 --- a/src/python/m5/objects/OzoneCPU.py +++ b/src/python/m5/objects/OzoneCPU.py @@ -7,9 +7,6 @@ class DerivOzoneCPU(BaseCPU): numThreads = Param.Unsigned("number of HW thread contexts") - if not build_env['FULL_SYSTEM']: - mem = Param.FunctionalMemory(NULL, "memory") - checker = Param.BaseCPU("Checker CPU") width = Param.Unsigned("Width") From f1a24337a82b04b4bb58499a46dc80f00bdc47d7 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Sun, 25 Jun 2006 00:24:50 -0400 Subject: [PATCH 064/152] Allow ports to be created without a name. 
--HG-- extra : convert_revision : 26dad6853feaf4f68907aab902c54259281cac1c --- src/mem/port.hh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/mem/port.hh b/src/mem/port.hh index 2edad095e4..17b1f4a00d 100644 --- a/src/mem/port.hh +++ b/src/mem/port.hh @@ -74,7 +74,7 @@ class Port private: /** Descriptive name (for DPRINTF output) */ - const std::string portName; + mutable std::string portName; /** A pointer to the peer port. Ports always come in pairs, that way they can use a standardized interface to communicate between different @@ -83,6 +83,10 @@ class Port public: + Port() + : peer(NULL) + { } + /** * Constructor. * @@ -105,6 +109,9 @@ class Port RangeChange }; + void setName(const std::string &name) + { portName = name; } + /** Function to set the pointer for the peer port. @todo should be called by the configuration stuff (python). */ From c4be6f1e64a7bdd0c5057e2ab3f6a117444bc8ea Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Mon, 26 Jun 2006 16:49:05 -0400 Subject: [PATCH 065/152] add syscall emulation page table fault so we can allocate more stack pages src/cpu/simple/base.cc: add syscall emulation page table fault so we can allocate more stack pages FaultBase::invoke will do this, we don't need to do it here src/sim/faults.hh: I have no idea why this #if was there... 
gone src/sim/process.cc: make stack_min actually be the current minimum --HG-- extra : convert_revision : 9786b39f2747b94654a5d77c74243cd20503add4 --- src/arch/alpha/faults.cc | 31 +++++++++++++++++++++++++++++++ src/arch/alpha/faults.hh | 23 +++++++++++++++++++++++ src/arch/mips/faults.cc | 33 ++++++++++++++++++++++++++++++++- src/arch/mips/faults.hh | 24 ++++++++++++++++++++++++ src/arch/sparc/faults.cc | 30 ++++++++++++++++++++++++++++++ src/arch/sparc/faults.hh | 26 ++++++++++++++++++++++++++ src/cpu/simple/base.cc | 4 ---- src/mem/page_table.cc | 2 +- src/sim/faults.hh | 4 ---- src/sim/process.cc | 5 ++--- 10 files changed, 169 insertions(+), 13 deletions(-) diff --git a/src/arch/alpha/faults.cc b/src/arch/alpha/faults.cc index 8493223ffb..06765768a1 100644 --- a/src/arch/alpha/faults.cc +++ b/src/arch/alpha/faults.cc @@ -35,6 +35,9 @@ #include "base/trace.hh" #if FULL_SYSTEM #include "arch/alpha/ev5.hh" +#else +#include "sim/process.hh" +#include "mem/page_table.hh" #endif namespace AlphaISA @@ -56,6 +59,12 @@ FaultName ArithmeticFault::_name = "arith"; FaultVect ArithmeticFault::_vect = 0x0501; FaultStat ArithmeticFault::_count; +#if !FULL_SYSTEM +FaultName PageTableFault::_name = "page_table_fault"; +FaultVect PageTableFault::_vect = 0x0000; +FaultStat PageTableFault::_count; +#endif + FaultName InterruptFault::_name = "interrupt"; FaultVect InterruptFault::_vect = 0x0101; FaultStat InterruptFault::_count; @@ -173,6 +182,28 @@ void ItbFault::invoke(ThreadContext * tc) AlphaFault::invoke(tc); } +#else //!FULL_SYSTEM + +void PageTableFault::invoke(ThreadContext *tc) +{ + Process *p = tc->getProcessPtr(); + + // address is higher than the stack region or in the current stack region + if (vaddr > p->stack_base || vaddr > p->stack_min) + FaultBase::invoke(tc); + + // We've accessed the next page + if (vaddr > p->stack_min - PageBytes) { + p->stack_min -= PageBytes; + if (p->stack_base - p->stack_min > 8*1024*1024) + fatal("Over max stack size for one thread\n"); 
+ p->pTable->allocate(p->stack_min, PageBytes); + warn("Increasing stack size by one page."); + } else { + FaultBase::invoke(tc); + } +} + #endif } // namespace AlphaISA diff --git a/src/arch/alpha/faults.hh b/src/arch/alpha/faults.hh index f952cf9d69..11a5681742 100644 --- a/src/arch/alpha/faults.hh +++ b/src/arch/alpha/faults.hh @@ -81,6 +81,29 @@ class AlignmentFault : public AlphaFault bool isAlignmentFault() {return true;} }; +#if !FULL_SYSTEM +class PageTableFault : public AlphaFault +{ + private: + Addr vaddr; + static FaultName _name; + static FaultVect _vect; + static FaultStat _count; + public: + PageTableFault(Addr va) + : vaddr(va) {} + FaultName name() {return _name;} + FaultVect vect() {return _vect;} + FaultStat & countStat() {return _count;} + void invoke(ThreadContext * tc); +}; + +static inline Fault genPageTableFault(Addr va) +{ + return new PageTableFault(va); +} +#endif + static inline Fault genMachineCheckFault() { return new MachineCheckFault; diff --git a/src/arch/mips/faults.cc b/src/arch/mips/faults.cc index 810c3fed4c..cfeb045ebb 100644 --- a/src/arch/mips/faults.cc +++ b/src/arch/mips/faults.cc @@ -32,6 +32,10 @@ #include "cpu/thread_context.hh" #include "cpu/base.hh" #include "base/trace.hh" +#if !FULL_SYSTEM +#include "sim/process.hh" +#include "mem/page_table.hh" +#endif namespace MipsISA { @@ -52,6 +56,12 @@ FaultName ArithmeticFault::_name = "arith"; FaultVect ArithmeticFault::_vect = 0x0501; FaultStat ArithmeticFault::_count; +#if !FULL_SYSTEM +FaultName PageTableFault::_name = "page_table_fault"; +FaultVect PageTableFault::_vect = 0x0000; +FaultStat PageTableFault::_count; +#endif + FaultName InterruptFault::_name = "interrupt"; FaultVect InterruptFault::_vect = 0x0101; FaultStat InterruptFault::_count; @@ -127,7 +137,28 @@ void ArithmeticFault::invoke(ThreadContext * tc) panic("Arithmetic traps are unimplemented!"); } -#endif +#else //!FULL_SYSTEM +void PageTableFault::invoke(ThreadContext *tc) +{ + Process *p = 
tc->getProcessPtr(); + + // address is higher than the stack region or in the current stack region + if (vaddr > p->stack_base || vaddr > p->stack_min) + FaultBase::invoke(tc); + + // We've accessed the next page + if (vaddr > p->stack_min - PageBytes) { + p->stack_min -= PageBytes; + if (p->stack_base - p->stack_min > 8*1024*1024) + fatal("Over max stack size for one thread\n"); + p->pTable->allocate(p->stack_min, PageBytes); + warn("Increasing stack size by one page."); + } else { + FaultBase::invoke(tc); + } +} + +#endif } // namespace MipsISA diff --git a/src/arch/mips/faults.hh b/src/arch/mips/faults.hh index d8bf59cc1f..95c61cfbcb 100644 --- a/src/arch/mips/faults.hh +++ b/src/arch/mips/faults.hh @@ -79,6 +79,30 @@ class AlignmentFault : public MipsFault bool isAlignmentFault() {return true;} }; +#if !FULL_SYSTEM +class PageTableFault : public MipsFault +{ + private: + Addr vaddr; + static FaultName _name; + static FaultVect _vect; + static FaultStat _count; + public: + PageTableFault(Addr va) + : vaddr(va) {} + FaultName name() {return _name;} + FaultVect vect() {return _vect;} + FaultStat & countStat() {return _count;} + void invoke(ThreadContext * tc); +}; + +static inline Fault genPageTableFault(Addr va) +{ + return new PageTableFault(va); +} +#endif + + static inline Fault genMachineCheckFault() { return new MachineCheckFault; diff --git a/src/arch/sparc/faults.cc b/src/arch/sparc/faults.cc index 2af242bd8a..7b7765935a 100644 --- a/src/arch/sparc/faults.cc +++ b/src/arch/sparc/faults.cc @@ -33,6 +33,10 @@ #include "cpu/thread_context.hh" #include "cpu/base.hh" #include "base/trace.hh" +#if !FULL_SYSTEM +#include "sim/process.hh" +#include "mem/page_table.hh" +#endif namespace SparcISA { @@ -218,6 +222,13 @@ TrapType TrapInstruction::_baseTrapType = 0x100; FaultPriority TrapInstruction::_priority = 16; FaultStat TrapInstruction::_count; +#if !FULL_SYSTEM +FaultName PageTableFault::_name = "page_table_fault"; +TrapType PageTableFault::_trapType = 0x0000; 
+FaultPriority PageTableFault::_priority = 0; +FaultStat PageTableFault::_count; +#endif + #if FULL_SYSTEM void SparcFault::invoke(ThreadContext * tc) @@ -252,6 +263,25 @@ void TrapInstruction::invoke(ThreadContext * tc) // Should be handled in ISA. } +void PageTableFault::invoke(ThreadContext *tc) +{ + Process *p = tc->getProcessPtr(); + + // address is higher than the stack region or in the current stack region + if (vaddr > p->stack_base || vaddr > p->stack_min) + FaultBase::invoke(tc); + + // We've accessed the next page + if (vaddr > p->stack_min - PageBytes) { + p->stack_min -= PageBytes; + if (p->stack_base - p->stack_min > 8*1024*1024) + fatal("Over max stack size for one thread\n"); + p->pTable->allocate(p->stack_min, PageBytes); + warn("Increasing stack size by one page."); + } else { + FaultBase::invoke(tc); + } +} #endif } // namespace SparcISA diff --git a/src/arch/sparc/faults.hh b/src/arch/sparc/faults.hh index 9f595a28b7..b279f4911b 100644 --- a/src/arch/sparc/faults.hh +++ b/src/arch/sparc/faults.hh @@ -83,6 +83,31 @@ class MemAddressNotAligned : public SparcFault bool isAlignmentFault() {return true;} }; +#if !FULL_SYSTEM +class PageTableFault : public SparcFault +{ + private: + Addr vaddr; + static FaultName _name; + static TrapType _trapType; + static FaultPriority _priority; + static FaultStat _count; + public: + PageTableFault(Addr va) + : vaddr(va) {} + FaultName name() {return _name;} + TrapType trapType() {return _trapType;} + FaultPriority priority() {return _priority;} + FaultStat & countStat() {return _count;} + void invoke(ThreadContext * tc); +}; + +static inline Fault genPageTableFault(Addr va) +{ + return new PageTableFault(va); +} +#endif + static inline Fault genMachineCheckFault() { return new InternalProcessorError; @@ -589,6 +614,7 @@ class TrapInstruction : public EnumeratedFault #endif }; + } // SparcISA namespace #endif // __FAULTS_HH__ diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index b4258fce68..db5dd2acf5 
100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -446,11 +446,7 @@ void BaseSimpleCPU::advancePC(Fault fault) { if (fault != NoFault) { -#if FULL_SYSTEM fault->invoke(tc); -#else // !FULL_SYSTEM - fatal("fault (%s) detected @ PC %08p", fault->name(), thread->readPC()); -#endif // FULL_SYSTEM } else { // go to the next instruction diff --git a/src/mem/page_table.cc b/src/mem/page_table.cc index b5cecc7da9..7daf319002 100644 --- a/src/mem/page_table.cc +++ b/src/mem/page_table.cc @@ -130,7 +130,7 @@ PageTable::translate(RequestPtr &req) assert(pageAlign(req->getVaddr() + req->getSize() - 1) == pageAlign(req->getVaddr())); if (!translate(req->getVaddr(), paddr)) { - return genMachineCheckFault(); + return genPageTableFault(req->getVaddr()); } req->setPaddr(paddr); return page_check(req->getPaddr(), req->getSize()); diff --git a/src/sim/faults.hh b/src/sim/faults.hh index 23385c649a..00264d8fc6 100644 --- a/src/sim/faults.hh +++ b/src/sim/faults.hh @@ -54,11 +54,7 @@ class FaultBase : public RefCounted { public: virtual FaultName name() = 0; -#if FULL_SYSTEM virtual void invoke(ThreadContext * tc); -#else - virtual void invoke(ThreadContext * tc); -#endif // template // bool isA() {return dynamic_cast(this);} virtual bool isMachineCheckFault() {return false;} diff --git a/src/sim/process.cc b/src/sim/process.cc index 5080c3ac1a..9cdc5b9f5d 100644 --- a/src/sim/process.cc +++ b/src/sim/process.cc @@ -326,11 +326,10 @@ LiveProcess::argsInit(int intSize, int pageSize) // set bottom of stack stack_min = stack_base - space_needed; // align it - stack_min &= ~(intSize-1); + stack_min = roundDown(stack_min, pageSize); stack_size = stack_base - stack_min; // map memory - pTable->allocate(roundDown(stack_min, pageSize), - roundUp(stack_size, pageSize)); + pTable->allocate(stack_min, roundUp(stack_size, pageSize)); // map out initial stack contents Addr argv_array_base = stack_min + intSize; // room for argc From d80acd37bdbadb95f7f116e130f98c528ba93abe Mon 
Sep 17 00:00:00 2001 From: Ali Saidi Date: Mon, 26 Jun 2006 16:50:19 -0400 Subject: [PATCH 066/152] add python options for input file and command line options for live process --HG-- extra : convert_revision : 3db1e6d29846812378aa5174179a0686f0141580 --- configs/test/test.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/configs/test/test.py b/configs/test/test.py index a570c1a086..738219d82e 100644 --- a/configs/test/test.py +++ b/configs/test/test.py @@ -12,6 +12,8 @@ from FullO3Config import * parser = optparse.OptionParser(option_list=m5.standardOptions) parser.add_option("-c", "--cmd", default="hello") +parser.add_option("-o", "--options", default="") +parser.add_option("-i", "--input", default="") parser.add_option("-t", "--timing", action="store_true") parser.add_option("-d", "--detailed", action="store_true") parser.add_option("-m", "--maxtick", type="int") @@ -28,7 +30,9 @@ this_dir = os.path.dirname(__file__) process = LiveProcess() process.executable = os.path.join(this_dir, options.cmd) -process.cmd = options.cmd +process.cmd = options.cmd + " " + options.options +if options.input != "": + process.input = options.input magicbus = Bus() mem = PhysicalMemory() From b1cfe01b5730af7d9f215466ebe108fc96dc0a27 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Mon, 26 Jun 2006 17:49:49 -0400 Subject: [PATCH 067/152] remove extern "C" from the functions we call from gdb.
This isn't required and trips up GDB sometimes when it thinks the extern name should be mangled, but it isn't --HG-- extra : convert_revision : 62e2a1989e8fd3d73958d3a3e2d00e378488e642 --- src/base/fast_alloc.cc | 2 -- src/base/trace.cc | 5 ----- src/sim/debug.cc | 4 ++-- src/sim/sim_object.cc | 1 - src/sim/stat_control.cc | 3 +-- src/sim/system.cc | 1 - 6 files changed, 3 insertions(+), 13 deletions(-) diff --git a/src/base/fast_alloc.cc b/src/base/fast_alloc.cc index 455fb8ed7d..610dff66c0 100644 --- a/src/base/fast_alloc.cc +++ b/src/base/fast_alloc.cc @@ -180,13 +180,11 @@ FastAlloc::dump_oldest(int n) // C interfaces to FastAlloc::dump_summary() and FastAlloc::dump_oldest(). // gdb seems to have trouble with calling C++ functions directly. // -extern "C" void fast_alloc_summary() { FastAlloc::dump_summary(); } -extern "C" void fast_alloc_oldest(int n) { FastAlloc::dump_oldest(n); diff --git a/src/base/trace.cc b/src/base/trace.cc index 50426b9926..9fa615f4d5 100644 --- a/src/base/trace.cc +++ b/src/base/trace.cc @@ -247,7 +247,6 @@ DebugOut() // // Dump trace buffer to specified file (cout if NULL) // -extern "C" void dumpTrace(const char *filename) { @@ -269,7 +268,6 @@ dumpTrace(const char *filename) // same facility as the "trace to file" feature, and will print error // messages rather than clobbering an existing ostream pointer.
// -extern "C" void echoTrace(bool on) { @@ -289,7 +287,6 @@ echoTrace(bool on) } } -extern "C" void printTraceFlags() { @@ -338,14 +335,12 @@ tweakTraceFlag(const char *string, bool value) cprintf("could not find flag %s\n", string); } -extern "C" void setTraceFlag(const char *string) { tweakTraceFlag(string, true); } -extern "C" void clearTraceFlag(const char *string) { diff --git a/src/sim/debug.cc b/src/sim/debug.cc index b82219f7d7..be9566836f 100644 --- a/src/sim/debug.cc +++ b/src/sim/debug.cc @@ -127,12 +127,12 @@ DebugContext::checkParams() // handy function to schedule DebugBreakEvent on main event queue // (callable from debugger) // -extern "C" void sched_break_cycle(Tick when) +void sched_break_cycle(Tick when) { new DebugBreakEvent(&mainEventQueue, when); } -extern "C" void eventq_dump() +void eventq_dump() { mainEventQueue.dump(); } diff --git a/src/sim/sim_object.cc b/src/sim/sim_object.cc index 97e6de439a..a35c7a88d8 100644 --- a/src/sim/sim_object.cc +++ b/src/sim/sim_object.cc @@ -236,7 +236,6 @@ SimObject::debugObjectBreak(const string &objs) } } -extern "C" void debugObjectBreak(const char *objs) { diff --git a/src/sim/stat_control.cc b/src/sim/stat_control.cc index f7fc03d740..041830ab7e 100644 --- a/src/sim/stat_control.cc +++ b/src/sim/stat_control.cc @@ -221,8 +221,7 @@ SetupEvent(int flags, Tick when, Tick repeat) /* namespace Stats */ } -extern "C" void -debugDumpStats() +void debugDumpStats() { Stats::DumpNow(); } diff --git a/src/sim/system.cc b/src/sim/system.cc index 91bba85fe7..89e7b8542b 100644 --- a/src/sim/system.cc +++ b/src/sim/system.cc @@ -243,7 +243,6 @@ System::printSystems() } } -extern "C" void printSystems() { From 60454042aaf1c5b3380536c4a1d2255d8f648d7d Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Mon, 26 Jun 2006 17:50:48 -0400 Subject: [PATCH 068/152] don't depend on the memory system to return the atomic cpu a multiple of cpu cycles. 
--HG-- extra : convert_revision : e5eb36f14c8394381a0269fefd34a178833c8346 --- src/cpu/simple/atomic.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 071193f02e..ce28ba9c8d 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -410,15 +410,14 @@ AtomicSimpleCPU::tick() postExecute(); if (simulate_stalls) { - // This calculation assumes that the icache and dcache - // access latencies are always a multiple of the CPU's - // cycle time. If not, the next tick event may get - // scheduled at a non-integer multiple of the CPU - // cycle time. Tick icache_stall = icache_latency - cycles(1); Tick dcache_stall = dcache_access ? dcache_latency - cycles(1) : 0; - latency += icache_stall + dcache_stall; + Tick stall_cycles = (icache_stall + dcache_stall) / cycles(1); + if (cycles(stall_cycles) < (icache_stall + dcache_stall)) + latency += cycles(stall_cycles+1); + else + latency += cycles(stall_cycles); } } From 833d80db2d3119d85038b77360f8e25222b1e3f2 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Tue, 27 Jun 2006 13:57:44 -0400 Subject: [PATCH 069/152] Fix import command. 
--HG-- extra : convert_revision : 8a87b23dba77b7661583029920b8fc5ea89fe8f6 From 88c9b17cb927e5b789d883023db9516f878f526a Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Tue, 27 Jun 2006 14:58:46 -0400 Subject: [PATCH 070/152] Add help strings for options --HG-- extra : convert_revision : ebbafaf00c56a4d2ee65eea08a12d276f279135d --- configs/test/test.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/configs/test/test.py b/configs/test/test.py index 2047d65af8..3095cd1d12 100644 --- a/configs/test/test.py +++ b/configs/test/test.py @@ -11,12 +11,19 @@ from FullO3Config import * # parse command-line arguments parser = optparse.OptionParser(option_list=m5.standardOptions) -parser.add_option("-c", "--cmd", default="hello") -parser.add_option("-o", "--options", default="") -parser.add_option("-i", "--input", default="") -parser.add_option("-t", "--timing", action="store_true") -parser.add_option("-d", "--detailed", action="store_true") -parser.add_option("-m", "--maxtick", type="int") +parser.add_option("-c", "--cmd", default="hello", + help="The binary to run in syscall emulation mode.") +parser.add_option("-o", "--options", default="", + help="The options to pass to the binary, use \" \" around the entire\ + string.") +parser.add_option("-i", "--input", default="", + help="A file of input to give to the binary.") +parser.add_option("-t", "--timing", action="store_true", + help="Use simple timing CPU.") +parser.add_option("-d", "--detailed", action="store_true", + help="Use detailed CPU.") +parser.add_option("-m", "--maxtick", type="int", + help="Set the maximum number of ticks to run for") (options, args) = parser.parse_args() m5.setStandardOptions(options) @@ -37,6 +44,10 @@ if options.input != "": magicbus = Bus() mem = PhysicalMemory() +if options.timing and options.detailed: + print "Error: you may only specify one cpu model"; + sys.exit(1) + if options.timing: cpu = TimingSimpleCPU() elif options.detailed: From 
07cd37c48b018679553e6b12a5591c5759f433d6 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Tue, 27 Jun 2006 14:59:38 -0400 Subject: [PATCH 071/152] Make full CPU handle SE faults --HG-- extra : convert_revision : e336623ac3329ec0ee2430548c6a9650e2a69d6a --- src/arch/alpha/faults.cc | 4 +++- src/cpu/o3/alpha_cpu.hh | 4 ++-- src/cpu/o3/alpha_cpu_impl.hh | 16 ++++++++-------- src/cpu/o3/commit_impl.hh | 5 ----- src/cpu/o3/fetch.hh | 2 +- src/cpu/o3/lsq_unit.hh | 2 +- 6 files changed, 15 insertions(+), 18 deletions(-) diff --git a/src/arch/alpha/faults.cc b/src/arch/alpha/faults.cc index 06765768a1..eef4361fda 100644 --- a/src/arch/alpha/faults.cc +++ b/src/arch/alpha/faults.cc @@ -194,11 +194,13 @@ void PageTableFault::invoke(ThreadContext *tc) // We've accessed the next page if (vaddr > p->stack_min - PageBytes) { + warn("Increasing stack %#x:%#x to %#x:%#x because of access to %#x", + p->stack_min, p->stack_base, p->stack_min - PageBytes, + p->stack_base, vaddr); p->stack_min -= PageBytes; if (p->stack_base - p->stack_min > 8*1024*1024) fatal("Over max stack size for one thread\n"); p->pTable->allocate(p->stack_min, PageBytes); - warn("Increasing stack size by one page."); } else { FaultBase::invoke(tc); } diff --git a/src/cpu/o3/alpha_cpu.hh b/src/cpu/o3/alpha_cpu.hh index 55b9751420..d7f3d5801f 100644 --- a/src/cpu/o3/alpha_cpu.hh +++ b/src/cpu/o3/alpha_cpu.hh @@ -384,8 +384,6 @@ class AlphaO3CPU : public FullO3CPU bool inPalMode(uint64_t PC) { return AlphaISA::PcPAL(PC); } - /** Traps to handle given fault. */ - void trap(Fault fault, unsigned tid); bool simPalCheck(int palFunc, unsigned tid); /** Processes any interrupts. */ @@ -395,6 +393,8 @@ class AlphaO3CPU : public FullO3CPU void halt() { panic("Halt not implemented!\n"); } #endif + /** Traps to handle given fault. */ + void trap(Fault fault, unsigned tid); #if !FULL_SYSTEM /** Executes a syscall. 
diff --git a/src/cpu/o3/alpha_cpu_impl.hh b/src/cpu/o3/alpha_cpu_impl.hh index 532611fb6a..eca6fbbcbf 100644 --- a/src/cpu/o3/alpha_cpu_impl.hh +++ b/src/cpu/o3/alpha_cpu_impl.hh @@ -753,14 +753,6 @@ AlphaO3CPU::simPalCheck(int palFunc, unsigned tid) return true; } -template -void -AlphaO3CPU::trap(Fault fault, unsigned tid) -{ - // Pass the thread's TC into the invoke method. - fault->invoke(this->threadContexts[tid]); -} - template void AlphaO3CPU::processInterrupts() @@ -823,6 +815,14 @@ AlphaO3CPU::processInterrupts() #endif // FULL_SYSTEM +template +void +AlphaO3CPU::trap(Fault fault, unsigned tid) +{ + // Pass the thread's TC into the invoke method. + fault->invoke(this->threadContexts[tid]); +} + #if !FULL_SYSTEM template diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 176f832468..cd7dd47d48 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -991,7 +991,6 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) if (inst_fault != NoFault) { head_inst->setCompleted(); -#if FULL_SYSTEM DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n", head_inst->seqNum, head_inst->readPC()); @@ -1035,10 +1034,6 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) generateTrapEvent(tid); return false; -#else // !FULL_SYSTEM - panic("fault (%d) detected @ PC %08p", inst_fault, - head_inst->PC); -#endif // FULL_SYSTEM } updateComInstStats(head_inst); diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 790c28f09a..7fcd21b7d4 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -36,7 +36,7 @@ #include "base/statistics.hh" #include "base/timebuf.hh" #include "cpu/pc_event.hh" -#include "mem/packet.hh" +#include "mem/packet_impl.hh" #include "mem/port.hh" #include "sim/eventq.hh" diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 9b67e61f2f..74b8fe5bbb 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -40,7 +40,7 @@ #include "config/full_system.hh" 
#include "base/hashmap.hh" #include "cpu/inst_seq.hh" -#include "mem/packet.hh" +#include "mem/packet_impl.hh" #include "mem/port.hh" /** From ecab4b426c949dad797df0bde1c0c120b4b5fb00 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Tue, 27 Jun 2006 15:04:11 -0400 Subject: [PATCH 072/152] change the page table from map to hash_map and create small cache to speed up lookups --HG-- extra : convert_revision : 4c73ed33c2a22ae3254b459b0fd189e6ac9d438e --- src/mem/page_table.cc | 28 ++++++++++++++++++++++++++-- src/mem/page_table.hh | 11 +++++++++-- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/mem/page_table.cc b/src/mem/page_table.cc index 7daf319002..a34a0393ac 100644 --- a/src/mem/page_table.cc +++ b/src/mem/page_table.cc @@ -54,6 +54,9 @@ PageTable::PageTable(System *_system, Addr _pageSize) system(_system) { assert(isPowerOf2(pageSize)); + pTableCache[0].vaddr = 0; + pTableCache[1].vaddr = 0; + pTableCache[2].vaddr = 0; } PageTable::~PageTable() @@ -95,7 +98,7 @@ PageTable::allocate(Addr vaddr, int size) assert(pageOffset(vaddr) == 0); for (; size > 0; size -= pageSize, vaddr += pageSize) { - std::map::iterator iter = pTable.find(vaddr); + m5::hash_map::iterator iter = pTable.find(vaddr); if (iter != pTable.end()) { // already mapped @@ -103,6 +106,12 @@ PageTable::allocate(Addr vaddr, int size) } pTable[vaddr] = system->new_page(); + pTableCache[2].paddr = pTableCache[1].paddr; + pTableCache[2].vaddr = pTableCache[1].vaddr; + pTableCache[1].paddr = pTableCache[0].paddr; + pTableCache[1].vaddr = pTableCache[0].vaddr; + pTableCache[0].paddr = pTable[vaddr]; + pTableCache[0].vaddr = vaddr; } } @@ -112,7 +121,22 @@ bool PageTable::translate(Addr vaddr, Addr &paddr) { Addr page_addr = pageAlign(vaddr); - std::map::iterator iter = pTable.find(page_addr); + paddr = 0; + + if (pTableCache[0].vaddr == vaddr) { + paddr = pTableCache[0].paddr; + return true; + } + if (pTableCache[1].vaddr == vaddr) { + paddr = pTableCache[1].paddr; + return true; + } + 
if (pTableCache[2].vaddr == vaddr) { + paddr = pTableCache[2].paddr; + return true; + } + + m5::hash_map::iterator iter = pTable.find(page_addr); if (iter == pTable.end()) { return false; diff --git a/src/mem/page_table.hh b/src/mem/page_table.hh index f7212d4231..494c0ce9a8 100644 --- a/src/mem/page_table.hh +++ b/src/mem/page_table.hh @@ -37,9 +37,9 @@ #define __PAGE_TABLE__ #include -#include #include "arch/isa_traits.hh" +#include "base/hashmap.hh" #include "base/trace.hh" #include "mem/request.hh" #include "mem/packet.hh" @@ -53,7 +53,14 @@ class System; class PageTable { protected: - std::map pTable; + m5::hash_map pTable; + + struct cacheElement { + Addr paddr; + Addr vaddr; + } ; + + struct cacheElement pTableCache[3]; const Addr pageSize; const Addr offsetMask; From ed8564a6b9f0702a40995d95cc4da54de3d35462 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Wed, 28 Jun 2006 11:02:14 -0400 Subject: [PATCH 073/152] Was having difficulty with merging the cache, reverted to an early version and will add back in the patches to make it work soon. 
src/mem/cache/prefetch/tagged_prefetcher_impl.hh: Trying to merge src/mem/cache/base_cache.cc: src/mem/cache/base_cache.hh: src/mem/cache/cache.cc: src/mem/cache/cache.hh: src/mem/cache/cache_blk.hh: src/mem/cache/cache_builder.cc: src/mem/cache/cache_impl.hh: src/mem/cache/coherence/coherence_protocol.cc: src/mem/cache/coherence/coherence_protocol.hh: src/mem/cache/coherence/simple_coherence.hh: src/mem/cache/coherence/uni_coherence.cc: src/mem/cache/coherence/uni_coherence.hh: src/mem/cache/miss/blocking_buffer.cc: src/mem/cache/miss/blocking_buffer.hh: src/mem/cache/miss/miss_queue.cc: src/mem/cache/miss/miss_queue.hh: src/mem/cache/miss/mshr.cc: src/mem/cache/miss/mshr.hh: src/mem/cache/miss/mshr_queue.cc: src/mem/cache/miss/mshr_queue.hh: src/mem/cache/prefetch/base_prefetcher.cc: src/mem/cache/prefetch/base_prefetcher.hh: src/mem/cache/prefetch/ghb_prefetcher.cc: src/mem/cache/prefetch/ghb_prefetcher.hh: src/mem/cache/prefetch/stride_prefetcher.cc: src/mem/cache/prefetch/stride_prefetcher.hh: src/mem/cache/prefetch/tagged_prefetcher.hh: src/mem/cache/tags/base_tags.cc: src/mem/cache/tags/base_tags.hh: src/mem/cache/tags/fa_lru.cc: src/mem/cache/tags/fa_lru.hh: src/mem/cache/tags/iic.cc: src/mem/cache/tags/iic.hh: src/mem/cache/tags/lru.cc: src/mem/cache/tags/lru.hh: src/mem/cache/tags/repl/gen.cc: src/mem/cache/tags/repl/gen.hh: src/mem/cache/tags/repl/repl.cc: src/mem/cache/tags/repl/repl.hh: src/mem/cache/tags/split.cc: src/mem/cache/tags/split.hh: src/mem/cache/tags/split_blk.hh: src/mem/cache/tags/split_lifo.cc: src/mem/cache/tags/split_lifo.hh: src/mem/cache/tags/split_lru.cc: src/mem/cache/tags/split_lru.hh: Pulling an early version of the cache into the tree due to merging issues. Will apply patches and push. 
--HG-- extra : convert_revision : 3276e5fb9a6272681a1690babf2b586dd0e1f380 --- src/mem/cache/base_cache.cc | 330 +++++++ src/mem/cache/base_cache.hh | 480 ++++++++++ src/mem/cache/cache.cc | 146 +++ src/mem/cache/cache.hh | 264 ++++++ src/mem/cache/cache_blk.hh | 203 ++++ src/mem/cache/cache_builder.cc | 482 ++++++++++ src/mem/cache/cache_impl.hh | 661 +++++++++++++ src/mem/cache/coherence/coherence_protocol.cc | 566 ++++++++++++ src/mem/cache/coherence/coherence_protocol.hh | 263 ++++++ src/mem/cache/coherence/simple_coherence.hh | 161 ++++ src/mem/cache/coherence/uni_coherence.cc | 89 ++ src/mem/cache/coherence/uni_coherence.hh | 136 +++ src/mem/cache/miss/blocking_buffer.cc | 261 ++++++ src/mem/cache/miss/blocking_buffer.hh | 257 ++++++ src/mem/cache/miss/miss_queue.cc | 736 +++++++++++++++ src/mem/cache/miss/miss_queue.hh | 349 +++++++ src/mem/cache/miss/mshr.cc | 182 ++++ src/mem/cache/miss/mshr.hh | 179 ++++ src/mem/cache/miss/mshr_queue.cc | 269 ++++++ src/mem/cache/miss/mshr_queue.hh | 239 +++++ src/mem/cache/prefetch/base_prefetcher.cc | 250 +++++ src/mem/cache/prefetch/base_prefetcher.hh | 117 +++ src/mem/cache/prefetch/ghb_prefetcher.cc | 54 ++ src/mem/cache/prefetch/ghb_prefetcher.hh | 114 +++ src/mem/cache/prefetch/stride_prefetcher.cc | 54 ++ src/mem/cache/prefetch/stride_prefetcher.hh | 149 +++ src/mem/cache/prefetch/tagged_prefetcher.hh | 71 ++ .../cache/prefetch/tagged_prefetcher_impl.hh | 4 +- src/mem/cache/tags/base_tags.cc | 91 ++ src/mem/cache/tags/base_tags.hh | 143 +++ src/mem/cache/tags/fa_lru.cc | 334 +++++++ src/mem/cache/tags/fa_lru.hh | 346 +++++++ src/mem/cache/tags/iic.cc | 869 ++++++++++++++++++ src/mem/cache/tags/iic.hh | 574 ++++++++++++ src/mem/cache/tags/lru.cc | 310 +++++++ src/mem/cache/tags/lru.hh | 327 +++++++ src/mem/cache/tags/repl/gen.cc | 277 ++++++ src/mem/cache/tags/repl/gen.hh | 247 +++++ src/mem/cache/tags/repl/repl.cc | 43 + src/mem/cache/tags/repl/repl.hh | 129 +++ src/mem/cache/tags/split.cc | 478 ++++++++++ 
src/mem/cache/tags/split.hh | 335 +++++++ src/mem/cache/tags/split_blk.hh | 68 ++ src/mem/cache/tags/split_lifo.cc | 405 ++++++++ src/mem/cache/tags/split_lifo.hh | 350 +++++++ src/mem/cache/tags/split_lru.cc | 331 +++++++ src/mem/cache/tags/split_lru.hh | 333 +++++++ 47 files changed, 13054 insertions(+), 2 deletions(-) create mode 100644 src/mem/cache/base_cache.cc create mode 100644 src/mem/cache/base_cache.hh create mode 100644 src/mem/cache/cache.cc create mode 100644 src/mem/cache/cache.hh create mode 100644 src/mem/cache/cache_blk.hh create mode 100644 src/mem/cache/cache_builder.cc create mode 100644 src/mem/cache/cache_impl.hh create mode 100644 src/mem/cache/coherence/coherence_protocol.cc create mode 100644 src/mem/cache/coherence/coherence_protocol.hh create mode 100644 src/mem/cache/coherence/simple_coherence.hh create mode 100644 src/mem/cache/coherence/uni_coherence.cc create mode 100644 src/mem/cache/coherence/uni_coherence.hh create mode 100644 src/mem/cache/miss/blocking_buffer.cc create mode 100644 src/mem/cache/miss/blocking_buffer.hh create mode 100644 src/mem/cache/miss/miss_queue.cc create mode 100644 src/mem/cache/miss/miss_queue.hh create mode 100644 src/mem/cache/miss/mshr.cc create mode 100644 src/mem/cache/miss/mshr.hh create mode 100644 src/mem/cache/miss/mshr_queue.cc create mode 100644 src/mem/cache/miss/mshr_queue.hh create mode 100644 src/mem/cache/prefetch/base_prefetcher.cc create mode 100644 src/mem/cache/prefetch/base_prefetcher.hh create mode 100644 src/mem/cache/prefetch/ghb_prefetcher.cc create mode 100644 src/mem/cache/prefetch/ghb_prefetcher.hh create mode 100644 src/mem/cache/prefetch/stride_prefetcher.cc create mode 100644 src/mem/cache/prefetch/stride_prefetcher.hh create mode 100644 src/mem/cache/prefetch/tagged_prefetcher.hh create mode 100644 src/mem/cache/tags/base_tags.cc create mode 100644 src/mem/cache/tags/base_tags.hh create mode 100644 src/mem/cache/tags/fa_lru.cc create mode 100644 src/mem/cache/tags/fa_lru.hh 
create mode 100644 src/mem/cache/tags/iic.cc create mode 100644 src/mem/cache/tags/iic.hh create mode 100644 src/mem/cache/tags/lru.cc create mode 100644 src/mem/cache/tags/lru.hh create mode 100644 src/mem/cache/tags/repl/gen.cc create mode 100644 src/mem/cache/tags/repl/gen.hh create mode 100644 src/mem/cache/tags/repl/repl.cc create mode 100644 src/mem/cache/tags/repl/repl.hh create mode 100644 src/mem/cache/tags/split.cc create mode 100644 src/mem/cache/tags/split.hh create mode 100644 src/mem/cache/tags/split_blk.hh create mode 100644 src/mem/cache/tags/split_lifo.cc create mode 100644 src/mem/cache/tags/split_lifo.hh create mode 100644 src/mem/cache/tags/split_lru.cc create mode 100644 src/mem/cache/tags/split_lru.hh diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc new file mode 100644 index 0000000000..10a49edb1f --- /dev/null +++ b/src/mem/cache/base_cache.cc @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Definition of BaseCache functions. + */ + +#include "mem/cache/base_cache.hh" +#include "cpu/smt.hh" +#include "cpu/base.hh" + +using namespace std; + +BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache, + bool _isCpuSide) + : Port(_name), cache(_cache), isCpuSide(_isCpuSide) +{ + blocked = false; + //Start ports at null if more than one is created we should panic + cpuSidePort = NULL; + memSidePort = NULL; +} + +bool +BaseCache::CachePort::recvStatusChange(Port::Status status) +{ + cache->recvStatusChange(status, isCpuSide); +} + +void +BaseCache::CachePort::getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) +{ + cache->getAddressRanges(resp, snoop); +} + +int +BaseCache::CachePort::deviceBlockSize() +{ + return cache->getBlockSize(); +} + +bool +BaseCache::CachePort::recvTiming(Packet *pkt) +{ + return cache->doTimingAccess(pkt, this, isCpuSide); +} + +Tick +BaseCache::CachePort::recvAtomic(Packet *pkt) +{ + return cache->doAtomicAccess(pkt, isCpuSide); +} + +void +BaseCache::CachePort::recvFunctional(Packet *pkt) +{ + cache->doFunctionalAccess(pkt, isCpuSide); +} + +void +BaseCache::CachePort::setBlocked() +{ + blocked = true; +} + +void +BaseCache::CachePort::clearBlocked() +{ + blocked = false; +} + +Port* +BaseCache::getPort(const std::string &if_name) +{ + if(if_name == "cpu_side") + { + if(cpuSidePort != NULL) + panic("Already 
have a cpu side for this cache\n"); + cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true); + return cpuSidePort; + } + else if(if_name == "mem_side") + { + if(memSidePort != NULL) + panic("Already have a mem side for this cache\n"); + memSidePort = new CachePort(name() + "-mem_side_port", this, false); + return memSidePort; + } + else panic("Port name %s unrecognized\n", if_name); +} + +void +BaseCache::regStats() +{ + using namespace Stats; + + // Hit statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + hits[access_idx] + .init(maxThreadsPerCPU) + .name(name() + "." + cstr + "_hits") + .desc("number of " + cstr + " hits") + .flags(total | nozero | nonan) + ; + } + + demandHits + .name(name() + ".demand_hits") + .desc("number of demand (read+write) hits") + .flags(total) + ; + demandHits = hits[Read] + hits[Write]; + + overallHits + .name(name() + ".overall_hits") + .desc("number of overall hits") + .flags(total) + ; + overallHits = demandHits + hits[Soft_Prefetch] + hits[Hard_Prefetch] + + hits[Writeback]; + + // Miss statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + misses[access_idx] + .init(maxThreadsPerCPU) + .name(name() + "." 
+ cstr + "_misses") + .desc("number of " + cstr + " misses") + .flags(total | nozero | nonan) + ; + } + + demandMisses + .name(name() + ".demand_misses") + .desc("number of demand (read+write) misses") + .flags(total) + ; + demandMisses = misses[Read] + misses[Write]; + + overallMisses + .name(name() + ".overall_misses") + .desc("number of overall misses") + .flags(total) + ; + overallMisses = demandMisses + misses[Soft_Prefetch] + + misses[Hard_Prefetch] + misses[Writeback]; + + // Miss latency statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + missLatency[access_idx] + .init(maxThreadsPerCPU) + .name(name() + "." + cstr + "_miss_latency") + .desc("number of " + cstr + " miss cycles") + .flags(total | nozero | nonan) + ; + } + + demandMissLatency + .name(name() + ".demand_miss_latency") + .desc("number of demand (read+write) miss cycles") + .flags(total) + ; + demandMissLatency = missLatency[Read] + missLatency[Write]; + + overallMissLatency + .name(name() + ".overall_miss_latency") + .desc("number of overall miss cycles") + .flags(total) + ; + overallMissLatency = demandMissLatency + missLatency[Soft_Prefetch] + + missLatency[Hard_Prefetch]; + + // access formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + accesses[access_idx] + .name(name() + "." 
+ cstr + "_accesses") + .desc("number of " + cstr + " accesses(hits+misses)") + .flags(total | nozero | nonan) + ; + + accesses[access_idx] = hits[access_idx] + misses[access_idx]; + } + + demandAccesses + .name(name() + ".demand_accesses") + .desc("number of demand (read+write) accesses") + .flags(total) + ; + demandAccesses = demandHits + demandMisses; + + overallAccesses + .name(name() + ".overall_accesses") + .desc("number of overall (read+write) accesses") + .flags(total) + ; + overallAccesses = overallHits + overallMisses; + + // miss rate formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + missRate[access_idx] + .name(name() + "." + cstr + "_miss_rate") + .desc("miss rate for " + cstr + " accesses") + .flags(total | nozero | nonan) + ; + + missRate[access_idx] = misses[access_idx] / accesses[access_idx]; + } + + demandMissRate + .name(name() + ".demand_miss_rate") + .desc("miss rate for demand accesses") + .flags(total) + ; + demandMissRate = demandMisses / demandAccesses; + + overallMissRate + .name(name() + ".overall_miss_rate") + .desc("miss rate for overall accesses") + .flags(total) + ; + overallMissRate = overallMisses / overallAccesses; + + // miss latency formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + avgMissLatency[access_idx] + .name(name() + "." 
+ cstr + "_avg_miss_latency") + .desc("average " + cstr + " miss latency") + .flags(total | nozero | nonan) + ; + + avgMissLatency[access_idx] = + missLatency[access_idx] / misses[access_idx]; + } + + demandAvgMissLatency + .name(name() + ".demand_avg_miss_latency") + .desc("average overall miss latency") + .flags(total) + ; + demandAvgMissLatency = demandMissLatency / demandMisses; + + overallAvgMissLatency + .name(name() + ".overall_avg_miss_latency") + .desc("average overall miss latency") + .flags(total) + ; + overallAvgMissLatency = overallMissLatency / overallMisses; + + blocked_cycles.init(NUM_BLOCKED_CAUSES); + blocked_cycles + .name(name() + ".blocked_cycles") + .desc("number of cycles access was blocked") + .subname(Blocked_NoMSHRs, "no_mshrs") + .subname(Blocked_NoTargets, "no_targets") + ; + + + blocked_causes.init(NUM_BLOCKED_CAUSES); + blocked_causes + .name(name() + ".blocked") + .desc("number of cycles access was blocked") + .subname(Blocked_NoMSHRs, "no_mshrs") + .subname(Blocked_NoTargets, "no_targets") + ; + + avg_blocked + .name(name() + ".avg_blocked_cycles") + .desc("average number of cycles each access was blocked") + .subname(Blocked_NoMSHRs, "no_mshrs") + .subname(Blocked_NoTargets, "no_targets") + ; + + avg_blocked = blocked_cycles / blocked_causes; + + fastWrites + .name(name() + ".fast_writes") + .desc("number of fast writes performed") + ; + + cacheCopies + .name(name() + ".cache_copies") + .desc("number of cache copies performed") + ; +} diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh new file mode 100644 index 0000000000..0170b02494 --- /dev/null +++ b/src/mem/cache/base_cache.hh @@ -0,0 +1,480 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Declares a basic cache interface BaseCache. + */ + +#ifndef __BASE_CACHE_HH__ +#define __BASE_CACHE_HH__ + +#include +#include +#include +#include + +#include "base/statistics.hh" +#include "base/trace.hh" +#include "mem/mem_object.hh" +#include "mem/packet.hh" +#include "mem/port.hh" +#include "mem/request.hh" + +/** + * Reasons for Caches to be Blocked. 
+ */ +enum BlockedCause{ + Blocked_NoMSHRs, + Blocked_NoTargets, + Blocked_NoWBBuffers, + Blocked_Coherence, + Blocked_Copy, + NUM_BLOCKED_CAUSES +}; + +/** + * Reasons for cache to request a bus. + */ +enum RequestCause{ + Request_MSHR, + Request_WB, + Request_Coherence, + Request_PF +}; + +/** + * A basic cache interface. Implements some common functions for speed. + */ +class BaseCache : public MemObject +{ + class CachePort : public Port + { + BaseCache *cache; + + public: + CachePort(const std::string &_name, BaseCache *_cache, bool _isCpuSide); + + private: + virtual bool recvTiming(Packet *pkt); + + virtual Tick recvAtomic(Packet *pkt); + + virtual void recvFunctional(Packet *pkt); + + virtual void recvStatusChange(Status status); + + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop); + + virtual int deviceBlockSize(); + + void setBlocked(); + + void clearBlocked(); + + bool blocked; + + bool isCpuSide; + }; + + struct CacheEvent : public Event + { + Packet *pkt; + CachePort *cachePort; + + CacheResponseEvent(Packet *pkt, CachePort *cachePort); + void process(); + const char *description(); + } + + protected: + CachePort *cpuSidePort; + CachePort *memSidePort; + + public: + virtual Port *getPort(const std::string &if_name); + + private: + //To be defined in cache_impl.hh not in base class + virtual bool doTimingAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide); + virtual Tick doAtomicAccess(Packet *pkt, bool isCpuSide); + virtual void doFunctionalAccess(Packet *pkt, bool isCpuSide); + virtual void recvStatusChange(Port::Status status, bool isCpuSide); + + /** + * Bit vector of the blocking reasons for the access path. + * @sa #BlockedCause + */ + uint8_t blocked; + + /** + * Bit vector for the blocking reasons for the snoop path. + * @sa #BlockedCause + */ + uint8_t blockedSnoop; + + /** + * Bit vector for the outstanding requests for the master interface. 
+ */ + uint8_t masterRequests; + + /** + * Bit vector for the outstanding requests for the slave interface. + */ + uint8_t slaveRequests; + + protected: + + /** True if this cache is connected to the CPU. */ + bool topLevelCache; + + /** Stores time the cache blocked for statistics. */ + Tick blockedCycle; + + /** Block size of this cache */ + const int blkSize; + + /** The number of misses to trigger an exit event. */ + Counter missCount; + + public: + // Statistics + /** + * @addtogroup CacheStatistics + * @{ + */ + + /** Number of hits per thread for each type of command. @sa Packet::Command */ + Stats::Vector<> hits[NUM_MEM_CMDS]; + /** Number of hits for demand accesses. */ + Stats::Formula demandHits; + /** Number of hit for all accesses. */ + Stats::Formula overallHits; + + /** Number of misses per thread for each type of command. @sa Packet::Command */ + Stats::Vector<> misses[NUM_MEM_CMDS]; + /** Number of misses for demand accesses. */ + Stats::Formula demandMisses; + /** Number of misses for all accesses. */ + Stats::Formula overallMisses; + + /** + * Total number of cycles per thread/command spent waiting for a miss. + * Used to calculate the average miss latency. + */ + Stats::Vector<> missLatency[NUM_MEM_CMDS]; + /** Total number of cycles spent waiting for demand misses. */ + Stats::Formula demandMissLatency; + /** Total number of cycles spent waiting for all misses. */ + Stats::Formula overallMissLatency; + + /** The number of accesses per command and thread. */ + Stats::Formula accesses[NUM_MEM_CMDS]; + /** The number of demand accesses. */ + Stats::Formula demandAccesses; + /** The number of overall accesses. */ + Stats::Formula overallAccesses; + + /** The miss rate per command and thread. */ + Stats::Formula missRate[NUM_MEM_CMDS]; + /** The miss rate of all demand accesses. */ + Stats::Formula demandMissRate; + /** The miss rate for all accesses. */ + Stats::Formula overallMissRate; + + /** The average miss latency per command and thread. 
*/ + Stats::Formula avgMissLatency[NUM_MEM_CMDS]; + /** The average miss latency for demand misses. */ + Stats::Formula demandAvgMissLatency; + /** The average miss latency for all misses. */ + Stats::Formula overallAvgMissLatency; + + /** The total number of cycles blocked for each blocked cause. */ + Stats::Vector<> blocked_cycles; + /** The number of times this cache blocked for each blocked cause. */ + Stats::Vector<> blocked_causes; + + /** The average number of cycles blocked for each blocked cause. */ + Stats::Formula avg_blocked; + + /** The number of fast writes (WH64) performed. */ + Stats::Scalar<> fastWrites; + + /** The number of cache copies performed. */ + Stats::Scalar<> cacheCopies; + + /** + * @} + */ + + /** + * Register stats for this object. + */ + virtual void regStats(); + + public: + + class Params + { + public: + /** List of address ranges of this cache. */ + std::vector > addrRange; + /** The hit latency for this cache. */ + int hitLatency; + /** The block size of this cache. */ + int blkSize; + /** + * The maximum number of misses this cache should handle before + * ending the simulation. + */ + Counter maxMisses; + + /** + * Construct an instance of this parameter class. + */ + Params(std::vector > addr_range, + int hit_latency, int _blkSize, Counter max_misses) + : addrRange(addr_range), hitLatency(hit_latency), blkSize(_blkSize), + maxMisses(max_misses) + { + } + }; + + /** + * Create and initialize a basic cache object. + * @param name The name of this cache. + * @param hier_params Pointer to the HierParams object for this hierarchy + * of this cache. + * @param params The parameter object for this BaseCache. 
+ */ + BaseCache(const std::string &name, HierParams *hier_params, Params &params) + : BaseMem(name, hier_params, params.hitLatency, params.addrRange), + blocked(0), blockedSnoop(0), masterRequests(0), slaveRequests(0), + topLevelCache(false), blkSize(params.blkSize), + missCount(params.maxMisses) + { + } + + /** + * Query block size of a cache. + * @return The block size + */ + int getBlockSize() const + { + return blkSize; + } + + /** + * Returns true if this cache is connected to the CPU. + * @return True if this is a L1 cache. + */ + bool isTopLevel() + { + return topLevelCache; + } + + /** + * Returns true if the cache is blocked for accesses. + */ + bool isBlocked() + { + return blocked != 0; + } + + /** + * Returns true if the cache is blocked for snoops. + */ + bool isBlockedForSnoop() + { + return blockedSnoop != 0; + } + + /** + * Marks the access path of the cache as blocked for the given cause. This + * also sets the blocked flag in the slave interface. + * @param cause The reason for the cache blocking. + */ + void setBlocked(BlockedCause cause) + { + uint8_t flag = 1 << cause; + if (blocked == 0) { + blocked_causes[cause]++; + blockedCycle = curTick; + } + blocked |= flag; + DPRINTF(Cache,"Blocking for cause %s\n", cause); + cpuSidePort->setBlocked(); + } + + /** + * Marks the snoop path of the cache as blocked for the given cause. This + * also sets the blocked flag in the master interface. + * @param cause The reason to block the snoop path. + */ + void setBlockedForSnoop(BlockedCause cause) + { + uint8_t flag = 1 << cause; + blockedSnoop |= flag; + memSidePort->setBlocked(); + } + + /** + * Marks the cache as unblocked for the given cause. This also clears the + * blocked flags in the appropriate interfaces. + * @param cause The newly unblocked cause. + * @warning Calling this function can cause a blocked request on the bus to + * access the cache. The cache must be in a state to handle that request. 
+ */ + void clearBlocked(BlockedCause cause) + { + uint8_t flag = 1 << cause; + blocked &= ~flag; + blockedSnoop &= ~flag; + DPRINTF(Cache,"Unblocking for cause %s, causes left=%i\n", + cause, blocked); + if (!isBlocked()) { + blocked_cycles[cause] += curTick - blockedCycle; + DPRINTF(Cache,"Unblocking from all causes\n"); + cpuSidePort->clearBlocked(); + } + if (!isBlockedForSnoop()) { + memSidePort->clearBlocked(); + } + + } + + /** + * True if the master bus should be requested. + * @return True if there are outstanding requests for the master bus. + */ + bool doMasterRequest() + { + return masterRequests != 0; + } + + /** + * Request the master bus for the given cause and time. + * @param cause The reason for the request. + * @param time The time to make the request. + */ + void setMasterRequest(RequestCause cause, Tick time) + { + uint8_t flag = 1<pktuest(time); + } + + /** + * Clear the master bus request for the given cause. + * @param cause The request reason to clear. + */ + void clearMasterRequest(RequestCause cause) + { + uint8_t flag = 1<pktuest(time); + } + + /** + * Clear the slave bus request for the given reason. + * @param cause The request reason to clear. + */ + void clearSlaveRequest(RequestCause cause) + { + uint8_t flag = 1<respond(pkt,time); + } + + /** + * Send a reponse to the slave interface and calculate miss latency. + * @param req The request to respond to. + * @param time The time the response is ready. + */ + void respondToMiss(Packet *pkt, Tick time) + { + if (!pkt->isUncacheable()) { + missLatency[pkt->cmd.toIndex()][pkt->thread_num] += time - pkt->time; + } + assert("Implement\n" && 0); +// si->respond(pkt,time); + } + + /** + * Suppliess the data if cache to cache transfers are enabled. + * @param req The bus transaction to fulfill. + */ + void respondToSnoop(Packet *pkt) + { + assert("Implement\n" && 0); +// mi->respond(pkt,curTick + hitLatency); + } + + /** + * Notification from master interface that a address range changed. 
Nothing + * to do for a cache. + */ + void rangeChange() {} +}; + +#endif //__BASE_CACHE_HH__ diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc new file mode 100644 index 0000000000..db66c096e6 --- /dev/null +++ b/src/mem/cache/cache.cc @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Erik Hallnor + * Steve Reinhardt + * Lisa Hsu + * Kevin Lim + */ + +/** + * @file + * Cache template instantiations. + */ + +#include "mem/config/cache.hh" +#include "mem/config/compression.hh" + +#include "mem/cache/tags/cache_tags.hh" + +#if defined(USE_CACHE_LRU) +#include "mem/cache/tags/lru.hh" +#endif + +#if defined(USE_CACHE_FALRU) +#include "mem/cache/tags/fa_lru.hh" +#endif + +#if defined(USE_CACHE_IIC) +#include "mem/cache/tags/iic.hh" +#endif + +#if defined(USE_CACHE_SPLIT) +#include "mem/cache/tags/split.hh" +#endif + +#if defined(USE_CACHE_SPLIT_LIFO) +#include "mem/cache/tags/split_lifo.hh" +#endif + +#include "base/compression/null_compression.hh" +#if defined(USE_LZSS_COMPRESSION) +#include "base/compression/lzss_compression.hh" +#endif + +#include "mem/cache/miss/miss_queue.hh" +#include "mem/cache/miss/blocking_buffer.hh" + +#include "mem/cache/coherence/uni_coherence.hh" +#include "mem/cache/coherence/simple_coherence.hh" + +#include "mem/cache/cache_impl.hh" + +// Template Instantiations +#ifndef DOXYGEN_SHOULD_SKIP_THIS + + +#if defined(USE_CACHE_FALRU) +template class Cache, BlockingBuffer, SimpleCoherence>; +template class Cache, BlockingBuffer, UniCoherence>; +template class Cache, MissQueue, SimpleCoherence>; +template class Cache, MissQueue, UniCoherence>; +#if defined(USE_LZSS_COMPRESSION) +template class Cache, BlockingBuffer, SimpleCoherence>; +template class Cache, BlockingBuffer, UniCoherence>; +template class Cache, MissQueue, SimpleCoherence>; +template class Cache, MissQueue, UniCoherence>; +#endif +#endif + +#if defined(USE_CACHE_IIC) +template class Cache, BlockingBuffer, SimpleCoherence>; +template class Cache, BlockingBuffer, UniCoherence>; +template class Cache, MissQueue, SimpleCoherence>; +template class Cache, MissQueue, UniCoherence>; +#if defined(USE_LZSS_COMPRESSION) +template class Cache, BlockingBuffer, SimpleCoherence>; +template class Cache, BlockingBuffer, UniCoherence>; +template class Cache, 
MissQueue, SimpleCoherence>; +template class Cache, MissQueue, UniCoherence>; +#endif +#endif + +#if defined(USE_CACHE_LRU) +template class Cache, BlockingBuffer, SimpleCoherence>; +template class Cache, BlockingBuffer, UniCoherence>; +template class Cache, MissQueue, SimpleCoherence>; +template class Cache, MissQueue, UniCoherence>; +#if defined(USE_LZSS_COMPRESSION) +template class Cache, BlockingBuffer, SimpleCoherence>; +template class Cache, BlockingBuffer, UniCoherence>; +template class Cache, MissQueue, SimpleCoherence>; +template class Cache, MissQueue, UniCoherence>; +#endif +#endif + +#if defined(USE_CACHE_SPLIT) +template class Cache, BlockingBuffer, SimpleCoherence>; +template class Cache, BlockingBuffer, UniCoherence>; +template class Cache, MissQueue, SimpleCoherence>; +template class Cache, MissQueue, UniCoherence>; +#if defined(USE_LZSS_COMPRESSION) +template class Cache, BlockingBuffer, SimpleCoherence>; +template class Cache, BlockingBuffer, UniCoherence>; +template class Cache, MissQueue, SimpleCoherence>; +template class Cache, MissQueue, UniCoherence>; +#endif +#endif + +#if defined(USE_CACHE_SPLIT_LIFO) +template class Cache, BlockingBuffer, SimpleCoherence>; +template class Cache, BlockingBuffer, UniCoherence>; +template class Cache, MissQueue, SimpleCoherence>; +template class Cache, MissQueue, UniCoherence>; +#if defined(USE_LZSS_COMPRESSION) +template class Cache, BlockingBuffer, SimpleCoherence>; +template class Cache, BlockingBuffer, UniCoherence>; +template class Cache, MissQueue, SimpleCoherence>; +template class Cache, MissQueue, UniCoherence>; +#endif +#endif + +#endif //DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh new file mode 100644 index 0000000000..dcb22a99c9 --- /dev/null +++ b/src/mem/cache/cache.hh @@ -0,0 +1,264 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Dave Greene + * Steve Reinhardt + */ + +/** + * @file + * Describes a cache based on template policies. + */ + +#ifndef __CACHE_HH__ +#define __CACHE_HH__ + +#include "base/misc.hh" // fatal, panic, and warn +#include "cpu/smt.hh" // SMT_MAX_THREADS + +#include "mem/cache/base_cache.hh" +#include "mem/cache/prefetch/prefetcher.hh" + +// forward declarations +class Bus; +class ExecContext; + +/** + * A template-policy based cache. 
The behavior of the cache can be altered by + * supplying different template policies. TagStore handles all tag and data + * storage @sa TagStore. Buffering handles all misses and writes/writebacks + * @sa MissQueue. Coherence handles all coherence policy details @sa + * UniCoherence, SimpleMultiCoherence. + */ +template +class Cache : public BaseCache +{ + public: + /** Define the type of cache block to use. */ + typedef typename TagStore::BlkType BlkType; + + bool prefetchAccess; + protected: + + /** Tag and data Storage */ + TagStore *tags; + /** Miss and Writeback handler */ + Buffering *missQueue; + /** Coherence protocol. */ + Coherence *coherence; + + /** Prefetcher */ + Prefetcher *prefetcher; + + /** Do fast copies in this cache. */ + bool doCopy; + + /** Block on a delayed copy. */ + bool blockOnCopy; + + /** + * The clock ratio of the outgoing bus. + * Used for calculating critical word first. + */ + int busRatio; + + /** + * The bus width in bytes of the outgoing bus. + * Used for calculating critical word first. + */ + int busWidth; + + /** + * A permanent mem req to always be used to cause invalidations. + * Used to append to target list, to cause an invalidation. + */ + Packet * invalidatePkt; + + /** + * Temporarily move a block into a MSHR. + * @todo Remove this when LSQ/SB are fixed and implemented in memtest. + */ + void pseudoFill(Addr addr, int asid); + + /** + * Temporarily move a block into an existing MSHR. + * @todo Remove this when LSQ/SB are fixed and implemented in memtest. 
+ */ + void pseudoFill(MSHR *mshr); + + public: + + class Params + { + public: + TagStore *tags; + Buffering *missQueue; + Coherence *coherence; + bool doCopy; + bool blockOnCopy; + BaseCache::Params baseParams; + Bus *in; + Bus *out; + Prefetcher *prefetcher; + bool prefetchAccess; + + Params(TagStore *_tags, Buffering *mq, Coherence *coh, + bool do_copy, BaseCache::Params params, Bus * in_bus, + Bus * out_bus, Prefetcher *_prefetcher, + bool prefetch_access) + : tags(_tags), missQueue(mq), coherence(coh), doCopy(do_copy), + blockOnCopy(false), baseParams(params), in(in_bus), out(out_bus), + prefetcher(_prefetcher), prefetchAccess(prefetch_access) + { + } + }; + + /** Instantiates a basic cache object. */ + Cache(const std::string &_name, HierParams *hier_params, Params ¶ms); + + void regStats(); + + /** + * Performs the access specified by the request. + * @param req The request to perform. + * @return The result of the access. + */ + MemAccessResult access(Packet * &pkt); + + /** + * Selects a request to send on the bus. + * @return The memory request to service. + */ + Packet * getPacket(); + + /** + * Was the request was sent successfully? + * @param req The request. + * @param success True if the request was sent successfully. + */ + void sendResult(Packet * &pkt, bool success); + + /** + * Handles a response (cache line fill/write ack) from the bus. + * @param req The request being responded to. + */ + void handleResponse(Packet * &pkt); + + /** + * Start handling a copy transaction. + * @param req The copy request to perform. + */ + void startCopy(Packet * &pkt); + + /** + * Handle a delayed copy transaction. + * @param req The delayed copy request to continue. + * @param addr The address being responded to. + * @param blk The block of the current response. + * @param mshr The mshr being handled. + */ + void handleCopy(Packet * &pkt, Addr addr, BlkType *blk, MSHR *mshr); + + /** + * Selects a coherence message to forward to lower levels of the hierarchy. 
+ * @return The coherence message to forward. + */ + Packet * getCoherenceReq(); + + /** + * Snoops bus transactions to maintain coherence. + * @param req The current bus transaction. + */ + void snoop(Packet * &pkt); + + void snoopResponse(Packet * &pkt); + + /** + * Invalidates the block containing address if found. + * @param addr The address to look for. + * @param asid The address space ID of the address. + * @todo Is this function necessary? + */ + void invalidateBlk(Addr addr, int asid); + + /** + * Aquash all requests associated with specified thread. + * intended for use by I-cache. + * @param thread_number The thread to squash. + */ + void squash(int thread_number) + { + missQueue->squash(thread_number); + } + + /** + * Return the number of outstanding misses in a Cache. + * Default returns 0. + * + * @retval unsigned The number of missing still outstanding. + */ + unsigned outstandingMisses() const + { + return missQueue->getMisses(); + } + + /** + * Send a response to the slave interface. + * @param req The request being responded to. + * @param time The time the response is ready. + */ + void respond(Packet * &pkt, Tick time) + { + si->respond(pkt,time); + } + + /** + * Perform the access specified in the request and return the estimated + * time of completion. This function can either update the hierarchy state + * or just perform the access wherever the data is found depending on the + * state of the update flag. + * @param req The memory request to satisfy + * @param update If true, update the hierarchy, otherwise just perform the + * request. + * @return The estimated completion time. + */ + Tick probe(Packet * &pkt, bool update); + + /** + * Snoop for the provided request in the cache and return the estimated + * time of completion. + * @todo Can a snoop probe not change state? + * @param req The memory request to satisfy + * @param update If true, update the hierarchy, otherwise just perform the + * request. 
+ * @return The estimated completion time. + */ + Tick snoopProbe(Packet * &pkt, bool update); +}; + +#endif // __CACHE_HH__ diff --git a/src/mem/cache/cache_blk.hh b/src/mem/cache/cache_blk.hh new file mode 100644 index 0000000000..cf1bd20e2e --- /dev/null +++ b/src/mem/cache/cache_blk.hh @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Erik Hallnor + */ + +/** @file + * Definitions of a simple cache block class. + */ + +#ifndef __CACHE_BLK_HH__ +#define __CACHE_BLK_HH__ + +#include "sim/root.hh" // for Tick +#include "arch/isa_traits.hh" // for Addr +#include "cpu/exec_context.hh" + +/** + * Cache block status bit assignments + */ +enum CacheBlkStatusBits { + /** valid, readable */ + BlkValid = 0x01, + /** write permission */ + BlkWritable = 0x02, + /** dirty (modified) */ + BlkDirty = 0x04, + /** compressed */ + BlkCompressed = 0x08, + /** block was referenced */ + BlkReferenced = 0x10, + /** block was a hardware prefetch yet unaccessed*/ + BlkHWPrefetched = 0x20 +}; + +/** + * A Basic Cache block. + * Contains the tag, status, and a pointer to data. + */ +class CacheBlk +{ + public: + /** The address space ID of this block. */ + int asid; + /** Data block tag value. */ + Addr tag; + /** + * Contains a copy of the data in this block for easy access. This is used + * for efficient execution when the data could be actually stored in + * another format (COW, compressed, sub-blocked, etc). In all cases the + * data stored here should be kept consistant with the actual data + * referenced by this block. + */ + uint8_t *data; + /** the number of bytes stored in this block. */ + int size; + + /** block state: OR of CacheBlkStatusBit */ + typedef unsigned State; + + /** The current status of this block. @sa CacheBlockStatusBits */ + State status; + + /** Which curTick will this block be accessable */ + Tick whenReady; + + /** Save the exec context so that writebacks can use them. */ + ExecContext *xc; + + /** + * The set this block belongs to. + * @todo Move this into subclasses when we fix CacheTags to use them. + */ + int set; + + /** Number of references to this block since it was brought in. */ + int refCount; + + CacheBlk() + : asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0), xc(0), + set(-1), refCount(0) + {} + + /** + * Copy the state of the given block into this one. 
+ * @param rhs The block to copy. + * @return a const reference to this block. + */ + const CacheBlk& operator=(const CacheBlk& rhs) + { + asid = rhs.asid; + tag = rhs.tag; + data = rhs.data; + size = rhs.size; + status = rhs.status; + whenReady = rhs.whenReady; + xc = rhs.xc; + set = rhs.set; + refCount = rhs.refCount; + return *this; + } + + /** + * Checks the write permissions of this block. + * @return True if the block is writable. + */ + bool isWritable() const + { + const int needed_bits = BlkWritable | BlkValid; + return (status & needed_bits) == needed_bits; + } + + /** + * Checks that a block is valid (readable). + * @return True if the block is valid. + */ + bool isValid() const + { + return (status & BlkValid) != 0; + } + + /** + * Check to see if a block has been written. + * @return True if the block is dirty. + */ + bool isModified() const + { + return (status & BlkDirty) != 0; + } + + /** + * Check to see if this block contains compressed data. + * @return True iF the block's data is compressed. + */ + bool isCompressed() const + { + return (status & BlkCompressed) != 0; + } + + /** + * Check if this block has been referenced. + * @return True if the block has been referenced. + */ + bool isReferenced() const + { + return (status & BlkReferenced) != 0; + } + + /** + * Check if this block was the result of a hardware prefetch, yet to + * be touched. + * @return True if the block was a hardware prefetch, unaccesed. + */ + bool isPrefetch() const + { + return (status & BlkHWPrefetched) != 0; + } + + +}; + +/** + * Output a CacheBlk to the given ostream. + * @param out The stream for the output. + * @param blk The cache block to print. + * + * @return The output stream. 
+ */ +inline std::ostream & +operator<<(std::ostream &out, const CacheBlk &blk) +{ + out << std::hex << std::endl; + out << " Tag: " << blk.tag << std::endl; + out << " Status: " << blk.status << std::endl; + + return(out << std::dec); +} + +#endif //__CACHE_BLK_HH__ diff --git a/src/mem/cache/cache_builder.cc b/src/mem/cache/cache_builder.cc new file mode 100644 index 0000000000..e3efb9bc36 --- /dev/null +++ b/src/mem/cache/cache_builder.cc @@ -0,0 +1,482 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Nathan Binkert + */ + +/** + * @file + * Simobject instatiation of caches. + */ +#include + +// Must be included first to determine which caches we want +#include "mem/config/cache.hh" +#include "mem/config/compression.hh" +#include "mem/config/prefetch.hh" + +#include "mem/cache/base_cache.hh" +#include "mem/cache/cache.hh" +#include "mem/bus/bus.hh" +#include "mem/cache/coherence/coherence_protocol.hh" +#include "sim/builder.hh" + +// Tag Templates +#if defined(USE_CACHE_LRU) +#include "mem/cache/tags/lru.hh" +#endif + +#if defined(USE_CACHE_FALRU) +#include "mem/cache/tags/fa_lru.hh" +#endif + +#if defined(USE_CACHE_IIC) +#include "mem/cache/tags/iic.hh" +#endif + +#if defined(USE_CACHE_SPLIT) +#include "mem/cache/tags/split.hh" +#endif + +#if defined(USE_CACHE_SPLIT_LIFO) +#include "mem/cache/tags/split_lifo.hh" +#endif + +// Compression Templates +#include "base/compression/null_compression.hh" +#if defined(USE_LZSS_COMPRESSION) +#include "base/compression/lzss_compression.hh" +#endif + +// CacheTags Templates +#include "mem/cache/tags/cache_tags.hh" + +// MissQueue Templates +#include "mem/cache/miss/miss_queue.hh" +#include "mem/cache/miss/blocking_buffer.hh" + +// Coherence Templates +#include "mem/cache/coherence/uni_coherence.hh" +#include "mem/cache/coherence/simple_coherence.hh" + +// Bus Interfaces +#include "mem/bus/slave_interface.hh" +#include "mem/bus/master_interface.hh" +#include 
"mem/memory_interface.hh" + +#include "mem/trace/mem_trace_writer.hh" + +//Prefetcher Headers +#if defined(USE_GHB) +#include "mem/cache/prefetch/ghb_prefetcher.hh" +#endif +#if defined(USE_TAGGED) +#include "mem/cache/prefetch/tagged_prefetcher.hh" +#endif +#if defined(USE_STRIDED) +#include "mem/cache/prefetch/stride_prefetcher.hh" +#endif + + +using namespace std; +using namespace TheISA; + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseCache) + + Param size; + Param assoc; + Param block_size; + Param latency; + Param mshrs; + Param tgts_per_mshr; + Param write_buffers; + Param prioritizeRequests; + SimObjectParam in_bus; + SimObjectParam out_bus; + Param do_copy; + SimObjectParam protocol; + Param trace_addr; + Param hash_delay; +#if defined(USE_CACHE_IIC) + SimObjectParam repl; +#endif + Param compressed_bus; + Param store_compressed; + Param adaptive_compression; + Param compression_latency; + Param subblock_size; + Param max_miss_count; + SimObjectParam hier; + VectorParam > addr_range; + SimObjectParam mem_trace; + Param split; + Param split_size; + Param lifo; + Param two_queue; + Param prefetch_miss; + Param prefetch_access; + Param prefetcher_size; + Param prefetch_past_page; + Param prefetch_serial_squash; + Param prefetch_latency; + Param prefetch_degree; + Param prefetch_policy; + Param prefetch_cache_check_push; + Param prefetch_use_cpu_id; + Param prefetch_data_accesses_only; + +END_DECLARE_SIM_OBJECT_PARAMS(BaseCache) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(BaseCache) + + INIT_PARAM(size, "capacity in bytes"), + INIT_PARAM(assoc, "associativity"), + INIT_PARAM(block_size, "block size in bytes"), + INIT_PARAM(latency, "hit latency in CPU cycles"), + INIT_PARAM(mshrs, "number of MSHRs (max outstanding requests)"), + INIT_PARAM(tgts_per_mshr, "max number of accesses per MSHR"), + INIT_PARAM_DFLT(write_buffers, "number of write buffers", 8), + INIT_PARAM_DFLT(prioritizeRequests, "always service demand misses first", + false), + 
INIT_PARAM_DFLT(in_bus, "incoming bus object", NULL), + INIT_PARAM(out_bus, "outgoing bus object"), + INIT_PARAM_DFLT(do_copy, "perform fast copies in the cache", false), + INIT_PARAM_DFLT(protocol, "coherence protocol to use in the cache", NULL), + INIT_PARAM_DFLT(trace_addr, "address to trace", 0), + + INIT_PARAM_DFLT(hash_delay, "time in cycles of hash access",1), +#if defined(USE_CACHE_IIC) + INIT_PARAM_DFLT(repl, "replacement policy",NULL), +#endif + INIT_PARAM_DFLT(compressed_bus, + "This cache connects to a compressed memory", + false), + INIT_PARAM_DFLT(store_compressed, "Store compressed data in the cache", + false), + INIT_PARAM_DFLT(adaptive_compression, "Use an adaptive compression scheme", + false), + INIT_PARAM_DFLT(compression_latency, + "Latency in cycles of compression algorithm", + 0), + INIT_PARAM_DFLT(subblock_size, + "Size of subblock in IIC used for compression", + 0), + INIT_PARAM_DFLT(max_miss_count, + "The number of misses to handle before calling exit", + 0), + INIT_PARAM_DFLT(hier, + "Hierarchy global variables", + &defaultHierParams), + INIT_PARAM_DFLT(addr_range, "The address range in bytes", + vector >(1,RangeIn((Addr)0, MaxAddr))), + INIT_PARAM_DFLT(mem_trace, "Memory trace to write accesses to", NULL), + INIT_PARAM_DFLT(split, "Whether this is a partitioned cache", false), + INIT_PARAM_DFLT(split_size, "the number of \"ways\" belonging to the LRU partition", 0), + INIT_PARAM_DFLT(lifo, "whether you are using a LIFO repl. 
policy", false), + INIT_PARAM_DFLT(two_queue, "whether the lifo should have two queue replacement", false), + INIT_PARAM_DFLT(prefetch_miss, "wheter you are using the hardware prefetcher from Miss stream", false), + INIT_PARAM_DFLT(prefetch_access, "wheter you are using the hardware prefetcher from Access stream", false), + INIT_PARAM_DFLT(prefetcher_size, "Number of entries in the harware prefetch queue", 100), + INIT_PARAM_DFLT(prefetch_past_page, "Allow prefetches to cross virtual page boundaries", false), + INIT_PARAM_DFLT(prefetch_serial_squash, "Squash prefetches with a later time on a subsequent miss", false), + INIT_PARAM_DFLT(prefetch_latency, "Latency of the prefetcher", 10), + INIT_PARAM_DFLT(prefetch_degree, "Degree of the prefetch depth", 1), + INIT_PARAM_DFLT(prefetch_policy, "Type of prefetcher to use", "none"), + INIT_PARAM_DFLT(prefetch_cache_check_push, "Check if in cash on push or pop of prefetch queue", true), + INIT_PARAM_DFLT(prefetch_use_cpu_id, "Use the CPU ID to seperate calculations of prefetches", true), + INIT_PARAM_DFLT(prefetch_data_accesses_only, "Only prefetch on data not on instruction accesses", false) +END_INIT_SIM_OBJECT_PARAMS(BaseCache) + + +#define BUILD_CACHE(t, comp, b, c) do { \ + Prefetcher, b> *pf; \ + if (pf_policy == "tagged") { \ + BUILD_TAGGED_PREFETCHER(t, comp, b); \ + } \ + else if (pf_policy == "stride") { \ + BUILD_STRIDED_PREFETCHER(t, comp, b); \ + } \ + else if (pf_policy == "ghb") { \ + BUILD_GHB_PREFETCHER(t, comp, b); \ + } \ + else { \ + BUILD_NULL_PREFETCHER(t, comp, b); \ + } \ + Cache, b, c>::Params params(tagStore, mq, coh, \ + do_copy, base_params, \ + in_bus, out_bus, pf, \ + prefetch_access); \ + Cache, b, c> *retval = \ + new Cache, b, c>(getInstanceName(), hier, \ + params); \ + if (in_bus == NULL) { \ + retval->setSlaveInterface(new MemoryInterface, b, c> >(getInstanceName(), hier, retval, mem_trace)); \ + } else { \ + retval->setSlaveInterface(new SlaveInterface, b, c>, Bus>(getInstanceName(), 
hier, retval, in_bus, mem_trace)); \ + } \ + retval->setMasterInterface(new MasterInterface, b, c>, Bus>(getInstanceName(), hier, retval, out_bus)); \ + out_bus->rangeChange(); \ + return retval; \ + } while (0) + +#define BUILD_CACHE_PANIC(x) do { \ + panic("%s not compiled into M5", x); \ + } while (0) + +#if defined(USE_LZSS_COMPRESSION) +#define BUILD_COMPRESSED_CACHE(TAGS, tags, b, c) do { \ + if (compressed_bus || store_compressed){ \ + CacheTags *tagStore = \ + new CacheTags(tags, \ + compression_latency, \ + true, store_compressed, \ + adaptive_compression, \ + prefetch_miss); \ + BUILD_CACHE(TAGS, LZSSCompression, b, c); \ + } else { \ + CacheTags *tagStore = \ + new CacheTags(tags, \ + compression_latency, \ + true, store_compressed, \ + adaptive_compression, \ + prefetch_miss); \ + BUILD_CACHE(TAGS, NullCompression, b, c); \ + } \ + } while (0) +#else +#define BUILD_COMPRESSED_CACHE(TAGS, tags, b, c) do { \ + if (compressed_bus || store_compressed){ \ + BUILD_CACHE_PANIC("compressed caches"); \ + } else { \ + CacheTags *tagStore = \ + new CacheTags(tags, \ + compression_latency, \ + true, store_compressed, \ + adaptive_compression \ + prefetch_miss); \ + BUILD_CACHE(TAGS, NullCompression, b, c); \ + } \ + } while (0) +#endif + +#if defined(USE_CACHE_FALRU) +#define BUILD_FALRU_CACHE(b,c) do { \ + FALRU *tags = new FALRU(block_size, size, latency); \ + BUILD_COMPRESSED_CACHE(FALRU, tags, b, c); \ + } while (0) +#else +#define BUILD_FALRU_CACHE(b, c) BUILD_CACHE_PANIC("falru cache") +#endif + +#if defined(USE_CACHE_LRU) +#define BUILD_LRU_CACHE(b, c) do { \ + LRU *tags = new LRU(numSets, block_size, assoc, latency); \ + BUILD_COMPRESSED_CACHE(LRU, tags, b, c); \ + } while (0) +#else +#define BUILD_LRU_CACHE(b, c) BUILD_CACHE_PANIC("lru cache") +#endif + +#if defined(USE_CACHE_SPLIT) +#define BUILD_SPLIT_CACHE(b, c) do { \ + Split *tags = new Split(numSets, block_size, assoc, split_size, lifo, \ + two_queue, latency); \ + BUILD_COMPRESSED_CACHE(Split, tags, 
b, c); \ + } while (0) +#else +#define BUILD_SPLIT_CACHE(b, c) BUILD_CACHE_PANIC("split cache") +#endif + +#if defined(USE_CACHE_SPLIT_LIFO) +#define BUILD_SPLIT_LIFO_CACHE(b, c) do { \ + SplitLIFO *tags = new SplitLIFO(block_size, size, assoc, \ + latency, two_queue, -1); \ + BUILD_COMPRESSED_CACHE(SplitLIFO, tags, b, c); \ + } while (0) +#else +#define BUILD_SPLIT_LIFO_CACHE(b, c) BUILD_CACHE_PANIC("lifo cache") +#endif + +#if defined(USE_CACHE_IIC) +#define BUILD_IIC_CACHE(b ,c) do { \ + IIC *tags = new IIC(iic_params); \ + BUILD_COMPRESSED_CACHE(IIC, tags, b, c); \ + } while (0) +#else +#define BUILD_IIC_CACHE(b, c) BUILD_CACHE_PANIC("iic") +#endif + +#define BUILD_CACHES(b, c) do { \ + if (repl == NULL) { \ + if (numSets == 1) { \ + BUILD_FALRU_CACHE(b, c); \ + } else { \ + if (split == true) { \ + BUILD_SPLIT_CACHE(b, c); \ + } else if (lifo == true) { \ + BUILD_SPLIT_LIFO_CACHE(b, c); \ + } else { \ + BUILD_LRU_CACHE(b, c); \ + } \ + } \ + } else { \ + BUILD_IIC_CACHE(b, c); \ + } \ + } while (0) + +#define BUILD_COHERENCE(b) do { \ + if (protocol == NULL) { \ + UniCoherence *coh = new UniCoherence(); \ + BUILD_CACHES(b, UniCoherence); \ + } else { \ + SimpleCoherence *coh = new SimpleCoherence(protocol); \ + BUILD_CACHES(b, SimpleCoherence); \ + } \ + } while (0) + +#if defined(USE_TAGGED) +#define BUILD_TAGGED_PREFETCHER(t, comp, b) pf = new \ + TaggedPrefetcher, b>(prefetcher_size, \ + !prefetch_past_page, \ + prefetch_serial_squash, \ + prefetch_cache_check_push, \ + prefetch_data_accesses_only, \ + prefetch_latency, \ + prefetch_degree) +#else +#define BUILD_TAGGED_PREFETCHER(t, comp, b) BUILD_CACHE_PANIC("Tagged Prefetcher") +#endif + +#if defined(USE_STRIDED) +#define BUILD_STRIDED_PREFETCHER(t, comp, b) pf = new \ + StridePrefetcher, b>(prefetcher_size, \ + !prefetch_past_page, \ + prefetch_serial_squash, \ + prefetch_cache_check_push, \ + prefetch_data_accesses_only, \ + prefetch_latency, \ + prefetch_degree, \ + prefetch_use_cpu_id) +#else +#define 
BUILD_STRIDED_PREFETCHER(t, comp, b) BUILD_CACHE_PANIC("Stride Prefetcher") +#endif + +#if defined(USE_GHB) +#define BUILD_GHB_PREFETCHER(t, comp, b) pf = new \ + GHBPrefetcher, b>(prefetcher_size, \ + !prefetch_past_page, \ + prefetch_serial_squash, \ + prefetch_cache_check_push, \ + prefetch_data_accesses_only, \ + prefetch_latency, \ + prefetch_degree, \ + prefetch_use_cpu_id) +#else +#define BUILD_GHB_PREFETCHER(t, comp, b) BUILD_CACHE_PANIC("GHB Prefetcher") +#endif + +#if defined(USE_TAGGED) +#define BUILD_NULL_PREFETCHER(t, comp, b) pf = new \ + TaggedPrefetcher, b>(prefetcher_size, \ + !prefetch_past_page, \ + prefetch_serial_squash, \ + prefetch_cache_check_push, \ + prefetch_data_accesses_only, \ + prefetch_latency, \ + prefetch_degree) +#else +#define BUILD_NULL_PREFETCHER(t, comp, b) BUILD_CACHE_PANIC("NULL Prefetcher (uses Tagged)") +#endif + +CREATE_SIM_OBJECT(BaseCache) +{ + string name = getInstanceName(); + int numSets = size / (assoc * block_size); + string pf_policy = prefetch_policy; + if (subblock_size == 0) { + subblock_size = block_size; + } + + // Build BaseCache param object + BaseCache::Params base_params(addr_range, latency, + block_size, max_miss_count); + + //Warnings about prefetcher policy + if (pf_policy == "none" && (prefetch_miss || prefetch_access)) { + panic("With no prefetcher, you shouldn't prefetch from" + " either miss or access stream\n"); + } + if ((pf_policy == "tagged" || pf_policy == "stride" || + pf_policy == "ghb") && !(prefetch_miss || prefetch_access)) { + warn("With this prefetcher you should chose a prefetch" + " stream (miss or access)\nNo Prefetching will occur\n"); + } + if ((pf_policy == "tagged" || pf_policy == "stride" || + pf_policy == "ghb") && prefetch_miss && prefetch_access) { + panic("Can't do prefetches from both miss and access" + " stream\n"); + } + if (pf_policy != "tagged" && pf_policy != "stride" && + pf_policy != "ghb" && pf_policy != "none") { + panic("Unrecognized form of a prefetcher: %s, try 
using" + "['none','stride','tagged','ghb']\n", pf_policy); + } + +#if defined(USE_CACHE_IIC) + // Build IIC params + IIC::Params iic_params; + iic_params.size = size; + iic_params.numSets = numSets; + iic_params.blkSize = block_size; + iic_params.assoc = assoc; + iic_params.hashDelay = hash_delay; + iic_params.hitLatency = latency; + iic_params.rp = repl; + iic_params.subblockSize = subblock_size; +#else + const void *repl = NULL; +#endif + + if (mshrs == 1 || out_bus->doEvents() == false) { + BlockingBuffer *mq = new BlockingBuffer(true); + BUILD_COHERENCE(BlockingBuffer); + } else { + MissQueue *mq = new MissQueue(mshrs, tgts_per_mshr, write_buffers, + true, prefetch_miss); + BUILD_COHERENCE(MissQueue); + } + return NULL; +} + +REGISTER_SIM_OBJECT("BaseCache", BaseCache) + + +#endif //DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh new file mode 100644 index 0000000000..3dd8d74cde --- /dev/null +++ b/src/mem/cache/cache_impl.hh @@ -0,0 +1,661 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Dave Greene + * Nathan Binkert + */ + +/** + * @file + * Cache definitions. + */ + +#include +#include + +#include +#include +#include + +#include "sim/host.hh" +#include "base/misc.hh" +#include "cpu/smt.hh" + +#include "mem/cache/cache.hh" +#include "mem/cache/cache_blk.hh" +#include "mem/cache/miss/mshr.hh" +#include "mem/cache/prefetch/prefetcher.hh" + +#include "mem/bus/bus.hh" + +#include "mem/bus/slave_interface.hh" +#include "mem/memory_interface.hh" +#include "mem/bus/master_interface.hh" + +#include "mem/mem_debug.hh" + +#include "sim/sim_events.hh" // for SimExitEvent + +using namespace std; + +template +bool +Cache:: +doTimingAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide) +{ + if (isCpuSide) + { + access(pkt); + } + else + { + if (pkt->isRespnse()) + handleResponse(pkt); + else + snoop(pkt); + } +} + +template +Tick +Cache:: +doAtomicAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide) +{ + if (isCpuSide) + { + probe(pkt, true); + } + else + { + if (pkt->isRespnse()) + handleResponse(pkt); + else + snoopProbe(pkt, true); + } +} + +template +void +Cache:: +doFunctionalAccess(Packet *pkt, MemoryPort *memoryPort, 
bool isCpuSide) +{ + if (isCpuSide) + { + probe(pkt, false); + } + else + { + if (pkt->isRespnse()) + handleResponse(pkt); + else + snoopProbe(pkt, false); + } +} + +template +void +Cache:: +recvStatusChange(Port::Status status, bool isCpuSide) +{ + +} + + +template +Cache:: +Cache(const std::string &_name, HierParams *hier_params, + Cache::Params ¶ms) + : BaseCache(_name, hier_params, params.baseParams), + prefetchAccess(params.prefetchAccess), + tags(params.tags), missQueue(params.missQueue), + coherence(params.coherence), prefetcher(params.prefetcher), + doCopy(params.doCopy), blockOnCopy(params.blockOnCopy) +{ + if (params.in == NULL) { + topLevelCache = true; + } + tags->setCache(this, params.out->width, params.out->clockRate); + tags->setPrefetcher(prefetcher); + missQueue->setCache(this); + missQueue->setPrefetcher(prefetcher); + coherence->setCache(this); + prefetcher->setCache(this); + prefetcher->setTags(tags); + prefetcher->setBuffer(missQueue); + invalidatePkt = new Packet; + invalidatePkt->cmd = Invalidate; +} + +template +void +Cache::regStats() +{ + BaseCache::regStats(); + tags->regStats(name()); + missQueue->regStats(name()); + coherence->regStats(name()); + prefetcher->regStats(name()); +} + +template +MemAccessResult +Cache::access(Packet &pkt) +{ + MemDebug::cacheAccess(pkt); + BlkType *blk = NULL; + PacketList* writebacks; + int size = blkSize; + int lat = hitLatency; + if (prefetchAccess) { + //We are determining prefetches on access stream, call prefetcher + prefetcher->handleMiss(pkt, curTick); + } + if (!pkt->isUncacheable()) { + if (pkt->cmd.isInvalidate() && !pkt->cmd.isRead() + && !pkt->cmd.isWrite()) { + //Upgrade or Invalidate + //Look into what happens if two slave caches on bus + DPRINTF(Cache, "%s %d %x ? blk_addr: %x\n", pkt->cmd.toString(), + pkt->req->asid, pkt->paddr & (((ULL(1))<<48)-1), + pkt->paddr & ~((Addr)blkSize - 1)); + + //@todo Should this return latency have the hit latency in it? 
+// respond(pkt,curTick+lat); + pkt->flags |= SATISFIED; + return MA_HIT; + } + blk = tags->handleAccess(pkt, lat, writebacks); + } else { + size = pkt->size; + } + // If this is a block size write/hint (WH64) allocate the block here + // if the coherence protocol allows it. + /** @todo make the fast write alloc (wh64) work with coherence. */ + /** @todo Do we want to do fast writes for writebacks as well? */ + if (!blk && pkt->size >= blkSize && coherence->allowFastWrites() && + (pkt->cmd == Write || pkt->cmd == WriteInvalidate) ) { + // not outstanding misses, can do this + MSHR* outstanding_miss = missQueue->findMSHR(pkt->paddr, pkt->req->asid); + if (pkt->cmd ==WriteInvalidate || !outstanding_miss) { + if (outstanding_miss) { + warn("WriteInv doing a fastallocate" + "with an outstanding miss to the same address\n"); + } + blk = tags->handleFill(NULL, pkt, BlkValid | BlkWritable, + writebacks); + ++fastWrites; + } + } + while (!writebacks.empty()) { + missQueue->doWriteback(writebacks.front()); + writebacks.pop_front(); + } + DPRINTF(Cache, "%s %d %x %s blk_addr: %x pc %x\n", pkt->cmd.toString(), + pkt->req->asid, pkt->paddr & (((ULL(1))<<48)-1), (blk) ? 
"hit" : "miss", + pkt->paddr & ~((Addr)blkSize - 1), pkt->pc); + if (blk) { + // Hit + hits[pkt->cmd.toIndex()][pkt->thread_num]++; + // clear dirty bit if write through + if (!pkt->cmd.isNoResponse()) + respond(pkt, curTick+lat); + return MA_HIT; + } + + // Miss + if (!pkt->isUncacheable()) { + misses[pkt->cmd.toIndex()][pkt->thread_num]++; + /** @todo Move miss count code into BaseCache */ + if (missCount) { + --missCount; + if (missCount == 0) + new SimExitEvent("A cache reached the maximum miss count"); + } + } + missQueue->handleMiss(pkt, size, curTick + hitLatency); + return MA_CACHE_MISS; +} + + +template +Packet * +Cache::getPacket() +{ + Packet * pkt = missQueue->getPacket(); + if (pkt) { + if (!pkt->isUncacheable()) { + if (pkt->cmd == Hard_Prefetch) misses[Hard_Prefetch][pkt->thread_num]++; + BlkType *blk = tags->findBlock(pkt); + Packet::Command cmd = coherence->getBusCmd(pkt->cmd, + (blk)? blk->status : 0); + missQueue->setBusCmd(pkt, cmd); + } + } + + assert(!doMasterPktuest() || missQueue->havePending()); + assert(!pkt || pkt->time <= curTick); + return pkt; +} + +template +void +Cache::sendResult(MemPktPtr &pkt, bool success) +{ + if (success) { + missQueue->markInService(pkt); + //Temp Hack for UPGRADES + if (pkt->cmd == Upgrade) { + handleResponse(pkt); + } + } else if (pkt && !pkt->isUncacheable()) { + missQueue->restoreOrigCmd(pkt); + } +} + +template +void +Cache::handleResponse(Packet * &pkt) +{ + BlkType *blk = NULL; + if (pkt->senderState) { + MemDebug::cacheResponse(pkt); + DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->paddr, + pkt->paddr & (((ULL(1))<<48)-1)); + + if (pkt->isCacheFill() && !pkt->isNoAllocate()) { + blk = tags->findBlock(pkt); + CacheBlk::State old_state = (blk) ? 
blk->status : 0; + MemPktList writebacks; + blk = tags->handleFill(blk, pkt->senderState, + coherence->getNewState(pkt,old_state), + writebacks); + while (!writebacks.empty()) { + missQueue->doWriteback(writebacks.front()); + } + } + missQueue->handleResponse(pkt, curTick + hitLatency); + } +} + +template +void +Cache::pseudoFill(Addr addr, int asid) +{ + // Need to temporarily move this blk into MSHRs + MSHR *mshr = missQueue->allocateTargetList(addr, asid); + int lat; + PacketList* dummy; + // Read the data into the mshr + BlkType *blk = tags->handleAccess(mshr->pkt, lat, dummy, false); + assert(dummy.empty()); + assert(mshr->pkt->isSatisfied()); + // can overload order since it isn't used on non pending blocks + mshr->order = blk->status; + // temporarily remove the block from the cache. + tags->invalidateBlk(addr, asid); +} + +template +void +Cache::pseudoFill(MSHR *mshr) +{ + // Need to temporarily move this blk into MSHRs + assert(mshr->pkt->cmd == Read); + int lat; + PacketList* dummy; + // Read the data into the mshr + BlkType *blk = tags->handleAccess(mshr->pkt, lat, dummy, false); + assert(dummy.empty()); + assert(mshr->pkt->isSatisfied()); + // can overload order since it isn't used on non pending blocks + mshr->order = blk->status; + // temporarily remove the block from the cache. 
+ tags->invalidateBlk(mshr->pkt->paddr, mshr->pkt->req->asid); +} + + +template +Packet * +Cache::getCoherenceReq() +{ + return coherence->getPacket(); +} + + +template +void +Cache::snoop(Packet * &pkt) +{ + Addr blk_addr = pkt->paddr & ~(Addr(blkSize-1)); + BlkType *blk = tags->findBlock(pkt); + MSHR *mshr = missQueue->findMSHR(blk_addr, pkt->req->asid); + if (isTopLevel() && coherence->hasProtocol()) { //@todo Move this into handle bus req + //If we find an mshr, and it is in service, we need to NACK or invalidate + if (mshr) { + if (mshr->inService) { + if ((mshr->pkt->cmd.isInvalidate() || !mshr->pkt->isCacheFill()) + && (pkt->cmd != Invalidate && pkt->cmd != WriteInvalidate)) { + //If the outstanding request was an invalidate (upgrade,readex,..) + //Then we need to ACK the request until we get the data + //Also NACK if the outstanding request is not a cachefill (writeback) + pkt->flags |= NACKED_LINE; + return; + } + else { + //The supplier will be someone else, because we are waiting for + //the data. This should cause this cache to be forced to go to + //the shared state, not the exclusive even though the shared line + //won't be asserted. But for now we will just invlidate ourselves + //and allow the other cache to go into the exclusive state. + //@todo Make it so a read to a pending read doesn't invalidate. + //@todo Make it so that a read to a pending read can't be exclusive now. 
+ + //Set the address so find match works + invalidatePkt->paddr = pkt->paddr; + + //Append the invalidate on + missQueue->addTarget(mshr,invalidatePkt); + DPRINTF(Cache, "Appending Invalidate to blk_addr: %x\n", pkt->paddr & (((ULL(1))<<48)-1)); + return; + } + } + } + //We also need to check the writeback buffers and handle those + std::vector writebacks; + if (missQueue->findWrites(blk_addr, pkt->req->asid, writebacks)) { + DPRINTF(Cache, "Snoop hit in writeback to blk_addr: %x\n", pkt->paddr & (((ULL(1))<<48)-1)); + + //Look through writebacks for any non-uncachable writes, use that + for (int i=0; ipkt->isUncacheable()) { + if (pkt->cmd.isRead()) { + //Only Upgrades don't get here + //Supply the data + pkt->flags |= SATISFIED; + + //If we are in an exclusive protocol, make it ask again + //to get write permissions (upgrade), signal shared + pkt->flags |= SHARED_LINE; + + if (doData()) { + assert(pkt->cmd.isRead()); + + assert(pkt->offset < blkSize); + assert(pkt->size <= blkSize); + assert(pkt->offset + pkt->size <=blkSize); + memcpy(pkt->data, mshr->pkt->data + pkt->offset, pkt->size); + } + respondToSnoop(pkt); + } + + if (pkt->cmd.isInvalidate()) { + //This must be an upgrade or other cache will take ownership + missQueue->markInService(mshr->pkt); + } + return; + } + } + } + } + CacheBlk::State new_state; + bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state); + if (satisfy) { + tags->handleSnoop(blk, new_state, pkt); + respondToSnoop(pkt); + return; + } + tags->handleSnoop(blk, new_state); +} + +template +void +Cache::snoopResponse(Packet * &pkt) +{ + //Need to handle the response, if NACKED + if (pkt->isNacked()) { + //Need to mark it as not in service, and retry for bus + assert(0); //Yeah, we saw a NACK come through + + //For now this should never get called, we return false when we see a NACK + //instead, by doing this we allow the bus_blocked mechanism to handle the retry + //For now it retrys in just 2 cycles, need to figure out how 
to change that + //Eventually we will want to also have success come in as a parameter + //Need to make sure that we handle the functionality that happens on successufl + //return of the sendAddr function + } +} + +template +void +Cache::invalidateBlk(Addr addr, int asid) +{ + tags->invalidateBlk(addr,asid); +} + + +/** + * @todo Fix to not assume write allocate + */ +template +Tick +Cache::probe(Packet * &pkt, bool update) +{ + MemDebug::cacheProbe(pkt); + + if (!pkt->isUncacheable()) { + if (pkt->cmd.isInvalidate() && !pkt->cmd.isRead() + && !pkt->cmd.isWrite()) { + //Upgrade or Invalidate, satisfy it, don't forward + DPRINTF(Cache, "%s %d %x ? blk_addr: %x\n", pkt->cmd.toString(), + pkt->req->asid, pkt->paddr & (((ULL(1))<<48)-1), + pkt->paddr & ~((Addr)blkSize - 1)); + pkt->flags |= SATISFIED; + return 0; + } + } + + if (!update && !doData()) { + // Nothing to do here + return mi->sendProbe(pkt,update); + } + + PacketList* writebacks; + int lat; + BlkType *blk = tags->handleAccess(pkt, lat, writebacks, update); + + if (!blk) { + // Need to check for outstanding misses and writes + Addr blk_addr = pkt->paddr & ~(blkSize - 1); + + // There can only be one matching outstanding miss. + MSHR* mshr = missQueue->findMSHR(blk_addr, pkt->req->asid); + + // There can be many matching outstanding writes. + vector writes; + missQueue->findWrites(blk_addr, pkt->req->asid, writes); + + if (!update) { + mi->sendProbe(pkt, update); + // Check for data in MSHR and writebuffer. 
+ if (mshr) { + warn("Found outstanding miss on an non-update probe"); + MSHR::TargetList *targets = mshr->getTargetList(); + MSHR::TargetList::iterator i = targets->begin(); + MSHR::TargetList::iterator end = targets->end(); + for (; i != end; ++i) { + Packet * target = *i; + // If the target contains data, and it overlaps the + // probed request, need to update data + if (target->cmd.isWrite() && target->overlaps(pkt)) { + uint8_t* pkt_data; + uint8_t* write_data; + int data_size; + if (target->paddr < pkt->paddr) { + int offset = pkt->paddr - target->paddr; + pkt_data = pkt->data; + write_data = target->data + offset; + data_size = target->size - offset; + assert(data_size > 0); + if (data_size > pkt->size) + data_size = pkt->size; + } else { + int offset = target->paddr - pkt->paddr; + pkt_data = pkt->data + offset; + write_data = target->data; + data_size = pkt->size - offset; + assert(data_size > pkt->size); + if (data_size > target->size) + data_size = target->size; + } + + if (pkt->cmd.isWrite()) { + memcpy(pkt_data, write_data, data_size); + } else { + memcpy(write_data, pkt_data, data_size); + } + } + } + } + for (int i = 0; i < writes.size(); ++i) { + Packet * write = writes[i]->pkt; + if (write->overlaps(pkt)) { + warn("Found outstanding write on an non-update probe"); + uint8_t* pkt_data; + uint8_t* write_data; + int data_size; + if (write->paddr < pkt->paddr) { + int offset = pkt->paddr - write->paddr; + pkt_data = pkt->data; + write_data = write->data + offset; + data_size = write->size - offset; + assert(data_size > 0); + if (data_size > pkt->size) + data_size = pkt->size; + } else { + int offset = write->paddr - pkt->paddr; + pkt_data = pkt->data + offset; + write_data = write->data; + data_size = pkt->size - offset; + assert(data_size > pkt->size); + if (data_size > write->size) + data_size = write->size; + } + + if (pkt->cmd.isWrite()) { + memcpy(pkt_data, write_data, data_size); + } else { + memcpy(write_data, pkt_data, data_size); + } + + } + } 
+ return 0; + } else { + // update the cache state and statistics + if (mshr || !writes.empty()){ + // Can't handle it, return pktuest unsatisfied. + return 0; + } + if (!pkt->isUncacheable()) { + // Fetch the cache block to fill + Packet * busPkt = new MemPkt(); + busPkt->paddr = blk_addr; + busPkt->size = blkSize; + busPkt->data = new uint8_t[blkSize]; + + BlkType *blk = tags->findBlock(pkt); + busPkt->cmd = coherence->getBusCmd(pkt->cmd, + (blk)? blk->status : 0); + + busPkt->req->asid = pkt->req->asid; + busPkt->xc = pkt->xc; + busPkt->thread_num = pkt->thread_num; + busPkt->time = curTick; + + lat = mi->sendProbe(busPkt, update); + + if (!busPkt->isSatisfied()) { + // blocked at a higher level, just return + return 0; + } + + misses[pkt->cmd.toIndex()][pkt->thread_num]++; + + CacheBlk::State old_state = (blk) ? blk->status : 0; + tags->handleFill(blk, busPkt, + coherence->getNewState(busPkt, old_state), + writebacks, pkt); + // Handle writebacks if needed + while (!writebacks.empty()){ + mi->sendProbe(writebacks.front(), update); + writebacks.pop_front(); + } + return lat + hitLatency; + } else { + return mi->sendProbe(pkt,update); + } + } + } else { + // There was a cache hit. + // Handle writebacks if needed + while (!writebacks.empty()){ + mi->sendProbe(writebacks.front(), update); + writebacks.pop_front(); + } + + if (update) { + hits[pkt->cmd.toIndex()][pkt->thread_num]++; + } else if (pkt->cmd.isWrite()) { + // Still need to change data in all locations. 
+ return mi->sendProbe(pkt, update); + } + return curTick + lat; + } + fatal("Probe not handled.\n"); + return 0; +} + +template +Tick +Cache::snoopProbe(MemPktPtr &pkt, bool update) +{ + Addr blk_addr = pkt->paddr & ~(Addr(blkSize-1)); + BlkType *blk = tags->findBlock(pkt); + MSHR *mshr = missQueue->findMSHR(blk_addr, pkt->req->asid); + CacheBlk::State new_state = 0; + bool satisfy = coherence->handleBusPktuest(pkt,blk,mshr, new_state); + if (satisfy) { + tags->handleSnoop(blk, new_state, pkt); + return hitLatency; + } + tags->handleSnoop(blk, new_state); + return 0; +} + diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc new file mode 100644 index 0000000000..107fd2502b --- /dev/null +++ b/src/mem/cache/coherence/coherence_protocol.cc @@ -0,0 +1,566 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Steve Reinhardt + * Ron Dreslinski + */ + +/** + * @file + * Definitions of CoherenceProtocol. + */ + +#include + +#include "base/misc.hh" +#include "mem/cache/miss/mshr.hh" +#include "mem/cache/cache.hh" +#include "mem/cache/coherence/coherence_protocol.hh" +#include "sim/builder.hh" + +using namespace std; + + +CoherenceProtocol::StateTransition::StateTransition() + : busCmd(InvalidCmd), newState(-1), snoopFunc(invalidTransition) +{ +} + + +void +CoherenceProtocol::regStats() +{ + // Even though we count all the possible transitions in the + // requestCount and snoopCount arrays, most of these are invalid, + // so we just select the interesting ones to print here. 
+ + requestCount[Invalid][Read] + .name(name() + ".read_invalid") + .desc("read misses to invalid blocks") + ; + + requestCount[Invalid][Write] + .name(name() +".write_invalid") + .desc("write misses to invalid blocks") + ; + + requestCount[Invalid][Soft_Prefetch] + .name(name() +".swpf_invalid") + .desc("soft prefetch misses to invalid blocks") + ; + + requestCount[Invalid][Hard_Prefetch] + .name(name() +".hwpf_invalid") + .desc("hard prefetch misses to invalid blocks") + ; + + requestCount[Shared][Write] + .name(name() + ".write_shared") + .desc("write misses to shared blocks") + ; + + requestCount[Owned][Write] + .name(name() + ".write_owned") + .desc("write misses to owned blocks") + ; + + snoopCount[Shared][Read] + .name(name() + ".snoop_read_shared") + .desc("read snoops on shared blocks") + ; + + snoopCount[Shared][ReadEx] + .name(name() + ".snoop_readex_shared") + .desc("readEx snoops on shared blocks") + ; + + snoopCount[Shared][Upgrade] + .name(name() + ".snoop_upgrade_shared") + .desc("upgradee snoops on shared blocks") + ; + + snoopCount[Modified][Read] + .name(name() + ".snoop_read_modified") + .desc("read snoops on modified blocks") + ; + + snoopCount[Modified][ReadEx] + .name(name() + ".snoop_readex_modified") + .desc("readEx snoops on modified blocks") + ; + + snoopCount[Owned][Read] + .name(name() + ".snoop_read_owned") + .desc("read snoops on owned blocks") + ; + + snoopCount[Owned][ReadEx] + .name(name() + ".snoop_readex_owned") + .desc("readEx snoops on owned blocks") + ; + + snoopCount[Owned][Upgrade] + .name(name() + ".snoop_upgrade_owned") + .desc("upgrade snoops on owned blocks") + ; + + snoopCount[Exclusive][Read] + .name(name() + ".snoop_read_exclusive") + .desc("read snoops on exclusive blocks") + ; + + snoopCount[Exclusive][ReadEx] + .name(name() + ".snoop_readex_exclusive") + .desc("readEx snoops on exclusive blocks") + ; + + snoopCount[Shared][Invalidate] + .name(name() + ".snoop_inv_shared") + .desc("Invalidate snoops on shared 
blocks") + ; + + snoopCount[Owned][Invalidate] + .name(name() + ".snoop_inv_owned") + .desc("Invalidate snoops on owned blocks") + ; + + snoopCount[Exclusive][Invalidate] + .name(name() + ".snoop_inv_exclusive") + .desc("Invalidate snoops on exclusive blocks") + ; + + snoopCount[Modified][Invalidate] + .name(name() + ".snoop_inv_modified") + .desc("Invalidate snoops on modified blocks") + ; + + snoopCount[Invalid][Invalidate] + .name(name() + ".snoop_inv_invalid") + .desc("Invalidate snoops on invalid blocks") + ; + + snoopCount[Shared][WriteInvalidate] + .name(name() + ".snoop_writeinv_shared") + .desc("WriteInvalidate snoops on shared blocks") + ; + + snoopCount[Owned][WriteInvalidate] + .name(name() + ".snoop_writeinv_owned") + .desc("WriteInvalidate snoops on owned blocks") + ; + + snoopCount[Exclusive][WriteInvalidate] + .name(name() + ".snoop_writeinv_exclusive") + .desc("WriteInvalidate snoops on exclusive blocks") + ; + + snoopCount[Modified][WriteInvalidate] + .name(name() + ".snoop_writeinv_modified") + .desc("WriteInvalidate snoops on modified blocks") + ; + + snoopCount[Invalid][WriteInvalidate] + .name(name() + ".snoop_writeinv_invalid") + .desc("WriteInvalidate snoops on invalid blocks") + ; +} + + +bool +CoherenceProtocol::invalidateTrans(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, MSHR *mshr, + CacheBlk::State & new_state) +{ + // invalidate the block + new_state = (blk->status & ~stateMask) | Invalid; + return false; +} + + +bool +CoherenceProtocol::supplyTrans(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, + MSHR *mshr, + CacheBlk::State & new_state + ) +{ + return true; +} + + +bool +CoherenceProtocol::supplyAndGotoSharedTrans(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, + MSHR *mshr, + CacheBlk::State & new_state) +{ + new_state = (blk->status & ~stateMask) | Shared; + pkt->flags |= SHARED_LINE; + return supplyTrans(cache, pkt, blk, mshr, new_state); +} + + +bool +CoherenceProtocol::supplyAndGotoOwnedTrans(BaseCache *cache, Packet 
* &pkt, + CacheBlk *blk, + MSHR *mshr, + CacheBlk::State & new_state) +{ + new_state = (blk->status & ~stateMask) | Owned; + pkt->flags |= SHARED_LINE; + return supplyTrans(cache, pkt, blk, mshr, new_state); +} + + +bool +CoherenceProtocol::supplyAndInvalidateTrans(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, + MSHR *mshr, + CacheBlk::State & new_state) +{ + new_state = (blk->status & ~stateMask) | Invalid; + return supplyTrans(cache, pkt, blk, mshr, new_state); +} + +bool +CoherenceProtocol::assertShared(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, + MSHR *mshr, + CacheBlk::State & new_state) +{ + new_state = (blk->status & ~stateMask) | Shared; + pkt->flags |= SHARED_LINE; + return false; +} + +CoherenceProtocol::CoherenceProtocol(const string &name, + const string &protocol, + const bool doUpgrades) + : SimObject(name) +{ + if ((protocol == "mosi" || protocol == "moesi") && !doUpgrades) { + cerr << "CoherenceProtocol: ownership protocols require upgrade transactions" + << "(write miss on owned block generates ReadExcl, which will clobber dirty block)" + << endl; + fatal(""); + } + + Packet::CommandEnum writeToSharedCmd = doUpgrades ? 
Upgrade : ReadEx; + +//@todo add in hardware prefetch to this list + if (protocol == "msi") { + // incoming requests: specify outgoing bus request + transitionTable[Invalid][Read].onRequest(Read); + transitionTable[Invalid][Write].onRequest(ReadEx); + transitionTable[Shared][Write].onRequest(writeToSharedCmd); + //Prefetching causes a read + transitionTable[Invalid][Soft_Prefetch].onRequest(Read); + transitionTable[Invalid][Hard_Prefetch].onRequest(Read); + + // on response to given request: specify new state + transitionTable[Invalid][Read].onResponse(Shared); + transitionTable[Invalid][ReadEx].onResponse(Modified); + transitionTable[Shared][writeToSharedCmd].onResponse(Modified); + + // bus snoop transition functions + transitionTable[Invalid][Read].onSnoop(nullTransition); + transitionTable[Invalid][ReadEx].onSnoop(nullTransition); + transitionTable[Shared][Read].onSnoop(nullTransition); + transitionTable[Shared][ReadEx].onSnoop(invalidateTrans); + transitionTable[Modified][ReadEx].onSnoop(supplyAndInvalidateTrans); + transitionTable[Modified][Read].onSnoop(supplyAndGotoSharedTrans); + //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) + transitionTable[Invalid][Invalidate].onSnoop(invalidateTrans); + transitionTable[Shared][Invalidate].onSnoop(invalidateTrans); + transitionTable[Modified][Invalidate].onSnoop(invalidateTrans); + transitionTable[Invalid][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Shared][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Modified][WriteInvalidate].onSnoop(invalidateTrans); + + if (doUpgrades) { + transitionTable[Invalid][Upgrade].onSnoop(nullTransition); + transitionTable[Shared][Upgrade].onSnoop(invalidateTrans); + } + } + + else if(protocol == "mesi") { + // incoming requests: specify outgoing bus request + transitionTable[Invalid][Read].onRequest(Read); + transitionTable[Invalid][Write].onRequest(ReadEx); + transitionTable[Shared][Write].onRequest(writeToSharedCmd); + //Prefetching causes 
a read + transitionTable[Invalid][Soft_Prefetch].onRequest(Read); + transitionTable[Invalid][Hard_Prefetch].onRequest(Read); + + // on response to given request: specify new state + transitionTable[Invalid][Read].onResponse(Exclusive); + //It will move into shared if the shared line is asserted in the + //getNewState function + transitionTable[Invalid][ReadEx].onResponse(Modified); + transitionTable[Shared][writeToSharedCmd].onResponse(Modified); + + // bus snoop transition functions + transitionTable[Invalid][Read].onSnoop(nullTransition); + transitionTable[Invalid][ReadEx].onSnoop(nullTransition); + transitionTable[Shared][Read].onSnoop(assertShared); + transitionTable[Shared][ReadEx].onSnoop(invalidateTrans); + transitionTable[Exclusive][Read].onSnoop(assertShared); + transitionTable[Exclusive][ReadEx].onSnoop(invalidateTrans); + transitionTable[Modified][ReadEx].onSnoop(supplyAndInvalidateTrans); + transitionTable[Modified][Read].onSnoop(supplyAndGotoSharedTrans); + //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) + transitionTable[Invalid][Invalidate].onSnoop(invalidateTrans); + transitionTable[Shared][Invalidate].onSnoop(invalidateTrans); + transitionTable[Modified][Invalidate].onSnoop(invalidateTrans); + transitionTable[Exclusive][Invalidate].onSnoop(invalidateTrans); + transitionTable[Invalid][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Shared][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Modified][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Exclusive][WriteInvalidate].onSnoop(invalidateTrans); + + if (doUpgrades) { + transitionTable[Invalid][Upgrade].onSnoop(nullTransition); + transitionTable[Shared][Upgrade].onSnoop(invalidateTrans); + } + } + + else if(protocol == "mosi") { + // incoming requests: specify outgoing bus request + transitionTable[Invalid][Read].onRequest(Read); + transitionTable[Invalid][Write].onRequest(ReadEx); + transitionTable[Shared][Write].onRequest(writeToSharedCmd); + 
transitionTable[Owned][Write].onRequest(writeToSharedCmd); + //Prefetching causes a read + transitionTable[Invalid][Soft_Prefetch].onRequest(Read); + transitionTable[Invalid][Hard_Prefetch].onRequest(Read); + + // on response to given request: specify new state + transitionTable[Invalid][Read].onResponse(Shared); + transitionTable[Invalid][ReadEx].onResponse(Modified); + transitionTable[Shared][writeToSharedCmd].onResponse(Modified); + transitionTable[Owned][writeToSharedCmd].onResponse(Modified); + + // bus snoop transition functions + transitionTable[Invalid][Read].onSnoop(nullTransition); + transitionTable[Invalid][ReadEx].onSnoop(nullTransition); + transitionTable[Invalid][Upgrade].onSnoop(nullTransition); + transitionTable[Shared][Read].onSnoop(assertShared); + transitionTable[Shared][ReadEx].onSnoop(invalidateTrans); + transitionTable[Shared][Upgrade].onSnoop(invalidateTrans); + transitionTable[Modified][ReadEx].onSnoop(supplyAndInvalidateTrans); + transitionTable[Modified][Read].onSnoop(supplyAndGotoOwnedTrans); + transitionTable[Owned][Read].onSnoop(supplyAndGotoOwnedTrans); + transitionTable[Owned][ReadEx].onSnoop(supplyAndInvalidateTrans); + transitionTable[Owned][Upgrade].onSnoop(invalidateTrans); + //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) + transitionTable[Invalid][Invalidate].onSnoop(invalidateTrans); + transitionTable[Shared][Invalidate].onSnoop(invalidateTrans); + transitionTable[Modified][Invalidate].onSnoop(invalidateTrans); + transitionTable[Owned][Invalidate].onSnoop(invalidateTrans); + transitionTable[Invalid][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Shared][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Modified][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Owned][WriteInvalidate].onSnoop(invalidateTrans); + } + + else if(protocol == "moesi") { + // incoming requests: specify outgoing bus request + transitionTable[Invalid][Read].onRequest(Read); + 
transitionTable[Invalid][Write].onRequest(ReadEx); + transitionTable[Shared][Write].onRequest(writeToSharedCmd); + transitionTable[Owned][Write].onRequest(writeToSharedCmd); + //Prefetching causes a read + transitionTable[Invalid][Soft_Prefetch].onRequest(Read); + transitionTable[Invalid][Hard_Prefetch].onRequest(Read); + + // on response to given request: specify new state + transitionTable[Invalid][Read].onResponse(Exclusive); + //It will move into shared if the shared line is asserted in the + //getNewState function + transitionTable[Invalid][ReadEx].onResponse(Modified); + transitionTable[Shared][writeToSharedCmd].onResponse(Modified); + transitionTable[Owned][writeToSharedCmd].onResponse(Modified); + + // bus snoop transition functions + transitionTable[Invalid][Read].onSnoop(nullTransition); + transitionTable[Invalid][ReadEx].onSnoop(nullTransition); + transitionTable[Invalid][Upgrade].onSnoop(nullTransition); + transitionTable[Shared][Read].onSnoop(assertShared); + transitionTable[Shared][ReadEx].onSnoop(invalidateTrans); + transitionTable[Shared][Upgrade].onSnoop(invalidateTrans); + transitionTable[Exclusive][Read].onSnoop(assertShared); + transitionTable[Exclusive][ReadEx].onSnoop(invalidateTrans); + transitionTable[Modified][Read].onSnoop(supplyAndGotoOwnedTrans); + transitionTable[Modified][ReadEx].onSnoop(supplyAndInvalidateTrans); + transitionTable[Owned][Read].onSnoop(supplyAndGotoOwnedTrans); + transitionTable[Owned][ReadEx].onSnoop(supplyAndInvalidateTrans); + transitionTable[Owned][Upgrade].onSnoop(invalidateTrans); + //Transitions on seeing a DMA (writeInv(samelevel) or DMAInv) + transitionTable[Invalid][Invalidate].onSnoop(invalidateTrans); + transitionTable[Shared][Invalidate].onSnoop(invalidateTrans); + transitionTable[Exclusive][Invalidate].onSnoop(invalidateTrans); + transitionTable[Modified][Invalidate].onSnoop(invalidateTrans); + transitionTable[Owned][Invalidate].onSnoop(invalidateTrans); + 
transitionTable[Invalid][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Shared][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Exclusive][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Modified][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Owned][WriteInvalidate].onSnoop(invalidateTrans); + } + + else { + cerr << "CoherenceProtocol: unrecognized protocol " << protocol + << endl; + fatal(""); + } +} + + +Packet::Command +CoherenceProtocol::getBusCmd(Packet::Command cmdIn, CacheBlk::State state, + MSHR *mshr) +{ + state &= stateMask; + int cmd_idx = cmdIn.toIndex(); + + assert(0 <= state && state <= stateMax); + assert(0 <= cmd_idx && cmd_idx < NUM_MEM_CMDS); + + Packet::Command cmdOut = transitionTable[state][cmd_idx].busCmd; + + assert(cmdOut != InvalidCmd); + + ++requestCount[state][cmd_idx]; + + return cmdOut; +} + + +CacheBlk::State +CoherenceProtocol::getNewState(const Packet * &pkt, CacheBlk::State oldState) +{ + CacheBlk::State state = oldState & stateMask; + int cmd_idx = pkt->cmd.toIndex(); + + assert(0 <= state && state <= stateMax); + assert(0 <= cmd_idx && cmd_idx < NUM_MEM_CMDS); + + CacheBlk::State newState = transitionTable[state][cmd_idx].newState; + + //Check if it's exclusive and the shared line was asserted, + //then goto shared instead + if (newState == Exclusive && (pkt->flags & SHARED_LINE)) { + newState = Shared; + } + + assert(newState != -1); + + //Make sure not to loose any other state information + newState = (oldState & ~stateMask) | newState; + return newState; +} + + +bool +CoherenceProtocol::handleBusRequest(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, + MSHR *mshr, + CacheBlk::State & new_state) +{ + if (blk == NULL) { + // nothing to do if we don't have a block + return false; + } + + CacheBlk::State state = blk->status & stateMask; + int cmd_idx = pkt->cmd.toIndex(); + + assert(0 <= state && state <= stateMax); + assert(0 <= cmd_idx && cmd_idx < NUM_MEM_CMDS); + +// 
assert(mshr == NULL); // can't currently handle outstanding requests + //Check first if MSHR, and also ensure, if there is one, that it is not in service + assert(!mshr || mshr->inService == 0); + ++snoopCount[state][cmd_idx]; + + bool ret = transitionTable[state][cmd_idx].snoopFunc(cache, pkt, blk, mshr, + new_state); + + + + return ret; +} + +bool +CoherenceProtocol::nullTransition(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, MSHR *mshr, + CacheBlk::State & new_state) +{ + // do nothing + if (blk) + new_state = blk->status; + return false; +} + + +bool +CoherenceProtocol::invalidTransition(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, MSHR *mshr, + CacheBlk::State & new_state) +{ + panic("Invalid transition"); + return false; +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(CoherenceProtocol) + + Param protocol; + Param do_upgrades; + +END_DECLARE_SIM_OBJECT_PARAMS(CoherenceProtocol) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(CoherenceProtocol) + + INIT_PARAM(protocol, "name of coherence protocol"), + INIT_PARAM_DFLT(do_upgrades, "use upgrade transactions?", true) + +END_INIT_SIM_OBJECT_PARAMS(CoherenceProtocol) + + +CREATE_SIM_OBJECT(CoherenceProtocol) +{ + return new CoherenceProtocol(getInstanceName(), protocol, + do_upgrades); +} + +REGISTER_SIM_OBJECT("CoherenceProtocol", CoherenceProtocol) + +#endif // DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/coherence/coherence_protocol.hh b/src/mem/cache/coherence/coherence_protocol.hh new file mode 100644 index 0000000000..4f65205525 --- /dev/null +++ b/src/mem/cache/coherence/coherence_protocol.hh @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Steve Reinhardt + */ + +/** + * @file + * Declaration of CoherenceProtocol, a basic coherence policy. + */ +#ifndef __COHERENCE_PROTOCOL_HH__ +#define __COHERENCE_PROTOCOL_HH__ + +#include + +#include "sim/sim_object.hh" +#include "mem/packet.hh" +#include "mem/mem_cmd.hh" +#include "mem/cache/cache_blk.hh" +#include "base/statistics.hh" + +class BaseCache; +class MSHR; + +/** + * A simple coherence policy for the memory hierarchy.
Currently implements + * MSI, MESI, and MOESI protocols. + */ +class CoherenceProtocol : public SimObject +{ + public: + /** + * Construct and initialize this policy. + * @param name The name of this policy. + * @param protocol The string representation of the protocol to use. + * @param doUpgrades True if bus upgrades should be used. + */ + CoherenceProtocol(const std::string &name, const std::string &protocol, + const bool doUpgrades); + + /** + * Destructor. + */ + virtual ~CoherenceProtocol() {}; + + /** + * Register statistics + */ + virtual void regStats(); + + /** + * Get the proper bus command for the given command and status. + * @param cmd The request's command. + * @param status The current state of the cache block. + * @param mshr The MSHR matching the request. + * @return The proper bus command, as determined by the protocol. + */ + Packet::Command getBusCmd(Packet::Command cmd, CacheBlk::State status, + MSHR *mshr = NULL); + + /** + * Return the proper state given the current state and the bus response. + * @param pkt The bus response. + * @param oldState The current block state. + * @return The new state. + */ + CacheBlk::State getNewState(const Packet * &pkt, + CacheBlk::State oldState); + + /** + * Handle snooped bus requests. + * @param cache The cache that snooped the request. + * @param pkt The snooped bus request. + * @param blk The cache block corresponding to the request, if any. + * @param mshr The MSHR corresponding to the request, if any. + * @param new_state The new coherence state of the block. + * @return True if the request should be satisfied locally. + */ + bool handleBusRequest(BaseCache *cache, Packet * &pkt, CacheBlk *blk, + MSHR *mshr, CacheBlk::State &new_state); + + protected: + /** Snoop function type. */ + typedef bool (*SnoopFuncType)(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + // + // Standard snoop transition functions + // + + /** + * Do nothing transition.
+ */ + static bool nullTransition(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Invalid transition, basically panic. + */ + static bool invalidTransition(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Invalidate block, move to Invalid state. + */ + static bool invalidateTrans(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Supply data, no state transition. + */ + static bool supplyTrans(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Supply data and go to Shared state. + */ + static bool supplyAndGotoSharedTrans(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Supply data and go to Owned state. + */ + static bool supplyAndGotoOwnedTrans(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Invalidate block, supply data, and go to Invalid state. + */ + static bool supplyAndInvalidateTrans(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Assert the shared line for a block that is shared/exclusive. + */ + static bool assertShared(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Definition of protocol state transitions. + */ + class StateTransition + { + friend class CoherenceProtocol; + + /** The bus command of this transition. */ + Packet::Command busCmd; + /** The state to transition to. */ + int newState; + /** The snoop function for this transition. */ + SnoopFuncType snoopFunc; + + /** + * Constructor, defaults to invalid transition. + */ + StateTransition(); + + /** + * Initialize bus command. + * @param cmd The bus command to use. + */ + void onRequest(Packet::Command cmd) + { + busCmd = cmd; + } + + /** + * Set the transition state. + * @param s The new state. + */ + void onResponse(CacheBlk::State s) + { + newState = s; + } + + /** + * Initialize the snoop function. + * @param f The new snoop function. 
+ */ + void onSnoop(SnoopFuncType f) + { + snoopFunc = f; + } + }; + + friend class CoherenceProtocol::StateTransition; + + /** Mask to select status bits relevant to coherence protocol. */ + const static CacheBlk::State + stateMask = BlkValid | BlkWritable | BlkDirty; + + /** The Modified (M) state. */ + const static CacheBlk::State + Modified = BlkValid | BlkWritable | BlkDirty; + /** The Owned (O) state. */ + const static CacheBlk::State + Owned = BlkValid | BlkDirty; + /** The Exclusive (E) state. */ + const static CacheBlk::State + Exclusive = BlkValid | BlkWritable; + /** The Shared (S) state. */ + const static CacheBlk::State + Shared = BlkValid; + /** The Invalid (I) state. */ + const static CacheBlk::State + Invalid = 0; + + /** + * Maximum state encoding value (used to size transition lookup + * table). Could be more than number of states, depends on + * encoding of status bits. + */ + const static int stateMax = stateMask; + + /** + * The table of all possible transitions, organized by starting state and + * request command. + */ + StateTransition transitionTable[stateMax+1][NUM_MEM_CMDS]; + + /** + * @addtogroup CoherenceStatistics + * @{ + */ + /** + * State accesses from parent cache. + */ + Stats::Scalar<> requestCount[stateMax+1][NUM_MEM_CMDS]; + /** + * State accesses from snooped requests. + */ + Stats::Scalar<> snoopCount[stateMax+1][NUM_MEM_CMDS]; + /** + * @} + */ +}; + +#endif // __COHERENCE_PROTOCOL_HH__ diff --git a/src/mem/cache/coherence/simple_coherence.hh b/src/mem/cache/coherence/simple_coherence.hh new file mode 100644 index 0000000000..1956745902 --- /dev/null +++ b/src/mem/cache/coherence/simple_coherence.hh @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Declaration of a simple coherence policy. + */ + +#ifndef __SIMPLE_COHERENCE_HH__ +#define __SIMPLE_COHERENCE_HH__ + +#include + +#include "mem/packet.hh" +#include "mem/mem_cmd.hh" +#include "mem/cache/cache_blk.hh" +#include "mem/cache/miss/mshr_queue.hh" +#include "mem/cache/coherence/coherence_protocol.hh" + +class BaseCache; + +/** + * A simple MP coherence policy. 
This policy assumes an atomic bus and only one + * level of cache. + */ +class SimpleCoherence +{ + protected: + /** Pointer to the parent cache. */ + BaseCache *cache; + /** Pointer to the coherence protocol. */ + CoherenceProtocol *protocol; + + public: + /** + * Construct and initialize this coherence policy. + * @param _protocol The coherence protocol to use. + */ + SimpleCoherence(CoherenceProtocol *_protocol) + : protocol(_protocol) + { + } + + /** + * Set the pointer to the parent cache. + * @param _cache The parent cache. + */ + void setCache(BaseCache *_cache) + { + cache = _cache; + } + + /** + * Register statistics. + * @param name The name to prepend to stat descriptions. + */ + void regStats(const std::string &name) + { + } + + /** + * This policy does not forward invalidates, return NULL. + * @return NULL. + */ + Packet * getPacket() + { + return NULL; + } + + /** + * Return the proper state given the current state and the bus response. + * @param pkt The bus response. + * @param current The current block state. + * @return The new state. + */ + CacheBlk::State getNewState(Packet * &pkt, CacheBlk::State current) + { + return protocol->getNewState(pkt, current); + } + + /** + * Handle snooped bus requests. + * @param pkt The snooped bus request. + * @param blk The cache block corresponding to the request, if any. + * @param mshr The MSHR corresponding to the request, if any. + * @param new_state Return the new state for the block. + */ + bool handleBusRequest(Packet * &pkt, CacheBlk *blk, MSHR *mshr, + CacheBlk::State &new_state) + { +// assert(mshr == NULL); +//Got rid of, there could be an MSHR, but it can't be in service + if (blk != NULL) + { + if (pkt->cmd != Writeback) { + return protocol->handleBusRequest(cache, pkt, blk, mshr, + new_state); + } + else { //It is a writeback, must be ownership protocol, just keep state + new_state = blk->status; + } + } + return false; + } + + /** + * Get the proper bus command for the given command and status.
+ * @param cmd The request's command. + * @param state The current state of the cache block. + * @return The proper bus command, as determined by the protocol. + */ + Packet::Command getBusCmd(Packet::Command &cmd, CacheBlk::State state) + { + if (cmd == Writeback) return Writeback; + return protocol->getBusCmd(cmd, state); + } + + /** + * Return true if this coherence policy can handle fast cache writes. + */ + bool allowFastWrites() { return false; } + + bool hasProtocol() { return true; } +}; + +#endif //__SIMPLE_COHERENCE_HH__ + + + + + + + + diff --git a/src/mem/cache/coherence/uni_coherence.cc b/src/mem/cache/coherence/uni_coherence.cc new file mode 100644 index 0000000000..68a78e3951 --- /dev/null +++ b/src/mem/cache/coherence/uni_coherence.cc @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +#include "mem/cache/coherence/uni_coherence.hh" +#include "mem/cache/base_cache.hh" + +#include "base/trace.hh" + +using namespace std; + +UniCoherence::UniCoherence() + : cshrs(50) +{ +} + +Packet * +UniCoherence::getPacket() +{ + bool unblock = cshrs.isFull(); + Packet * pkt = cshrs.getPkt(); + cshrs.markInService(pkt->senderState); + if (!cshrs.havePending()) { + cache->clearSlaveRequest(Request_Coherence); + } + if (unblock) { + //since CSHRs are always used as buffers, should always get rid of one + assert(!cshrs.isFull()); + cache->clearBlocked(Blocked_Coherence); + } + return pkt; +} + +/** + * @todo add support for returning slave requests, not doing them here. 
+ */ +bool +UniCoherence::handleBusRequest(Packet * &pkt, CacheBlk *blk, MSHR *mshr, + CacheBlk::State &new_state) +{ + new_state = 0; + if (pkt->cmd.isInvalidate()) { + DPRINTF(Cache, "snoop inval on blk %x (blk ptr %x)\n", + pkt->paddr, blk); + if (!cache->isTopLevel()) { + // Forward to other caches + Packet * tmp = new MemPkt(); + tmp->cmd = Invalidate; + tmp->paddr = pkt->paddr; + tmp->size = pkt->size; + cshrs.allocate(tmp); + cache->setSlaveRequest(Request_Coherence, curTick); + if (cshrs.isFull()) { + cache->setBlockedForSnoop(Blocked_Coherence); + } + } + } else { + if (blk) { + new_state = blk->status; + } + } + return false; +} diff --git a/src/mem/cache/coherence/uni_coherence.hh b/src/mem/cache/coherence/uni_coherence.hh new file mode 100644 index 0000000000..b64f6c931a --- /dev/null +++ b/src/mem/cache/coherence/uni_coherence.hh @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +#ifndef __UNI_COHERENCE_HH__ +#define __UNI_COHERENCE_HH__ + +#include "base/trace.hh" +#include "mem/cache/cache_blk.hh" +#include "mem/cache/miss/mshr_queue.hh" +#include "mem/mem_cmd.hh" +#include "mem/packet.hh" + +class BaseCache; + +class UniCoherence +{ + protected: + /** Buffers to hold forwarded invalidates. */ + MSHRQueue cshrs; + /** Pointer to the parent cache. */ + BaseCache *cache; + + public: + /** + * Construct and initialize this coherence policy. + */ + UniCoherence(); + + /** + * Set the pointer to the parent cache. + * @param _cache The parent cache. + */ + void setCache(BaseCache *_cache) + { + cache = _cache; + } + + /** + * Register statistics. + * @param name The name to prepend to stat descriptions. + */ + void regStats(const std::string &name) + { + } + + /** + * Return Read, except for writebacks, which are passed through unchanged. + * @param cmd The request's command. + * @param state The current state of the cache block. + * @return The proper bus command, as determined by the protocol. + * @todo Make changes so writebacks don't get here. + */ + Packet::Command getBusCmd(Packet::Command &cmd, CacheBlk::State state) + { + if (cmd == Hard_Prefetch && state) + warn("Trying to issue a prefetch to a block we already have\n"); + if (cmd == Writeback) + return Writeback; + return Read; + } + + /** + * Just return readable and writeable. + * @param pkt The bus response. + * @param current The current block state.
+ * @return The new state. + */ + CacheBlk::State getNewState(Packet * &pkt, CacheBlk::State current) + { + if (pkt->senderState) //Blocking Buffers don't get mshrs + { + if (pkt->senderState->originalCmd == Hard_Prefetch) { + DPRINTF(HWPrefetch, "Marking a hardware prefetch as such in the state\n"); + return BlkHWPrefetched | BlkValid | BlkWritable; + } + else { + return BlkValid | BlkWritable; + } + } + //@todo What about prefetching with blocking buffers + else + return BlkValid | BlkWritable; + } + /** + * Return outstanding invalidate to forward. + * @return The next invalidate to forward to lower levels of cache. + */ + Packet * getPacket(); + + /** + * Handle snooped bus requests. + * @param pkt The snooped bus request. + * @param blk The cache block corresponding to the request, if any. + * @param mshr The MSHR corresponding to the request, if any. + * @param new_state The new coherence state of the block. + * @return True if the request should be satisfied locally. + */ + bool handleBusRequest(Packet * &pkt, CacheBlk *blk, MSHR *mshr, + CacheBlk::State &new_state); + + /** + * Return true if this coherence policy can handle fast cache writes. + */ + bool allowFastWrites() { return true; } + + bool hasProtocol() { return false; } +}; + +#endif //__UNI_COHERENCE_HH__ diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc new file mode 100644 index 0000000000..621855c3d1 --- /dev/null +++ b/src/mem/cache/miss/blocking_buffer.cc @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Definitions of a simple buffer for a blocking cache. + */ + +#include "cpu/exec_context.hh" +#include "cpu/smt.hh" //for maxThreadsPerCPU +#include "mem/cache/base_cache.hh" +#include "mem/cache/miss/blocking_buffer.hh" +#include "mem/cache/prefetch/base_prefetcher.hh" +#include "sim/eventq.hh" // for Event declaration. + +using namespace TheISA; + +/** + * @todo Move writebacks into shared BaseBuffer class. 
+ */ +void +BlockingBuffer::regStats(const std::string &name) +{ + using namespace Stats; + writebacks + .init(maxThreadsPerCPU) + .name(name + ".writebacks") + .desc("number of writebacks") + .flags(total) + ; +} + +void +BlockingBuffer::setCache(BaseCache *_cache) +{ + cache = _cache; + blkSize = cache->getBlockSize(); +} + +void +BlockingBuffer::setPrefetcher(BasePrefetcher *_prefetcher) +{ + prefetcher = _prefetcher; +} +void +BlockingBuffer::handleMiss(Packet * &pkt, int blk_size, Tick time) +{ + Addr blk_addr = pkt->paddr & ~(Addr)(blk_size - 1); + if (pkt->cmd.isWrite() && (pkt->isUncacheable() || !writeAllocate || + pkt->cmd.isNoResponse())) { + if (pkt->cmd.isNoResponse()) { + wb.allocateAsBuffer(pkt); + } else { + wb.allocate(pkt->cmd, blk_addr, pkt->req->asid, blk_size, pkt); + } + if (cache->doData()) { + memcpy(wb.pkt->data, pkt->data, blk_size); + } + cache->setBlocked(Blocked_NoWBBuffers); + cache->setMasterRequest(Request_WB, time); + return; + } + + if (pkt->cmd.isNoResponse()) { + miss.allocateAsBuffer(pkt); + } else { + miss.allocate(pkt->cmd, blk_addr, pkt->req->asid, blk_size, pkt); + } + if (!pkt->isUncacheable()) { + miss.pkt->flags |= CACHE_LINE_FILL; + } + cache->setBlocked(Blocked_NoMSHRs); + cache->setMasterRequest(Request_MSHR, time); +} + +Packet * +BlockingBuffer::getPacket() +{ + if (miss.pkt && !miss.inService) { + return miss.pkt; + } + return wb.pkt; +} + +void +BlockingBuffer::setBusCmd(Packet * &pkt, Packet::Command cmd) +{ + MSHR *mshr = pkt->senderState; + mshr->originalCmd = pkt->cmd; + if (pkt->isCacheFill()) + pkt->cmd = cmd; +} + +void +BlockingBuffer::restoreOrigCmd(Packet * &pkt) +{ + pkt->cmd = pkt->senderState->originalCmd; +} + +void +BlockingBuffer::markInService(Packet * &pkt) +{ + if (!pkt->isCacheFill() && pkt->cmd.isWrite()) { + // Forwarding a write/ writeback, don't need to change + // the command + assert(pkt->senderState == &wb); + cache->clearMasterRequest(Request_WB); + if (pkt->cmd.isNoResponse()) { + 
assert(wb.getNumTargets() == 0); + wb.deallocate(); + cache->clearBlocked(Blocked_NoWBBuffers); + } else { + wb.inService = true; + } + } else { + assert(pkt->senderState == &miss); + cache->clearMasterRequest(Request_MSHR); + if (pkt->cmd.isNoResponse()) { + assert(miss.getNumTargets() == 0); + miss.deallocate(); + cache->clearBlocked(Blocked_NoMSHRs); + } else { + //mark in service + miss.inService = true; + } + } +} + +void +BlockingBuffer::handleResponse(Packet * &pkt, Tick time) +{ + if (pkt->isCacheFill()) { + // targets were handled in the cache tags + assert(pkt->senderState == &miss); + miss.deallocate(); + cache->clearBlocked(Blocked_NoMSHRs); + } else { + if (pkt->senderState->hasTargets()) { + // Should only have 1 target if we had any + assert(pkt->senderState->getNumTargets() == 1); + Packet * target = pkt->senderState->getTarget(); + pkt->senderState->popTarget(); + if (cache->doData() && pkt->cmd.isRead()) { + memcpy(target->data, pkt->data, target->size); + } + cache->respond(target, time); + assert(!pkt->senderState->hasTargets()); + } + + if (pkt->cmd.isWrite()) { + assert(pkt->senderState == &wb); + wb.deallocate(); + cache->clearBlocked(Blocked_NoWBBuffers); + } else { + miss.deallocate(); + cache->clearBlocked(Blocked_NoMSHRs); + } + } +} + +void +BlockingBuffer::squash(int thread_number) +{ + if (miss.threadNum == thread_number) { + Packet * target = miss.getTarget(); + miss.popTarget(); + assert(target->thread_num == thread_number); + if (target->completionEvent != NULL) { + delete target->completionEvent; + } + target = NULL; + assert(!miss.hasTargets()); + miss.ntargets=0; + if (!miss.inService) { + miss.deallocate(); + cache->clearBlocked(Blocked_NoMSHRs); + cache->clearMasterRequest(Request_MSHR); + } + } +} + +void +BlockingBuffer::doWriteback(Addr addr, int asid, ExecContext *xc, + int size, uint8_t *data, bool compressed) +{ + + // Generate request + Packet * pkt = new Packet(); + pkt->paddr = addr; + pkt->req->asid = asid; + 
pkt->size = size; + pkt->data = new uint8_t[size]; + if (data) { + memcpy(pkt->data, data, size); + } + /** + * @todo Need to find a way to charge the writeback to the "correct" + * thread. + */ + pkt->xc = xc; + if (xc) + pkt->thread_num = xc->getThreadNum(); + else + pkt->thread_num = 0; + + pkt->cmd = Writeback; + if (compressed) { + pkt->flags |= COMPRESSED; + } + + writebacks[pkt->thread_num]++; + + wb.allocateAsBuffer(pkt); + cache->setMasterRequest(Request_WB, curTick); + cache->setBlocked(Blocked_NoWBBuffers); +} + + + +void +BlockingBuffer::doWriteback(Packet * &pkt) +{ + writebacks[pkt->thread_num]++; + + wb.allocateAsBuffer(pkt); + + // Since allocate as buffer copies the request, + // need to copy data here. + if (cache->doData()) { + memcpy(wb.pkt->data, pkt->data, pkt->size); + } + cache->setBlocked(Blocked_NoWBBuffers); + cache->setMasterRequest(Request_WB, curTick); +} diff --git a/src/mem/cache/miss/blocking_buffer.hh b/src/mem/cache/miss/blocking_buffer.hh new file mode 100644 index 0000000000..52256be742 --- /dev/null +++ b/src/mem/cache/miss/blocking_buffer.hh @@ -0,0 +1,257 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Declaration of a simple buffer for a blocking cache. + */ + +#ifndef __BLOCKING_BUFFER_HH__ +#define __BLOCKING_BUFFER_HH__ + +#include + +#include "mem/cache/miss/mshr.hh" +#include "base/statistics.hh" + +class BaseCache; +class BasePrefetcher; + +/** + * Miss and writeback storage for a blocking cache. + */ +class BlockingBuffer +{ +protected: + /** Miss storage. */ + MSHR miss; + /** WB storage. */ + MSHR wb; + + //Params + + /** Allocate on write misses. */ + const bool writeAllocate; + + /** Pointer to the parent cache. */ + BaseCache* cache; + + BasePrefetcher* prefetcher; + + /** Block size of the parent cache. */ + int blkSize; + + // Statistics + /** + * @addtogroup CacheStatistics + * @{ + */ + /** Number of blocks written back per thread. */ + Stats::Vector<> writebacks; + + /** + * @} + */ + +public: + /** + * Builds and initializes this buffer. + * @param write_allocate If true, treat write misses the same as reads. + */ + BlockingBuffer(bool write_allocate) + : writeAllocate(write_allocate) + { + } + + /** + * Register statistics for this object. + * @param name The name of the parent cache. 
+ */ + void regStats(const std::string &name); + + /** + * Called by the parent cache to set the back pointer. + * @param _cache A pointer to the parent cache. + */ + void setCache(BaseCache *_cache); + + void setPrefetcher(BasePrefetcher *_prefetcher); + + /** + * Handle a cache miss properly. Requests the bus and marks the cache as + * blocked. + * @param pkt The request that missed in the cache. + * @param blk_size The block size of the cache. + * @param time The time the miss is detected. + */ + void handleMiss(Packet * &pkt, int blk_size, Tick time); + + /** + * Fetch the block for the given address and buffer the given target. + * @param addr The address to fetch. + * @param asid The address space of the address. + * @param blk_size The block size of the cache. + * @param time The time the miss is detected. + * @param target The target for the fetch. + */ + MSHR* fetchBlock(Addr addr, int asid, int blk_size, Tick time, + Packet * &target) + { + fatal("Unimplemented"); + } + + /** + * Selects an outstanding request to service. + * @return The request to service, NULL if none found. + */ + Packet * getPacket(); + + /** + * Set the command to the given bus command. + * @param pkt The request to update. + * @param cmd The bus command to use. + */ + void setBusCmd(Packet * &pkt, Packet::Command cmd); + + /** + * Restore the original command in case of a bus transmission error. + * @param pkt The request to reset. + */ + void restoreOrigCmd(Packet * &pkt); + + /** + * Marks a request as in service (sent on the bus). This can have side + * effects since storage for no response commands is deallocated once they + * are successfully sent. + * @param pkt The request that was sent on the bus. + */ + void markInService(Packet * &pkt); + + /** + * Frees the resources of the request and unblocks the cache. + * @param pkt The request that has been satisfied. + * @param time The time when the request is satisfied.
+ */ + void handleResponse(Packet * &pkt, Tick time); + + /** + * Removes all outstanding requests for a given thread number. If a request + * has been sent to the bus, this function removes all of its targets. + * @param thread_number The thread number of the requests to squash. + */ + void squash(int thread_number); + + /** + * Return the current number of outstanding misses. + * @return the number of outstanding misses. + */ + int getMisses() + { + return miss.getNumTargets(); + } + + /** + * Searches for the supplied address in the miss "queue". + * @param addr The address to look for. + * @param asid The address space id. + * @return A pointer to miss if it matches. + */ + MSHR* findMSHR(Addr addr, int asid) + { + if (miss.addr == addr && miss.pkt) + return &miss; + return NULL; + } + + /** + * Searches for the supplied address in the write buffer. + * @param addr The address to look for. + * @param asid The address space id. + * @param writes List of pointers to the matching writes. + * @return True if there is a matching write. + */ + bool findWrites(Addr addr, int asid, std::vector& writes) + { + if (wb.addr == addr && wb.pkt) { + writes.push_back(&wb); + return true; + } + return false; + } + + + + /** + * Perform a writeback of dirty data to the given address. + * @param addr The address to write to. + * @param asid The address space id. + * @param xc The execution context of the address space. + * @param size The number of bytes to write. + * @param data The data to write, can be NULL. + * @param compressed True if the data is compressed. + */ + void doWriteback(Addr addr, int asid, ExecContext *xc, + int size, uint8_t *data, bool compressed); + + /** + * Perform a writeback request. + * @param req The writeback request. + */ + void doWriteback(Packet * &pkt); + + /** + * Returns true if there are outstanding requests. + * @return True if there are outstanding requests. 
+ */ + bool havePending() + { + return !miss.inService || !wb.inService; + } + + /** + * Add a target to the given MSHR. This assumes it is in the miss queue. + * @param mshr The mshr to add a target to. + * @param req The target to add. + */ + void addTarget(MSHR *mshr, Packet * &pkt) + { + fatal("Shouldn't call this on a blocking buffer."); + } + + /** + * Dummy implmentation. + */ + MSHR* allocateTargetList(Addr addr, int asid) + { + fatal("Unimplemented"); + } +}; + +#endif // __BLOCKING_BUFFER_HH__ diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc new file mode 100644 index 0000000000..7902fbcee9 --- /dev/null +++ b/src/mem/cache/miss/miss_queue.cc @@ -0,0 +1,736 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Ron Dreslinski + */ + +/** + * @file + * Miss and writeback queue definitions. + */ + +#include "cpu/exec_context.hh" +#include "cpu/smt.hh" //for maxThreadsPerCPU +#include "mem/cache/base_cache.hh" +#include "mem/cache/miss/miss_queue.hh" +#include "mem/cache/prefetch/base_prefetcher.hh" + +using namespace std; + +// simple constructor +/** + * @todo Remove the +16 from the write buffer constructor once we handle + * stalling on writebacks do to compression writes. + */ +MissQueue::MissQueue(int numMSHRs, int numTargets, int write_buffers, + bool write_allocate, bool prefetch_miss) + : mq(numMSHRs, 4), wb(write_buffers,numMSHRs+1000), numMSHR(numMSHRs), + numTarget(numTargets), writeBuffers(write_buffers), + writeAllocate(write_allocate), order(0), prefetchMiss(prefetch_miss) +{ + noTargetMSHR = NULL; +} + +void +MissQueue::regStats(const string &name) +{ + using namespace Stats; + + writebacks + .init(maxThreadsPerCPU) + .name(name + ".writebacks") + .desc("number of writebacks") + .flags(total) + ; + + // MSHR hit statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = cmd.toString(); + + mshr_hits[access_idx] + .init(maxThreadsPerCPU) + .name(name + "." 
+ cstr + "_mshr_hits") + .desc("number of " + cstr + " MSHR hits") + .flags(total | nozero | nonan) + ; + } + + demandMshrHits + .name(name + ".demand_mshr_hits") + .desc("number of demand (read+write) MSHR hits") + .flags(total) + ; + demandMshrHits = mshr_hits[Read] + mshr_hits[Write]; + + overallMshrHits + .name(name + ".overall_mshr_hits") + .desc("number of overall MSHR hits") + .flags(total) + ; + overallMshrHits = demandMshrHits + mshr_hits[Soft_Prefetch] + + mshr_hits[Hard_Prefetch]; + + // MSHR miss statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + mshr_misses[access_idx] + .init(maxThreadsPerCPU) + .name(name + "." + cstr + "_mshr_misses") + .desc("number of " + cstr + " MSHR misses") + .flags(total | nozero | nonan) + ; + } + + demandMshrMisses + .name(name + ".demand_mshr_misses") + .desc("number of demand (read+write) MSHR misses") + .flags(total) + ; + demandMshrMisses = mshr_misses[Read] + mshr_misses[Write]; + + overallMshrMisses + .name(name + ".overall_mshr_misses") + .desc("number of overall MSHR misses") + .flags(total) + ; + overallMshrMisses = demandMshrMisses + mshr_misses[Soft_Prefetch] + + mshr_misses[Hard_Prefetch]; + + // MSHR miss latency statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + mshr_miss_latency[access_idx] + .init(maxThreadsPerCPU) + .name(name + "." 
+ cstr + "_mshr_miss_latency") + .desc("number of " + cstr + " MSHR miss cycles") + .flags(total | nozero | nonan) + ; + } + + demandMshrMissLatency + .name(name + ".demand_mshr_miss_latency") + .desc("number of demand (read+write) MSHR miss cycles") + .flags(total) + ; + demandMshrMissLatency = mshr_miss_latency[Read] + mshr_miss_latency[Write]; + + overallMshrMissLatency + .name(name + ".overall_mshr_miss_latency") + .desc("number of overall MSHR miss cycles") + .flags(total) + ; + overallMshrMissLatency = demandMshrMissLatency + + mshr_miss_latency[Soft_Prefetch] + mshr_miss_latency[Hard_Prefetch]; + + // MSHR uncacheable statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + mshr_uncacheable[access_idx] + .init(maxThreadsPerCPU) + .name(name + "." + cstr + "_mshr_uncacheable") + .desc("number of " + cstr + " MSHR uncacheable") + .flags(total | nozero | nonan) + ; + } + + overallMshrUncacheable + .name(name + ".overall_mshr_uncacheable_misses") + .desc("number of overall MSHR uncacheable misses") + .flags(total) + ; + overallMshrUncacheable = mshr_uncacheable[Read] + mshr_uncacheable[Write] + + mshr_uncacheable[Soft_Prefetch] + mshr_uncacheable[Hard_Prefetch]; + + // MSHR miss latency statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + mshr_uncacheable_lat[access_idx] + .init(maxThreadsPerCPU) + .name(name + "." 
+ cstr + "_mshr_uncacheable_latency") + .desc("number of " + cstr + " MSHR uncacheable cycles") + .flags(total | nozero | nonan) + ; + } + + overallMshrUncacheableLatency + .name(name + ".overall_mshr_uncacheable_latency") + .desc("number of overall MSHR uncacheable cycles") + .flags(total) + ; + overallMshrUncacheableLatency = mshr_uncacheable_lat[Read] + + mshr_uncacheable_lat[Write] + mshr_uncacheable_lat[Soft_Prefetch] + + mshr_uncacheable_lat[Hard_Prefetch]; + +#if 0 + // MSHR access formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + mshrAccesses[access_idx] + .name(name + "." + cstr + "_mshr_accesses") + .desc("number of " + cstr + " mshr accesses(hits+misses)") + .flags(total | nozero | nonan) + ; + mshrAccesses[access_idx] = + mshr_hits[access_idx] + mshr_misses[access_idx] + + mshr_uncacheable[access_idx]; + } + + demandMshrAccesses + .name(name + ".demand_mshr_accesses") + .desc("number of demand (read+write) mshr accesses") + .flags(total | nozero | nonan) + ; + demandMshrAccesses = demandMshrHits + demandMshrMisses; + + overallMshrAccesses + .name(name + ".overall_mshr_accesses") + .desc("number of overall (read+write) mshr accesses") + .flags(total | nozero | nonan) + ; + overallMshrAccesses = overallMshrHits + overallMshrMisses + + overallMshrUncacheable; +#endif + + // MSHR miss rate formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + mshrMissRate[access_idx] + .name(name + "." 
+ cstr + "_mshr_miss_rate") + .desc("mshr miss rate for " + cstr + " accesses") + .flags(total | nozero | nonan) + ; + + mshrMissRate[access_idx] = + mshr_misses[access_idx] / cache->accesses[access_idx]; + } + + demandMshrMissRate + .name(name + ".demand_mshr_miss_rate") + .desc("mshr miss rate for demand accesses") + .flags(total) + ; + demandMshrMissRate = demandMshrMisses / cache->demandAccesses; + + overallMshrMissRate + .name(name + ".overall_mshr_miss_rate") + .desc("mshr miss rate for overall accesses") + .flags(total) + ; + overallMshrMissRate = overallMshrMisses / cache->overallAccesses; + + // mshrMiss latency formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + avgMshrMissLatency[access_idx] + .name(name + "." + cstr + "_avg_mshr_miss_latency") + .desc("average " + cstr + " mshr miss latency") + .flags(total | nozero | nonan) + ; + + avgMshrMissLatency[access_idx] = + mshr_miss_latency[access_idx] / mshr_misses[access_idx]; + } + + demandAvgMshrMissLatency + .name(name + ".demand_avg_mshr_miss_latency") + .desc("average overall mshr miss latency") + .flags(total) + ; + demandAvgMshrMissLatency = demandMshrMissLatency / demandMshrMisses; + + overallAvgMshrMissLatency + .name(name + ".overall_avg_mshr_miss_latency") + .desc("average overall mshr miss latency") + .flags(total) + ; + overallAvgMshrMissLatency = overallMshrMissLatency / overallMshrMisses; + + // mshrUncacheable latency formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + avgMshrUncacheableLatency[access_idx] + .name(name + "." 
+ cstr + "_avg_mshr_uncacheable_latency") + .desc("average " + cstr + " mshr uncacheable latency") + .flags(total | nozero | nonan) + ; + + avgMshrUncacheableLatency[access_idx] = + mshr_uncacheable_lat[access_idx] / mshr_uncacheable[access_idx]; + } + + overallAvgMshrUncacheableLatency + .name(name + ".overall_avg_mshr_uncacheable_latency") + .desc("average overall mshr uncacheable latency") + .flags(total) + ; + overallAvgMshrUncacheableLatency = overallMshrUncacheableLatency / overallMshrUncacheable; + + mshr_cap_events + .init(maxThreadsPerCPU) + .name(name + ".mshr_cap_events") + .desc("number of times MSHR cap was activated") + .flags(total) + ; + + //software prefetching stats + soft_prefetch_mshr_full + .init(maxThreadsPerCPU) + .name(name + ".soft_prefetch_mshr_full") + .desc("number of mshr full events for SW prefetching instrutions") + .flags(total) + ; + + mshr_no_allocate_misses + .name(name +".no_allocate_misses") + .desc("Number of misses that were no-allocate") + ; + +} + +void +MissQueue::setCache(BaseCache *_cache) +{ + cache = _cache; + blkSize = cache->getBlockSize(); +} + +void +MissQueue::setPrefetcher(BasePrefetcher *_prefetcher) +{ + prefetcher = _prefetcher; +} + +MSHR* +MissQueue::allocateMiss(Packet * &pkt, int size, Tick time) +{ + MSHR* mshr = mq.allocate(pkt, size); + mshr->order = order++; + if (!pkt->isUncacheable() ){//&& !pkt->isNoAllocate()) { + // Mark this as a cache line fill + mshr->pkt->flags |= CACHE_LINE_FILL; + } + if (mq.isFull()) { + cache->setBlocked(Blocked_NoMSHRs); + } + if (pkt->cmd != Hard_Prefetch) { + //If we need to request the bus (not on HW prefetch), do so + cache->setMasterRequest(Request_MSHR, time); + } + return mshr; +} + + +MSHR* +MissQueue::allocateWrite(Packet * &pkt, int size, Tick time) +{ + MSHR* mshr = wb.allocate(pkt,pkt->size); + mshr->order = order++; + if (cache->doData()){ + if (pkt->isCompressed()) { + delete [] mshr->pkt->data; + mshr->pkt->actualSize = pkt->actualSize; + mshr->pkt->data = 
new uint8_t[pkt->actualSize]; + memcpy(mshr->pkt->data, pkt->data, pkt->actualSize); + } else { + memcpy(mshr->pkt->data, pkt->data, pkt->size); + } + } + if (wb.isFull()) { + cache->setBlocked(Blocked_NoWBBuffers); + } + + cache->setMasterRequest(Request_WB, time); + + return mshr; +} + + +/** + * @todo Remove SW prefetches on mshr hits. + */ +void +MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time) +{ +// if (!cache->isTopLevel()) + if (prefetchMiss) prefetcher->handleMiss(pkt, time); + + int size = blkSize; + Addr blkAddr = pkt->paddr & ~(Addr)(blkSize-1); + MSHR* mshr = NULL; + if (!pkt->isUncacheable()) { + mshr = mq.findMatch(blkAddr, pkt->req->asid); + if (mshr) { + //@todo remove hw_pf here + mshr_hits[pkt->cmd.toIndex()][pkt->thread_num]++; + if (mshr->threadNum != pkt->thread_num) { + mshr->threadNum = -1; + } + mq.allocateTarget(mshr, pkt); + if (mshr->pkt->isNoAllocate() && !pkt->isNoAllocate()) { + //We are adding an allocate after a no-allocate + mshr->pkt->flags &= ~NO_ALLOCATE; + } + if (mshr->getNumTargets() == numTarget) { + noTargetMSHR = mshr; + cache->setBlocked(Blocked_NoTargets); + mq.moveToFront(mshr); + } + return; + } + if (pkt->isNoAllocate()) { + //Count no-allocate requests differently + mshr_no_allocate_misses++; + } + else { + mshr_misses[pkt->cmd.toIndex()][pkt->thread_num]++; + } + } else { + //Count uncacheable accesses + mshr_uncacheable[pkt->cmd.toIndex()][pkt->thread_num]++; + size = pkt->size; + } + if (pkt->cmd.isWrite() && (pkt->isUncacheable() || !writeAllocate || + pkt->cmd.isNoResponse())) { + /** + * @todo Add write merging here. 
+ */ + mshr = allocateWrite(pkt, pkt->size, time); + return; + } + + mshr = allocateMiss(pkt, size, time); +} + +MSHR* +MissQueue::fetchBlock(Addr addr, int asid, int blk_size, Tick time, + Packet * &target) +{ + Addr blkAddr = addr & ~(Addr)(blk_size - 1); + assert(mq.findMatch(addr, asid) == NULL); + MSHR *mshr = mq.allocateFetch(blkAddr, asid, blk_size, target); + mshr->order = order++; + mshr->pkt->flags |= CACHE_LINE_FILL; + if (mq.isFull()) { + cache->setBlocked(Blocked_NoMSHRs); + } + cache->setMasterRequest(Request_MSHR, time); + return mshr; +} + +Packet * +MissQueue::getPacket() +{ + Packet * pkt = mq.getReq(); + if (((wb.isFull() && wb.inServiceMSHRs == 0) || !pkt || + pkt->time > curTick) && wb.havePending()) { + pkt = wb.getReq(); + // Need to search for earlier miss. + MSHR *mshr = mq.findPending(pkt); + if (mshr && mshr->order < pkt->senderState->order) { + // Service misses in order until conflict is cleared. + return mq.getReq(); + } + } + if (pkt) { + MSHR* mshr = wb.findPending(pkt); + if (mshr /*&& mshr->order < pkt->senderState->order*/) { + // The only way this happens is if we are + // doing a write and we didn't have permissions + // then subsequently saw a writeback(owned got evicted) + // We need to make sure to perform the writeback first + // To preserve the dirty data, then we can issue the write + return wb.getReq(); + } + } + else if (!mq.isFull()){ + //If we have a miss queue slot, we can try a prefetch + pkt = prefetcher->getPacket(); + if (pkt) { + //Update statistic on number of prefetches issued (hwpf_mshr_misses) + mshr_misses[pkt->cmd.toIndex()][pkt->thread_num]++; + //It will request the bus for the future, but should clear that immedieatley + allocateMiss(pkt, pkt->size, curTick); + pkt = mq.getReq(); + assert(pkt); //We should get back a req b/c we just put one in + } + } + return pkt; +} + +void +MissQueue::setBusCmd(Packet * &pkt, Packet::Command cmd) +{ + assert(pkt->senderState != 0); + MSHR * mshr = pkt->senderState; + 
mshr->originalCmd = pkt->cmd; + if (pkt->isCacheFill() || pkt->isNoAllocate()) + pkt->cmd = cmd; +} + +void +MissQueue::restoreOrigCmd(Packet * &pkt) +{ + pkt->cmd = pkt->senderState->originalCmd; +} + +void +MissQueue::markInService(Packet * &pkt) +{ + assert(pkt->senderState != 0); + bool unblock = false; + BlockedCause cause = NUM_BLOCKED_CAUSES; + + /** + * @todo Should include MSHRQueue pointer in MSHR to select the correct + * one. + */ + if ((!pkt->isCacheFill() && pkt->cmd.isWrite()) || pkt->cmd == Copy) { + // Forwarding a write/ writeback, don't need to change + // the command + unblock = wb.isFull(); + wb.markInService(pkt->senderState); + if (!wb.havePending()){ + cache->clearMasterRequest(Request_WB); + } + if (unblock) { + // Do we really unblock? + unblock = !wb.isFull(); + cause = Blocked_NoWBBuffers; + } + } else { + unblock = mq.isFull(); + mq.markInService(pkt->senderState); + if (!mq.havePending()){ + cache->clearMasterRequest(Request_MSHR); + } + if (pkt->senderState->originalCmd == Hard_Prefetch) { + DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n", + cache->name()); + //Also clear pending if need be + if (!prefetcher->havePending()) + { + cache->clearMasterRequest(Request_PF); + } + } + if (unblock) { + unblock = !mq.isFull(); + cause = Blocked_NoMSHRs; + } + } + if (unblock) { + cache->clearBlocked(cause); + } +} + + +void +MissQueue::handleResponse(Packet * &pkt, Tick time) +{ + MSHR* mshr = pkt->senderState; + if (pkt->senderState->originalCmd == Hard_Prefetch) { + DPRINTF(HWPrefetch, "%s:Handling the response to a HW_PF\n", + cache->name()); + } +#ifndef NDEBUG + int num_targets = mshr->getNumTargets(); +#endif + + bool unblock = false; + bool unblock_target = false; + BlockedCause cause = NUM_BLOCKED_CAUSES; + + if (pkt->isCacheFill() && !pkt->isNoAllocate()) { + mshr_miss_latency[mshr->originalCmd][pkt->thread_num] += + curTick - pkt->time; + // targets were handled in the cache tags + if (mshr == noTargetMSHR) { + // we always 
clear at least one target + unblock_target = true; + cause = Blocked_NoTargets; + noTargetMSHR = NULL; + } + + if (mshr->hasTargets()) { + // Didn't satisfy all the targets, need to resend + Packet::Command cmd = mshr->getTarget()->cmd; + mq.markPending(mshr, cmd); + mshr->order = order++; + cache->setMasterRequest(Request_MSHR, time); + } + else { + unblock = mq.isFull(); + mq.deallocate(mshr); + if (unblock) { + unblock = !mq.isFull(); + cause = Blocked_NoMSHRs; + } + } + } else { + if (pkt->isUncacheable()) { + mshr_uncacheable_lat[pkt->cmd][pkt->thread_num] += + curTick - pkt->time; + } + if (mshr->hasTargets() && pkt->isUncacheable()) { + // Should only have 1 target if we had any + assert(num_targets == 1); + Packet * target = mshr->getTarget(); + mshr->popTarget(); + if (cache->doData() && pkt->cmd.isRead()) { + memcpy(target->data, pkt->data, target->size); + } + cache->respond(target, time); + assert(!mshr->hasTargets()); + } + else if (mshr->hasTargets()) { + //Must be a no_allocate with possibly more than one target + assert(mshr->pkt->isNoAllocate()); + while (mshr->hasTargets()) { + Packet * target = mshr->getTarget(); + mshr->popTarget(); + if (cache->doData() && pkt->cmd.isRead()) { + memcpy(target->data, pkt->data, target->size); + } + cache->respond(target, time); + } + } + + if (pkt->cmd.isWrite()) { + // If the wrtie buffer is full, we might unblock now + unblock = wb.isFull(); + wb.deallocate(mshr); + if (unblock) { + // Did we really unblock? 
+ unblock = !wb.isFull(); + cause = Blocked_NoWBBuffers; + } + } else { + unblock = mq.isFull(); + mq.deallocate(mshr); + if (unblock) { + unblock = !mq.isFull(); + cause = Blocked_NoMSHRs; + } + } + } + if (unblock || unblock_target) { + cache->clearBlocked(cause); + } +} + +void +MissQueue::squash(int thread_number) +{ + bool unblock = false; + BlockedCause cause = NUM_BLOCKED_CAUSES; + + if (noTargetMSHR && noTargetMSHR->threadNum == thread_number) { + noTargetMSHR = NULL; + unblock = true; + cause = Blocked_NoTargets; + } + if (mq.isFull()) { + unblock = true; + cause = Blocked_NoMSHRs; + } + mq.squash(thread_number); + if (!mq.havePending()) { + cache->clearMasterRequest(Request_MSHR); + } + if (unblock && !mq.isFull()) { + cache->clearBlocked(cause); + } + +} + +MSHR* +MissQueue::findMSHR(Addr addr, int asid) const +{ + return mq.findMatch(addr,asid); +} + +bool +MissQueue::findWrites(Addr addr, int asid, vector &writes) const +{ + return wb.findMatches(addr,asid,writes); +} + +void +MissQueue::doWriteback(Addr addr, int asid, + int size, uint8_t *data, bool compressed) +{ + // Generate request + Packet * pkt = buildWritebackReq(addr, asid, size, data, + compressed); + + writebacks[pkt->thread_num]++; + + allocateWrite(pkt, 0, curTick); +} + + +void +MissQueue::doWriteback(Packet * &pkt) +{ + writebacks[pkt->thread_num]++; + allocateWrite(pkt, 0, curTick); +} + + +MSHR* +MissQueue::allocateTargetList(Addr addr, int asid) +{ + MSHR* mshr = mq.allocateTargetList(addr, asid, blkSize); + mshr->pkt->flags |= CACHE_LINE_FILL; + if (mq.isFull()) { + cache->setBlocked(Blocked_NoMSHRs); + } + return mshr; +} + +bool +MissQueue::havePending() +{ + return mq.havePending() || wb.havePending() || prefetcher->havePending(); +} diff --git a/src/mem/cache/miss/miss_queue.hh b/src/mem/cache/miss/miss_queue.hh new file mode 100644 index 0000000000..ce827fe812 --- /dev/null +++ b/src/mem/cache/miss/miss_queue.hh @@ -0,0 +1,349 @@ +/* + * Copyright (c) 2003-2005 The Regents of 
The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+/**
+ * @file
+ * Miss and writeback queue declarations.
+ */
+
+#ifndef __MISS_QUEUE_HH__
+#define __MISS_QUEUE_HH__
+
+#include <vector>
+
+#include "mem/cache/miss/mshr.hh"
+#include "mem/cache/miss/mshr_queue.hh"
+#include "base/statistics.hh"
+
+class BaseCache;
+class BasePrefetcher;
+/**
+ * Manages cache misses and writebacks.
Contains MSHRs to store miss data + * and the writebuffer for writes/writebacks. + * @todo need to handle data on writes better (encapsulate). + * @todo need to make replacements/writebacks happen in Cache::access + */ +class MissQueue +{ + protected: + /** The MSHRs. */ + MSHRQueue mq; + /** Write Buffer. */ + MSHRQueue wb; + + // PARAMTERS + + /** The number of MSHRs in the miss queue. */ + const int numMSHR; + /** The number of targets for each MSHR. */ + const int numTarget; + /** The number of write buffers. */ + const int writeBuffers; + /** True if the cache should allocate on a write miss. */ + const bool writeAllocate; + /** Pointer to the parent cache. */ + BaseCache* cache; + + /** The Prefetcher */ + BasePrefetcher *prefetcher; + + /** The block size of the parent cache. */ + int blkSize; + + /** Increasing order number assigned to each incoming request. */ + uint64_t order; + + bool prefetchMiss; + + // Statistics + /** + * @addtogroup CacheStatistics + * @{ + */ + /** Number of blocks written back per thread. */ + Stats::Vector<> writebacks; + + /** Number of misses that hit in the MSHRs per command and thread. */ + Stats::Vector<> mshr_hits[NUM_MEM_CMDS]; + /** Demand misses that hit in the MSHRs. */ + Stats::Formula demandMshrHits; + /** Total number of misses that hit in the MSHRs. */ + Stats::Formula overallMshrHits; + + /** Number of misses that miss in the MSHRs, per command and thread. */ + Stats::Vector<> mshr_misses[NUM_MEM_CMDS]; + /** Demand misses that miss in the MSHRs. */ + Stats::Formula demandMshrMisses; + /** Total number of misses that miss in the MSHRs. */ + Stats::Formula overallMshrMisses; + + /** Number of misses that miss in the MSHRs, per command and thread. */ + Stats::Vector<> mshr_uncacheable[NUM_MEM_CMDS]; + /** Total number of misses that miss in the MSHRs. */ + Stats::Formula overallMshrUncacheable; + + /** Total cycle latency of each MSHR miss, per command and thread. 
*/ + Stats::Vector<> mshr_miss_latency[NUM_MEM_CMDS]; + /** Total cycle latency of demand MSHR misses. */ + Stats::Formula demandMshrMissLatency; + /** Total cycle latency of overall MSHR misses. */ + Stats::Formula overallMshrMissLatency; + + /** Total cycle latency of each MSHR miss, per command and thread. */ + Stats::Vector<> mshr_uncacheable_lat[NUM_MEM_CMDS]; + /** Total cycle latency of overall MSHR misses. */ + Stats::Formula overallMshrUncacheableLatency; + + /** The total number of MSHR accesses per command and thread. */ + Stats::Formula mshrAccesses[NUM_MEM_CMDS]; + /** The total number of demand MSHR accesses. */ + Stats::Formula demandMshrAccesses; + /** The total number of MSHR accesses. */ + Stats::Formula overallMshrAccesses; + + /** The miss rate in the MSHRs pre command and thread. */ + Stats::Formula mshrMissRate[NUM_MEM_CMDS]; + /** The demand miss rate in the MSHRs. */ + Stats::Formula demandMshrMissRate; + /** The overall miss rate in the MSHRs. */ + Stats::Formula overallMshrMissRate; + + /** The average latency of an MSHR miss, per command and thread. */ + Stats::Formula avgMshrMissLatency[NUM_MEM_CMDS]; + /** The average latency of a demand MSHR miss. */ + Stats::Formula demandAvgMshrMissLatency; + /** The average overall latency of an MSHR miss. */ + Stats::Formula overallAvgMshrMissLatency; + + /** The average latency of an MSHR miss, per command and thread. */ + Stats::Formula avgMshrUncacheableLatency[NUM_MEM_CMDS]; + /** The average overall latency of an MSHR miss. */ + Stats::Formula overallAvgMshrUncacheableLatency; + + /** The number of times a thread hit its MSHR cap. */ + Stats::Vector<> mshr_cap_events; + /** The number of times software prefetches caused the MSHR to block. */ + Stats::Vector<> soft_prefetch_mshr_full; + + Stats::Scalar<> mshr_no_allocate_misses; + + /** + * @} + */ + + private: + /** Pointer to the MSHR that has no targets. */ + MSHR* noTargetMSHR; + + /** + * Allocate a new MSHR to handle the provided miss. 
+ * @param req The miss to buffer. + * @param size The number of bytes to fetch. + * @param time The time the miss occurs. + * @return A pointer to the new MSHR. + */ + MSHR* allocateMiss(Packet * &pkt, int size, Tick time); + + /** + * Allocate a new WriteBuffer to handle the provided write. + * @param req The write to handle. + * @param size The number of bytes to write. + * @param time The time the write occurs. + * @return A pointer to the new write buffer. + */ + MSHR* allocateWrite(Packet * &pkt, int size, Tick time); + + public: + /** + * Simple Constructor. Initializes all needed internal storage and sets + * parameters. + * @param numMSHRs The number of outstanding misses to handle. + * @param numTargets The number of outstanding targets to each miss. + * @param write_buffers The number of outstanding writes to handle. + * @param write_allocate If true, treat write misses the same as reads. + */ + MissQueue(int numMSHRs, int numTargets, int write_buffers, + bool write_allocate, bool prefetch_miss); + + /** + * Deletes all allocated internal storage. + */ + ~MissQueue(); + + /** + * Register statistics for this object. + * @param name The name of the parent cache. + */ + void regStats(const std::string &name); + + /** + * Called by the parent cache to set the back pointer. + * @param _cache A pointer to the parent cache. + */ + void setCache(BaseCache *_cache); + + void setPrefetcher(BasePrefetcher *_prefetcher); + + /** + * Handle a cache miss properly. Either allocate an MSHR for the request, + * or forward it through the write buffer. + * @param req The request that missed in the cache. + * @param blk_size The block size of the cache. + * @param time The time the miss is detected. + */ + void handleMiss(Packet * &pkt, int blk_size, Tick time); + + /** + * Fetch the block for the given address and buffer the given target. + * @param addr The address to fetch. + * @param asid The address space of the address. 
+ * @param blk_size The block size of the cache. + * @param time The time the miss is detected. + * @param target The target for the fetch. + */ + MSHR* fetchBlock(Addr addr, int asid, int blk_size, Tick time, + Packet * &target); + + /** + * Selects a outstanding request to service. + * @return The request to service, NULL if none found. + */ + Packet * getPacket(); + + /** + * Set the command to the given bus command. + * @param req The request to update. + * @param cmd The bus command to use. + */ + void setBusCmd(Packet * &pkt, Packet::Command cmd); + + /** + * Restore the original command in case of a bus transmission error. + * @param req The request to reset. + */ + void restoreOrigCmd(Packet * &pkt); + + /** + * Marks a request as in service (sent on the bus). This can have side + * effect since storage for no response commands is deallocated once they + * are successfully sent. + * @param req The request that was sent on the bus. + */ + void markInService(Packet * &pkt); + + /** + * Collect statistics and free resources of a satisfied request. + * @param req The request that has been satisfied. + * @param time The time when the request is satisfied. + */ + void handleResponse(Packet * &pkt, Tick time); + + /** + * Removes all outstanding requests for a given thread number. If a request + * has been sent to the bus, this function removes all of its targets. + * @param thread_number The thread number of the requests to squash. + */ + void squash(int thread_number); + + /** + * Return the current number of outstanding misses. + * @return the number of outstanding misses. + */ + int getMisses() + { + return mq.getAllocatedTargets(); + } + + /** + * Searches for the supplied address in the miss queue. + * @param addr The address to look for. + * @param asid The address space id. + * @return The MSHR that contains the address, NULL if not found. + * @warning Currently only searches the miss queue. 
If non write allocate + * might need to search the write buffer for coherence. + */ + MSHR* findMSHR(Addr addr, int asid) const; + + /** + * Searches for the supplied address in the write buffer. + * @param addr The address to look for. + * @param asid The address space id. + * @param writes The list of writes that match the address. + * @return True if any writes are found + */ + bool findWrites(Addr addr, int asid, std::vector& writes) const; + + /** + * Perform a writeback of dirty data to the given address. + * @param addr The address to write to. + * @param asid The address space id. + * @param xc The execution context of the address space. + * @param size The number of bytes to write. + * @param data The data to write, can be NULL. + * @param compressed True if the data is compressed. + */ + void doWriteback(Addr addr, int asid, + int size, uint8_t *data, bool compressed); + + /** + * Perform the given writeback request. + * @param req The writeback request. + */ + void doWriteback(Packet * &pkt); + + /** + * Returns true if there are outstanding requests. + * @return True if there are outstanding requests. + */ + bool havePending(); + + /** + * Add a target to the given MSHR. This assumes it is in the miss queue. + * @param mshr The mshr to add a target to. + * @param req The target to add. + */ + void addTarget(MSHR *mshr, Packet * &pkt) + { + mq.allocateTarget(mshr, pkt); + } + + /** + * Allocate a MSHR to hold a list of targets to a block involved in a copy. + * If the block is marked done then the MSHR already holds the data to + * fill the block. Otherwise the block needs to be fetched. + * @param addr The address to buffer. + * @param asid The address space ID. + * @return A pointer to the allocated MSHR. 
+ */ + MSHR* allocateTargetList(Addr addr, int asid); + +}; + +#endif //__MISS_QUEUE_HH__ diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc new file mode 100644 index 0000000000..73aeaf6cae --- /dev/null +++ b/src/mem/cache/miss/mshr.cc @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * Authors: Erik Hallnor
+ *          Dave Greene
+ */
+
+/**
+ * @file
+ * Miss Status and Handling Register (MSHR) definitions.
+ */
+
+#include <assert.h>
+#include <string>
+#include <vector>
+
+#include "mem/cache/miss/mshr.hh"
+#include "sim/root.hh" // for curTick
+#include "sim/host.hh"
+#include "base/misc.hh"
+#include "mem/cache/cache.hh"
+
+using namespace std;
+
+/** Build an MSHR that is not in service, has no targets and no thread. */
+MSHR::MSHR()
+{
+    inService = false;
+    ntargets = 0;
+    threadNum = -1;
+    // The destructor tests pkt, so it must never be left indeterminate.
+    pkt = NULL;
+}
+
+/**
+ * Record the miss address and asid, build the packet held in pkt with a
+ * freshly allocated data buffer of the given size, and, when a target is
+ * supplied, adopt its request and queue it as the first target.
+ */
+void
+MSHR::allocate(Packet::Command cmd, Addr _addr, int _asid, int size,
+               Packet * &target)
+{
+    assert(targets.empty());
+    addr = _addr;
+    asid = _asid;
+
+    pkt = new Packet(); // allocate new memory request
+    pkt->addr = addr; //picked physical address for now
+    pkt->cmd = cmd;
+    pkt->size = size;
+    pkt->data = new uint8_t[size];
+    pkt->senderState = this;
+    //Set the time here for latency calculations
+    //pkt->time = curTick;
+
+    if (target) {
+        pkt->req = target->req;
+        allocateTarget(target);
+    }
+}
+
+// Since we aren't sure if data is being used, don't copy here.
+/**
+ * @todo When we have a "global" data flag, might want to copy data here.
+ */
+void
+MSHR::allocateAsBuffer(Packet * &target)
+{
+    addr = target->paddr;
+    asid = target->req->asid;
+    threadNum = target->thread_num;
+    pkt = new Packet();
+    pkt->addr = target->addr;
+    pkt->dest = target->dest;
+    pkt->cmd = target->cmd;
+    pkt->size = target->size;
+    pkt->req = target->req;
+    pkt->data = new uint8_t[target->size];
+    pkt->senderState = this;
+}
+
+/**
+ * Reset this MSHR to the free state. All targets must already be gone.
+ * NOTE(review): pkt is dropped without being deleted here; presumably the
+ * packet is owned elsewhere by this point -- confirm, otherwise it leaks.
+ */
+void
+MSHR::deallocate()
+{
+    assert(targets.empty());
+    assert(ntargets == 0);
+    pkt = NULL;
+    inService = false;
+    allocIter = NULL;
+    readyIter = NULL;
+}
+
+/*
+ * Adds a target to an MSHR
+ */
+void
+MSHR::allocateTarget(Packet * &target)
+{
+    //If we append an invalidate and we issued a read to the bus,
+    //but now have some pending writes, we need to move
+    //the invalidate to before the first non-read
+    if (inService && pkt->cmd.isRead() && target->cmd.isInvalidate()) {
+        std::list<Packet *> temp;
+
+        while (!targets.empty()) {
+            if (!targets.front()->cmd.isRead()) break;
+            //Place on top of temp stack
+            temp.push_front(targets.front());
+            //Remove from targets
+            targets.pop_front();
+        }
+
+        //Now that we have all the reads off until first non-read, we can
+        //place the invalidate on
+        targets.push_front(target);
+
+        //Now we pop off the temp_stack and put them back
+        while (!temp.empty()) {
+            targets.push_front(temp.front());
+            temp.pop_front();
+        }
+    }
+    else {
+        targets.push_back(target);
+    }
+
+    ++ntargets;
+    assert(targets.size() == ntargets);
+    /**
+     * @todo really prioritize the target commands.
+     */
+
+    if (!inService && target->cmd.isWrite()) {
+        pkt->cmd = WriteReq;
+    }
+}
+
+
+
+/** Print the state of this MSHR and each of its targets to cerr. */
+void
+MSHR::dump()
+{
+    ccprintf(cerr,
+             "inService: %d thread: %d\n"
+             "Addr: %x asid: %d ntargets %d\n"
+             "Targets:\n",
+             inService, threadNum, addr, asid, ntargets);
+
+    TargetListIterator tar_it = targets.begin();
+    for (int i = 0; i < ntargets; i++) {
+        assert(tar_it != targets.end());
+
+        ccprintf(cerr, "\t%d: Addr: %x cmd: %d\n",
+                 i, (*tar_it)->paddr, (*tar_it)->cmd.toIndex());
+
+        tar_it++;
+    }
+    ccprintf(cerr, "\n");
+}
+
+MSHR::~MSHR()
+{
+    if (pkt)
+        pkt = NULL;
+}
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
new file mode 100644
index 0000000000..167aa26cd1
--- /dev/null
+++ b/src/mem/cache/miss/mshr.hh
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+/**
+ * @file
+ * Miss Status and Handling Register (MSHR) declaration.
+ */
+
+#ifndef __MSHR_HH__
+#define __MSHR_HH__
+
+#include "mem/packet.hh"
+#include <list>
+#include <deque>
+
+class MSHR;
+
+/**
+ * Miss Status and handling Register. This class keeps all the information
+ * needed to handle a cache miss including a list of target requests.
+ */
+class MSHR {
+  public:
+    /** Defines the Data structure of the MSHR targetlist. */
+    typedef std::list<Packet *> TargetList;
+    /** Target list iterator. */
+    typedef std::list<Packet *>::iterator TargetListIterator;
+    /** A list of MSHRs. */
+    typedef std::list<MSHR *> List;
+    /** MSHR list iterator. */
+    typedef List::iterator Iterator;
+    /** MSHR list const_iterator. */
+    typedef List::const_iterator ConstIterator;
+
+    /** Address of the miss. */
+    Addr addr;
+    /** Address space id of the miss. */
+    short asid;
+    /** True if the request has been sent to the bus. */
+    bool inService;
+    /** Thread number of the miss. */
+    int threadNum;
+    /** The request that is forwarded to the next level of the hierarchy. */
+    Packet * pkt;
+    /** The number of currently allocated targets. */
+    short ntargets;
+    /** The original requesting command. */
+    Packet::Command originalCmd;
+    /** Order number assigned by the miss queue. */
+    uint64_t order;
+
+    /**
+     * Pointer to this MSHR on the ready list.
+     * @sa MissQueue, MSHRQueue::readyList
+     */
+    Iterator readyIter;
+    /**
+     * Pointer to this MSHR on the allocated list.
+     * @sa MissQueue, MSHRQueue::allocatedList
+     */
+    Iterator allocIter;
+
+private:
+    /** List of all requests that match the address */
+    TargetList targets;
+
+public:
+    /**
+     * Allocate a miss to this MSHR.
+     * @param cmd The requesting command.
+     * @param addr The address of the miss.
+     * @param asid The address space id of the miss.
+     * @param size The number of bytes to request.
+     * @param pkt The original miss; queued as the first target when non-NULL.
+     */
+    void allocate(Packet::Command cmd, Addr addr, int asid, int size,
+                  Packet * &pkt);
+
+    /**
+     * Allocate this MSHR as a buffer for the given request.
+     * @param target The memory request to buffer.
+     */
+    void allocateAsBuffer(Packet * &target);
+
+    /**
+     * Mark this MSHR as free.
+     */
+    void deallocate();
+
+    /**
+     * Add a request to the list of targets.
+     * @param target The target.
+     */
+    void allocateTarget(Packet * &target);
+
+    /** A simple constructor. */
+    MSHR();
+    /** A simple destructor. */
+    ~MSHR();
+
+    /**
+     * Returns the current number of allocated targets.
+     * @return The current number of allocated targets.
+     */
+    int getNumTargets()
+    {
+        return(ntargets);
+    }
+
+    /**
+     * Returns a pointer to the target list.
+     * @return a pointer to the target list.
+     */
+    TargetList* getTargetList()
+    {
+        return &targets;
+    }
+
+    /**
+     * Returns a pointer to the first target. The target list must not be
+     * empty when this is called.
+     * @return A pointer to the first target.
+     */
+    Packet * getTarget()
+    {
+        return targets.front();
+    }
+
+    /**
+     * Pop first target.
+     */
+    void popTarget()
+    {
+        --ntargets;
+        targets.pop_front();
+    }
+
+    /**
+     * Returns true if there are targets left.
+     * @return true if there are targets
+     */
+    bool hasTargets()
+    {
+        return !targets.empty();
+    }
+
+    /**
+     * Prints the contents of this MSHR to stderr.
+     */
+    void dump();
+};
+
+#endif //__MSHR_HH__
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
new file mode 100644
index 0000000000..72c8cc4981
--- /dev/null
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+/** @file
+ * Definition of the MSHRQueue.
+ */
+
+#include "mem/cache/miss/mshr_queue.hh"
+#include "sim/eventq.hh"
+
+using namespace std;
+
+/**
+ * Allocate num_mshrs + reserve - 1 registers and place all of them on the
+ * free list.
+ */
+MSHRQueue::MSHRQueue(int num_mshrs, int reserve)
+    : numMSHRs(num_mshrs + reserve - 1), numReserve(reserve)
+{
+    allocated = 0;
+    inServiceMSHRs = 0;
+    allocatedTargets = 0;
+    registers = new MSHR[numMSHRs];
+    for (int i = 0; i < numMSHRs; ++i) {
+        freeList.push_back(&registers[i]);
+    }
+}
+
+MSHRQueue::~MSHRQueue()
+{
+    delete [] registers;
+}
+
+/**
+ * Return the first allocated MSHR whose address equals addr, or NULL.
+ * Only the address is compared; the asid argument is not consulted.
+ */
+MSHR*
+MSHRQueue::findMatch(Addr addr, int asid) const
+{
+    MSHR::ConstIterator i = allocatedList.begin();
+    MSHR::ConstIterator end = allocatedList.end();
+    for (; i != end; ++i) {
+        MSHR *mshr = *i;
+        if (mshr->addr == addr) {
+            return mshr;
+        }
+    }
+    return NULL;
+}
+
+/**
+ * Append every allocated MSHR whose address equals addr to matches, which
+ * must be empty on entry. As in findMatch, asid is not consulted.
+ */
+bool
+MSHRQueue::findMatches(Addr addr, int asid, vector<MSHR*>& matches) const
+{
+    // Need an empty vector
+    assert(matches.empty());
+    bool retval = false;
+    MSHR::ConstIterator i = allocatedList.begin();
+    MSHR::ConstIterator end = allocatedList.end();
+    for (; i != end; ++i) {
+        MSHR *mshr = *i;
+        if (mshr->addr == addr) {
+            retval = true;
+            matches.push_back(mshr);
+        }
+    }
+    return retval;
+
+}
+
+/**
+ * Return the first pending MSHR whose byte range (or, for copies, whose
+ * destination range) overlaps the range covered by pkt, or NULL.
+ */
+MSHR*
+MSHRQueue::findPending(Packet * &pkt) const
+{
+    MSHR::ConstIterator i = pendingList.begin();
+    MSHR::ConstIterator end = pendingList.end();
+    for (; i != end; ++i) {
+        MSHR *mshr = *i;
+        if (mshr->addr < pkt->addr) {
+            if (mshr->addr + mshr->pkt->size > pkt->addr) {
+                return mshr;
+            }
+        } else {
+            if (pkt->addr + pkt->size > mshr->addr) {
+                return mshr;
+            }
+        }
+
+        //need to check destination address for copies.
+        if (mshr->pkt->cmd == Copy) {
+            Addr dest = mshr->pkt->dest;
+            if (dest < pkt->addr) {
+                if (dest + mshr->pkt->size > pkt->addr) {
+                    return mshr;
+                }
+            } else {
+                if (pkt->addr + pkt->size > dest) {
+                    return mshr;
+                }
+            }
+        }
+    }
+    return NULL;
+}
+
+/**
+ * Take an MSHR off the free list for pkt and put it on the allocated and
+ * pending lists. Commands that expect no response are buffered as-is;
+ * anything else is allocated at the size-aligned address with pkt as its
+ * first target. The free list must not be empty.
+ */
+MSHR*
+MSHRQueue::allocate(Packet * &pkt, int size)
+{
+    Addr aligned_addr = pkt->addr & ~((Addr)size - 1);
+    MSHR *mshr = freeList.front();
+    assert(mshr->getNumTargets() == 0);
+    freeList.pop_front();
+
+    if (pkt->cmd.isNoResponse()) {
+        mshr->allocateAsBuffer(pkt);
+    } else {
+        assert(size !=0);
+        // The asid lives on the request attached to the packet.
+        mshr->allocate(pkt->cmd, aligned_addr, pkt->req->asid, size, pkt);
+        allocatedTargets += 1;
+    }
+    mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
+    mshr->readyIter = pendingList.insert(pendingList.end(), mshr);
+
+    allocated += 1;
+    return mshr;
+}
+
+/**
+ * Take an MSHR off the free list, allocate it as a Read of addr with the
+ * given target, and put it on the allocated and pending lists.
+ */
+MSHR*
+MSHRQueue::allocateFetch(Addr addr, int asid, int size, Packet * &target)
+{
+    MSHR *mshr = freeList.front();
+    assert(mshr->getNumTargets() == 0);
+    freeList.pop_front();
+    mshr->allocate(Read, addr, asid, size, target);
+    mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
+    mshr->readyIter = pendingList.insert(pendingList.end(), mshr);
+
+    allocated += 1;
+    return mshr;
+}
+
+/**
+ * Take an MSHR off the free list and allocate it as a Read of addr with no
+ * initial target. The MSHR is marked in service immediately and is not put
+ * on the pending list.
+ */
+MSHR*
+MSHRQueue::allocateTargetList(Addr addr, int asid, int size)
+{
+    MSHR *mshr = freeList.front();
+    assert(mshr->getNumTargets() == 0);
+    freeList.pop_front();
+    // MSHR::allocate tests the target pointer, so "no target" has to be an
+    // explicit NULL rather than an indeterminate value.
+    Packet * dummy = NULL;
+    mshr->allocate(Read, addr, asid, size, dummy);
+    mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
+    mshr->inService = true;
+    ++inServiceMSHRs;
+    ++allocated;
+    return mshr;
+}
+
+
+void
+MSHRQueue::deallocate(MSHR* mshr)
+{
+    deallocateOne(mshr);
+}
+
+/**
+ * Return mshr to the free list, fix up the counters, and remove it from
+ * the pending list if it had not been sent yet.
+ * @return Iterator to the entry that followed mshr in the allocated list.
+ */
+MSHR::Iterator
+MSHRQueue::deallocateOne(MSHR* mshr)
+{
+    MSHR::Iterator retval = allocatedList.erase(mshr->allocIter);
+    freeList.push_front(mshr);
+    allocated--;
+    allocatedTargets -= mshr->getNumTargets();
+    if (mshr->inService) {
+        inServiceMSHRs--;
+    } else {
+        pendingList.erase(mshr->readyIter);
+    }
+
+    mshr->deallocate();
+    return retval;
+}
+
+void
+MSHRQueue::moveToFront(MSHR *mshr)
+{
+    if (!mshr->inService) {
+        assert(mshr == *(mshr->readyIter));
+        pendingList.erase(mshr->readyIter);
+        mshr->readyIter = pendingList.insert(pendingList.begin(), mshr);
+    }
+}
+
+/**
+ * Move mshr from pending to in-service. An MSHR whose command expects no
+ * response is deallocated instead, since nothing will come back for it.
+ */
+void
+MSHRQueue::markInService(MSHR* mshr)
+{
+    //assert(mshr == pendingList.front());
+    if (mshr->pkt->cmd.isNoResponse()) {
+        assert(mshr->getNumTargets() == 0);
+        deallocate(mshr);
+        return;
+    }
+    mshr->inService = true;
+    pendingList.erase(mshr->readyIter);
+    mshr->readyIter = NULL;
+    inServiceMSHRs += 1;
+    //pendingList.pop_front();
+}
+
+/**
+ * Put an in-service MSHR back at the end of the pending list with the
+ * given command and with its SATISFIED flag cleared.
+ */
+void
+MSHRQueue::markPending(MSHR* mshr, Packet::Command cmd)
+{
+    assert(mshr->readyIter == NULL);
+    mshr->pkt->cmd = cmd;
+    mshr->pkt->flags &= ~SATISFIED;
+    mshr->inService = false;
+    --inServiceMSHRs;
+    /**
+     * @ todo might want to add rerequests to front of pending list for
+     * performance.
+     */
+    mshr->readyIter = pendingList.insert(pendingList.end(), mshr);
+}
+
+/**
+ * Drop every target of each MSHR that belongs to thread_number. MSHRs not
+ * yet in service are deallocated as well; in-service ones stay allocated.
+ * NOTE(review): allocatedTargets is not reduced for the targets popped
+ * here -- confirm whether that is intended.
+ */
+void
+MSHRQueue::squash(int thread_number)
+{
+    MSHR::Iterator i = allocatedList.begin();
+    MSHR::Iterator end = allocatedList.end();
+    for (; i != end;) {
+        MSHR *mshr = *i;
+        if (mshr->threadNum == thread_number) {
+            while (mshr->hasTargets()) {
+                Packet * target = mshr->getTarget();
+                mshr->popTarget();
+
+                assert(target->thread_num == thread_number);
+                if (target->completionEvent != NULL) {
+                    delete target->completionEvent;
+                }
+                target = NULL;
+            }
+            assert(!mshr->hasTargets());
+            assert(mshr->ntargets==0);
+            if (!mshr->inService) {
+                i = deallocateOne(mshr);
+            } else {
+                //mshr->pkt->flags &= ~CACHE_LINE_FILL;
+                ++i;
+            }
+        } else {
+            ++i;
+        }
+    }
+}
diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh
new file mode 100644
index 0000000000..3e1d3f39f0
--- /dev/null
+++ b/src/mem/cache/miss/mshr_queue.hh
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ */
+
+/** @file
+ * Declaration of a structure to manage MSHRs.
+ */
+
+#ifndef __MSHR_QUEUE_HH__
+#define __MSHR_QUEUE_HH__
+
+#include <vector>
+#include "mem/cache/miss/mshr.hh"
+
+/**
+ * A Class for maintaining a list of pending and allocated memory requests.
+ */
+class MSHRQueue {
+  private:
+    /** MSHR storage. */
+    MSHR* registers;
+    /** Holds pointers to all allocated MSHRs. */
+    MSHR::List allocatedList;
+    /** Holds pointers to MSHRs that haven't been sent to the bus. */
+    MSHR::List pendingList;
+    /** Holds non allocated MSHRs. */
+    MSHR::List freeList;
+
+    // Parameters
+    /**
+     * The total number of MSHRs in this queue. This number is set as the
+     * number of MSHRs requested plus (numReserve - 1). This allows for
+     * the same number of effective MSHRs while still maintaining the reserve.
+     */
+    const int numMSHRs;
+
+    /**
+     * The number of MSHRs to hold in reserve. This is needed because copy
+     * operations can allocate up to 4 MSHRs at one time.
+     */
+    const int numReserve;
+
+  public:
+    /** The number of allocated MSHRs. */
+    int allocated;
+    /** The number of MSHRs that have been forwarded to the bus. */
+    int inServiceMSHRs;
+    /** The number of targets waiting for response. */
+    int allocatedTargets;
+
+    /**
+     * Create a queue with a given number of MSHRs.
+     * @param num_mshrs The number of MSHRs in this queue.
+     * @param reserve The minimum number of MSHRs needed to satisfy any access.
+     */
+    MSHRQueue(int num_mshrs, int reserve = 1);
+
+    /** Destructor */
+    ~MSHRQueue();
+
+    /**
+     * Find the first MSHR that matches the provided address and asid.
+     * @param addr The address to find.
+     * @param asid The address space id.
+     * @return Pointer to the matching MSHR, null if not found.
+     */
+    MSHR* findMatch(Addr addr, int asid) const;
+
+    /**
+     * Find and return all the matching MSHRs in the provided vector.
+     * @param addr The address to find.
+     * @param asid The address space ID.
+     * @param matches The vector to return pointers to the matching MSHRs.
+     * @return True if any matches are found, false otherwise.
+     * @todo Typedef the vector??
+     */
+    bool findMatches(Addr addr, int asid, std::vector<MSHR*>& matches) const;
+
+    /**
+     * Find any pending requests that overlap the given request.
+     * @param pkt The request to find.
+     * @return A pointer to the earliest matching MSHR.
+     */
+    MSHR* findPending(Packet * &pkt) const;
+
+    /**
+     * Allocates a new MSHR for the request and size. This places the request
+     * as the first target in the MSHR.
+     * @param pkt The request to handle.
+     * @param size The number in bytes to fetch from memory.
+     * @return A pointer to the MSHR allocated.
+     *
+     * @pre There are free MSHRs.
+     */
+    MSHR* allocate(Packet * &pkt, int size = 0);
+
+    /**
+     * Allocate a read request for the given address, and places the given
+     * target on the target list.
+     * @param addr The address to fetch.
+     * @param asid The address space for the fetch.
+     * @param size The number of bytes to request.
+     * @param target The first target for the request.
+     * @return Pointer to the new MSHR.
+     */
+    MSHR* allocateFetch(Addr addr, int asid, int size, Packet * &target);
+
+    /**
+     * Allocate a target list for the given address.
+     * @param addr The address to fetch.
+     * @param asid The address space for the fetch.
+     * @param size The number of bytes to request.
+     * @return Pointer to the new MSHR.
+     */
+    MSHR* allocateTargetList(Addr addr, int asid, int size);
+
+    /**
+     * Removes the given MSHR from the queue. This places the MSHR on the
+     * free list.
+     * @param mshr The MSHR to remove.
+     */
+    void deallocate(MSHR* mshr);
+
+    /**
+     * Allocates a target to the given MSHR. Used to keep track of the number
+     * of outstanding targets.
+     * @param mshr The MSHR to allocate the target to.
+     * @param pkt The target request.
+     */
+    void allocateTarget(MSHR* mshr, Packet * &pkt)
+    {
+        mshr->allocateTarget(pkt);
+        allocatedTargets += 1;
+    }
+
+    /**
+     * Remove a MSHR from the queue. Returns an iterator into the allocatedList
+     * for faster squash implementation.
+     * @param mshr The MSHR to remove.
+     * @return An iterator to the next entry in the allocatedList.
+     */
+    MSHR::Iterator deallocateOne(MSHR* mshr);
+
+    /**
+     * Moves the MSHR to the front of the pending list if it is not in service.
+     * @param mshr The mshr to move.
+     */
+    void moveToFront(MSHR *mshr);
+
+    /**
+     * Mark the given MSHR as in service. This removes the MSHR from the
+     * pendingList. Deallocates the MSHR if it does not expect a response.
+     * @param mshr The MSHR to mark in service.
+     */
+    void markInService(MSHR* mshr);
+
+    /**
+     * Mark an in service mshr as pending, used to resend a request.
+     * @param mshr The MSHR to resend.
+     * @param cmd The command to resend.
+     */
+    void markPending(MSHR* mshr, Packet::Command cmd);
+
+    /**
+     * Squash outstanding requests with the given thread number. If a request
+     * is in service, just squashes the targets.
+     * @param thread_number The thread to squash.
+     */
+    void squash(int thread_number);
+
+    /**
+     * Returns true if the pending list is not empty.
+     * @return True if there are outstanding requests.
+     */
+    bool havePending() const
+    {
+        return !pendingList.empty();
+    }
+
+    /**
+     * Returns true once more than numMSHRs - numReserve MSHRs are
+     * allocated, i.e. when at most numReserve - 1 entries remain free.
+     * @return True if this queue is full.
+     */
+    bool isFull() const
+    {
+        return (allocated > numMSHRs - numReserve);
+    }
+
+    /**
+     * Returns the request at the head of the pendingList.
+     * @return The next request to service, NULL if nothing is pending.
+     */
+    Packet * getReq() const
+    {
+        if (pendingList.empty()) {
+            return NULL;
+        }
+        MSHR* mshr = pendingList.front();
+        return mshr->pkt;
+    }
+
+    /**
+     * Returns the number of outstanding targets.
+     * @return the number of allocated targets.
+     */
+    int getAllocatedTargets() const
+    {
+        return allocatedTargets;
+    }
+
+};
+
+#endif //__MSHR_QUEUE_HH__
diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc
new file mode 100644
index 0000000000..14beef2601
--- /dev/null
+++ b/src/mem/cache/prefetch/base_prefetcher.cc
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ron Dreslinski
+ */
+
+/**
+ * @file
+ * Hardware Prefetcher Definition.
+ */
+
+#include "base/trace.hh"
+#include "mem/cache/base_cache.hh"
+#include "mem/cache/prefetch/base_prefetcher.hh"
+#include <list>
+
+BasePrefetcher::BasePrefetcher(int size, bool pageStop, bool serialSquash,
+                               bool cacheCheckPush, bool onlyData)
+    :size(size), pageStop(pageStop), serialSquash(serialSquash),
+     cacheCheckPush(cacheCheckPush), only_data(onlyData)
+{
+}
+
+/** Store the parent cache pointer and read its block size. */
+void
+BasePrefetcher::setCache(BaseCache *_cache)
+{
+    cache = _cache;
+    blkSize = cache->getBlockSize();
+}
+
+/** Name and describe the prefetcher statistics under the given prefix. */
+void
+BasePrefetcher::regStats(const std::string &name)
+{
+    pfIdentified
+        .name(name + ".prefetcher.num_hwpf_identified")
+        .desc("number of hwpf identified")
+        ;
+
+    pfMSHRHit
+        .name(name + ".prefetcher.num_hwpf_already_in_mshr")
+        .desc("number of hwpf that were already in mshr")
+        ;
+
+    pfCacheHit
+        .name(name + ".prefetcher.num_hwpf_already_in_cache")
+        .desc("number of hwpf that were already in the cache")
+        ;
+
+    pfBufferHit
+        .name(name + ".prefetcher.num_hwpf_already_in_prefetcher")
+        .desc("number of hwpf that were already in the prefetch queue")
+        ;
+
+    pfRemovedFull
+        .name(name + ".prefetcher.num_hwpf_evicted")
+        .desc("number of hwpf removed due to no buffer left")
+        ;
+
+    pfRemovedMSHR
+        .name(name + ".prefetcher.num_hwpf_removed_MSHR_hit")
+        .desc("number of hwpf removed because MSHR allocated")
+        ;
+
+    pfIssued
+        .name(name + ".prefetcher.num_hwpf_issued")
+        .desc("number of hwpf issued")
+        ;
+
+    pfSpanPage
+        .name(name + ".prefetcher.num_hwpf_span_page")
+        .desc("number of hwpf spanning a virtual page")
+        ;
+
+    pfSquashed
+        .name(name + ".prefetcher.num_hwpf_squashed_from_miss")
+        .desc("number of hwpf that got squashed due to a miss aborting calculation time")
+        ;
+}
+
+/**
+ * Pop queued prefetches until one can be issued. When cacheCheckPush is
+ * off, entries that inCache() reports as present are discarded. Clears the
+ * Request_PF master request once the queue drains.
+ * @return The prefetch to issue, NULL if none is left.
+ */
+Packet *
+BasePrefetcher::getPacket()
+{
+    DPRINTF(HWPrefetch, "%s:Requesting a hw_pf to issue\n", cache->name());
+
+    if (pf.empty()) {
+        DPRINTF(HWPrefetch, "%s:No HW_PF found\n", cache->name());
+        return NULL;
+    }
+
+    Packet * pkt;
+    bool keepTrying = false;
+    do {
+        pkt = *pf.begin();
+        pf.pop_front();
+        if (!cacheCheckPush) {
+            keepTrying = inCache(pkt);
+        }
+        if (pf.empty()) {
+            cache->clearMasterRequest(Request_PF);
+            if (keepTrying) return NULL; //None left, all were in cache
+        }
+    } while (keepTrying);
+
+    pfIssued++;
+    return pkt;
+}
+
+/**
+ * React to a cache miss: drop a queued prefetch for the same block, squash
+ * queued prefetches that are not yet due when serialSquash is set, then
+ * queue the candidates produced by calculatePrefetch().
+ * NOTE(review): packets removed from the queue, and candidates skipped
+ * below, are never freed here -- confirm who owns them.
+ */
+void
+BasePrefetcher::handleMiss(Packet * &pkt, Tick time)
+{
+    if (!pkt->isUncacheable() && !(pkt->isInstRead() && only_data))
+    {
+        //Calculate the blk address
+        Addr blkAddr = pkt->paddr & ~(Addr)(blkSize-1);
+
+        //Check if miss is in pfq, if so remove it
+        std::list<Packet *>::iterator iter = inPrefetch(blkAddr);
+        if (iter != pf.end()) {
+            DPRINTF(HWPrefetch, "%s:Saw a miss to a queued prefetch, removing it\n", cache->name());
+            pfRemovedMSHR++;
+            pf.erase(iter);
+            if (pf.empty())
+                cache->clearMasterRequest(Request_PF);
+        }
+
+        //Remove anything in queue with delay older than time
+        //since everything is inserted in time order, start from end
+        //and work until pf.empty() or time is earlier
+        //This is done to emulate Aborting the previous work on a new miss
+        //Needed for serial calculators like GHB
+        if (serialSquash) {
+            // Look at the tail through back() only after checking that the
+            // list is non-empty; an iterator to the tail would be
+            // invalidated by pop_back() and cannot be formed at all when
+            // the queue is empty.
+            while (!pf.empty() && (pf.back()->time >= time)) {
+                pfSquashed++;
+                pf.pop_back();
+            }
+            if (pf.empty())
+                cache->clearMasterRequest(Request_PF);
+        }
+
+
+        std::list<Addr> addresses;
+        std::list<Tick> delays;
+        calculatePrefetch(pkt, addresses, delays);
+
+        std::list<Addr>::iterator addr = addresses.begin();
+        std::list<Tick>::iterator delay = delays.begin();
+        while (addr != addresses.end())
+        {
+            DPRINTF(HWPrefetch, "%s:Found a pf canidate, inserting into prefetch queue\n", cache->name());
+            //temp calc this here...
+            pfIdentified++;
+            //create a prefetch memreq
+            Packet * prefetch;
+            prefetch = new Packet();
+            prefetch->paddr = (*addr);
+            prefetch->size = blkSize;
+            prefetch->cmd = Hard_Prefetch;
+            prefetch->xc = pkt->xc;
+            prefetch->data = new uint8_t[blkSize];
+            // NOTE(review): assumes a new Packet already carries a valid
+            // req -- confirm, nothing here assigns prefetch->req.
+            prefetch->req->asid = pkt->req->asid;
+            prefetch->thread_num = pkt->thread_num;
+            prefetch->time = time + (*delay); //@todo ADD LATENCY HERE
+            //... initialize
+
+            //Check if it is already in the cache
+            if (cacheCheckPush) {
+                if (inCache(prefetch)) {
+                    addr++;
+                    delay++;
+                    continue;
+                }
+            }
+
+            //Check if it is already in the miss_queue
+            if (inMissQueue(prefetch->paddr, prefetch->req->asid)) {
+                addr++;
+                delay++;
+                continue;
+            }
+
+            //Check if it is already in the pf buffer
+            if (inPrefetch(prefetch->paddr) != pf.end()) {
+                pfBufferHit++;
+                addr++;
+                delay++;
+                continue;
+            }
+
+            //We just remove the head if we are full
+            if (pf.size() == size)
+            {
+                DPRINTF(HWPrefetch, "%s:Inserting into prefetch queue, it was full removing oldest\n", cache->name());
+                pfRemovedFull++;
+                pf.pop_front();
+            }
+
+            pf.push_back(prefetch);
+            prefetch->flags |= CACHE_LINE_FILL;
+
+            //Make sure to request the bus, with proper delay
+            cache->setMasterRequest(Request_PF, prefetch->time);
+
+            //Increment through the list
+            addr++;
+            delay++;
+        }
+    }
+}
+
+/**
+ * Find the queued prefetch whose block-aligned address equals address.
+ * @return Iterator to the match, pf.end() if there is none.
+ */
+std::list<Packet *>::iterator
+BasePrefetcher::inPrefetch(Addr address)
+{
+    //Guaranteed to only be one match, we always check before inserting
+    std::list<Packet *>::iterator iter;
+    for (iter=pf.begin(); iter != pf.end(); iter++) {
+        if (((*iter)->paddr & ~(Addr)(blkSize-1)) == address) {
+            return iter;
+        }
+    }
+    return pf.end();
+}
+
+
diff --git a/src/mem/cache/prefetch/base_prefetcher.hh b/src/mem/cache/prefetch/base_prefetcher.hh
new file mode 100644
index 0000000000..3e4fc89d10
--- /dev/null
+++ b/src/mem/cache/prefetch/base_prefetcher.hh
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ron Dreslinski
+ */
+
+/**
+ * @file
+ * Base class declaration for the hardware prefetchers (prefetch queue plus policy hooks).
+ */
+
+#ifndef __MEM_CACHE_PREFETCH_BASE_PREFETCHER_HH__
+#define __MEM_CACHE_PREFETCH_BASE_PREFETCHER_HH__
+
+#include "mem/packet.hh"
+#include <list>
+// BasePrefetcher queues prefetch packets for a cache; subclasses supply calculatePrefetch().
+class BaseCache;
+class BasePrefetcher
+{
+  protected:
+
+    /** The Prefetch Queue. */
+    std::list<Packet *> pf;
+
+    // PARAMETERS
+
+    /** Capacity of the prefetch queue; handleMiss() drops the oldest entry once pf reaches it. */
+    const int size;
+
+    /** Pointer to the parent cache. */
+    BaseCache* cache;
+
+    /** The block size of the parent cache. */
+    int blkSize;
+
+    /** When true, subclasses stop generating prefetches at a page boundary. */
+    bool pageStop;
+
+    /** Do we squash queued prefetches timed at or after a new miss.*/
+    bool serialSquash;
+
+    /** Do we check if it is in the cache when inserting into buffer,
+        or removing.*/
+    bool cacheCheckPush;
+
+    /** Do we prefetch on only data reads, or on inst reads as well. */
+    bool only_data;
+
+  public:
+    /** Prefetch statistics; regStats() assigns their names and descriptions. */
+    Stats::Scalar<> pfIdentified;
+    Stats::Scalar<> pfMSHRHit;
+    Stats::Scalar<> pfCacheHit;
+    Stats::Scalar<> pfBufferHit;
+    Stats::Scalar<> pfRemovedFull;
+    Stats::Scalar<> pfRemovedMSHR;
+    Stats::Scalar<> pfIssued;
+    Stats::Scalar<> pfSpanPage;
+    Stats::Scalar<> pfSquashed;
+    /** Register the statistics above under "<name>.prefetcher.". */
+    void regStats(const std::string &name);
+
+  public:
+    BasePrefetcher(int numMSHRS, bool pageStop, bool serialSquash,
+                   bool cacheCheckPush, bool onlyData);
+
+    virtual ~BasePrefetcher() {}
+    /** Set the parent cache and read its block size. */
+    void setCache(BaseCache *_cache);
+    /** Handle a demand miss at @p time: prune the queue, then queue new prefetches. */
+    void handleMiss(Packet * &pkt, Tick time);
+    /** Remove and return the next prefetch to issue, or NULL if there is none. */
+    Packet * getPacket();
+    /** True while at least one prefetch is queued. */
+    bool havePending()
+    {
+        return !pf.empty();
+    }
+    /** Subclass hook: append candidate addresses and their matching delays for this miss. */
+    virtual void calculatePrefetch(Packet * &pkt,
+                                   std::list<Addr> &addresses,
+                                   std::list<Tick> &delays) = 0;
+    /** Subclass hook: true if the packet's block is already in the cache. */
+    virtual bool inCache(Packet * &pkt) = 0;
+    /** Subclass hook: true if the address is already in the miss queue. */
+    virtual bool inMissQueue(Addr address, int asid) = 0;
+    /** Locate the queued prefetch for block @p address; pf.end() if absent. */
+    std::list<Packet *>::iterator inPrefetch(Addr address);
+};
+
+
+#endif //__MEM_CACHE_PREFETCH_BASE_PREFETCHER_HH__
diff --git a/src/mem/cache/prefetch/ghb_prefetcher.cc b/src/mem/cache/prefetch/ghb_prefetcher.cc
new file mode 100644
index 0000000000..247ec6e8bb
--- /dev/null
+++ b/src/mem/cache/prefetch/ghb_prefetcher.cc
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2005 The Regents of The University of Michigan
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ron Dreslinski + * Steve Reinhardt + */ + +/** + * @file + * GHB Prefetcher template instantiations. 
+ */ + +#include "mem/cache/tags/cache_tags.hh" + +#include "mem/cache/tags/lru.hh" + +#include "base/compression/null_compression.hh" + +#include "mem/cache/miss/miss_queue.hh" +#include "mem/cache/miss/blocking_buffer.hh" + +#include "mem/cache/prefetch/ghb_prefetcher.hh" + +// Template Instantiations +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +template class GHBPrefetcher, MissQueue>; +template class GHBPrefetcher, BlockingBuffer>; + +#endif //DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/prefetch/ghb_prefetcher.hh b/src/mem/cache/prefetch/ghb_prefetcher.hh new file mode 100644 index 0000000000..f25ebe1664 --- /dev/null +++ b/src/mem/cache/prefetch/ghb_prefetcher.hh @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ron Dreslinski
+ */
+
+/**
+ * @file
+ * Describes a ghb prefetcher based on template policies.
+ */
+
+#ifndef __MEM_CACHE_PREFETCH_GHB_PREFETCHER_HH__
+#define __MEM_CACHE_PREFETCH_GHB_PREFETCHER_HH__
+
+#include "base/misc.hh" // fatal, panic, and warn
+
+#include "mem/cache/prefetch/prefetcher.hh"
+
+/**
+ * Prefetcher that tracks the last two miss block addresses per CPU (or one
+ * shared history when useCPUId is false). When the newest stride equals the
+ * previous one, calculatePrefetch() proposes up to 'degree' addresses at
+ * successive multiples of that stride, each delayed by 'latency'. TagStore
+ * and Buffering are the template policies passed on to Prefetcher.
+ */
+template <class TagStore, class Buffering>
+class GHBPrefetcher : public Prefetcher<TagStore, Buffering>
+{
+  protected:
+
+    Buffering* mq;
+    TagStore* tags;
+
+    Addr second_last_miss_addr[64/*MAX_CPUS*/];
+    Addr last_miss_addr[64/*MAX_CPUS*/];
+
+    Tick latency;
+    int degree;
+    bool useCPUId;
+
+  public:
+    /** @param latency delay attached to each proposed prefetch. @param degree how many to propose. */
+    GHBPrefetcher(int size, bool pageStop, bool serialSquash,
+                  bool cacheCheckPush, bool onlyData,
+                  Tick latency, int degree, bool useCPUId)
+        :Prefetcher<TagStore, Buffering>(size, pageStop, serialSquash,
+                                         cacheCheckPush, onlyData),
+         second_last_miss_addr(), last_miss_addr(), // zero both histories
+         latency(latency), degree(degree), useCPUId(useCPUId)
+    {}
+
+    ~GHBPrefetcher() {}
+    /** Record this miss and, if the last two strides match, append up to 'degree' strided targets. */
+    void calculatePrefetch(Packet * &pkt, std::list<Addr> &addresses,
+                           std::list<Tick> &delays)
+    {
+        Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1);
+        int cpuID = pkt->cpu_num;
+        if (!useCPUId) cpuID = 0;
+        //NOTE(review): cpuID indexes the 64-entry arrays unchecked -- confirm cpu_num < 64
+        //Strides are kept in 64 bits so a large address delta is not truncated
+        int64_t new_stride = blkAddr - last_miss_addr[cpuID];
+        int64_t old_stride = last_miss_addr[cpuID] -
+            second_last_miss_addr[cpuID];
+
+        second_last_miss_addr[cpuID] = last_miss_addr[cpuID];
+        last_miss_addr[cpuID] = blkAddr;
+
+        if (new_stride == old_stride) {
+            for (int d=1; d <= degree; d++) {
+                Addr newAddr = blkAddr + d * new_stride;
+                if (this->pageStop &&
+                    (blkAddr & ~(TheISA::VMPageSize - 1)) !=
+                    (newAddr & ~(TheISA::VMPageSize - 1)))
+                {
+                    //Spanned the page, so now stop
+                    this->pfSpanPage += degree - d + 1;
+                    return;
+                }
+                else
+                {
+                    addresses.push_back(newAddr);
+                    delays.push_back(latency);
+                }
+            }
+        }
+    }
+};
+
+#endif // __MEM_CACHE_PREFETCH_GHB_PREFETCHER_HH__
diff --git a/src/mem/cache/prefetch/stride_prefetcher.cc b/src/mem/cache/prefetch/stride_prefetcher.cc
new file mode 100644
index 0000000000..93a0964689
--- /dev/null
+++ b/src/mem/cache/prefetch/stride_prefetcher.cc
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2005 The Regents of The University of Michigan
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ron Dreslinski + * Steve Reinhardt + */ + +/** + * @file + * Stride Prefetcher template instantiations. 
+ */ + +#include "mem/cache/tags/cache_tags.hh" + +#include "mem/cache/tags/lru.hh" + +#include "base/compression/null_compression.hh" + +#include "mem/cache/miss/miss_queue.hh" +#include "mem/cache/miss/blocking_buffer.hh" + +#include "mem/cache/prefetch/stride_prefetcher.hh" + +// Template Instantiations +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +template class StridePrefetcher, MissQueue>; +template class StridePrefetcher, BlockingBuffer>; + +#endif //DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/prefetch/stride_prefetcher.hh b/src/mem/cache/prefetch/stride_prefetcher.hh new file mode 100644 index 0000000000..f897762151 --- /dev/null +++ b/src/mem/cache/prefetch/stride_prefetcher.hh @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ron Dreslinski + */ + +/** + * @file + * Describes a strided prefetcher based on template policies. + */ + +#ifndef __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__ +#define __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__ + +#include "base/misc.hh" // fatal, panic, and warn + +#include "mem/cache/prefetch/prefetcher.hh" + +/** + * A template-policy based cache. The behavior of the cache can be altered by + * supplying different template policies. TagStore handles all tag and data + * storage @sa TagStore. Buffering handles all misses and writes/writebacks + * @sa MissQueue. Coherence handles all coherence policy details @sa + * UniCoherence, SimpleMultiCoherence. + */ +template +class StridePrefetcher : public Prefetcher +{ + protected: + + Buffering* mq; + TagStore* tags; + + class strideEntry + { + public: + Addr IAddr; + Addr MAddr; + int stride; + int64_t confidence; + +/* bool operator < (strideEntry a,strideEntry b) + { + if (a.confidence == b.confidence) { + return true; //?????? 
+ } + else return a.confidence < b.confidence; + }*/ + }; + Addr* lastMissAddr[64/*MAX_CPUS*/]; + + std::list table[64/*MAX_CPUS*/]; + Tick latency; + int degree; + bool useCPUId; + + + public: + + StridePrefetcher(int size, bool pageStop, bool serialSquash, + bool cacheCheckPush, bool onlyData, + Tick latency, int degree, bool useCPUId) + :Prefetcher(size, pageStop, serialSquash, + cacheCheckPush, onlyData), + latency(latency), degree(degree), useCPUId(useCPUId) + { + } + + ~StridePrefetcher() {} + + void calculatePrefetch(Packet * &pkt, std::list &addresses, + std::list &delays) + { +// Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1); + int cpuID = pkt->cpu_num; + if (!useCPUId) cpuID = 0; + + /* Scan Table for IAddr Match */ +/* std::list::iterator iter; + for (iter=table[cpuID].begin(); + iter !=table[cpuID].end(); + iter++) { + if ((*iter)->IAddr == pkt->pc) break; + } + + if (iter != table[cpuID].end()) { + //Hit in table + + int newStride = blkAddr - (*iter)->MAddr; + if (newStride == (*iter)->stride) { + (*iter)->confidence++; + } + else { + (*iter)->stride = newStride; + (*iter)->confidence--; + } + + (*iter)->MAddr = blkAddr; + + for (int d=1; d <= degree; d++) { + Addr newAddr = blkAddr + d * newStride; + if (this->pageStop && + (blkAddr & ~(TheISA::VMPageSize - 1)) != + (newAddr & ~(TheISA::VMPageSize - 1))) + { + //Spanned the page, so now stop + this->pfSpanPage += degree - d + 1; + return; + } + else + { + addresses.push_back(newAddr); + delays.push_back(latency); + } + } + } + else { + //Miss in table + //Find lowest confidence and replace + + } +*/ } +}; + +#endif // __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__ diff --git a/src/mem/cache/prefetch/tagged_prefetcher.hh b/src/mem/cache/prefetch/tagged_prefetcher.hh new file mode 100644 index 0000000000..17f500dd82 --- /dev/null +++ b/src/mem/cache/prefetch/tagged_prefetcher.hh @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ron Dreslinski + */ + +/** + * @file + * Describes a tagged prefetcher based on template policies. + */ + +#ifndef __MEM_CACHE_PREFETCH_TAGGED_PREFETCHER_HH__ +#define __MEM_CACHE_PREFETCH_TAGGED_PREFETCHER_HH__ + +#include "mem/cache/prefetch/prefetcher.hh" + +/** + * A template-policy based cache. The behavior of the cache can be altered by + * supplying different template policies. TagStore handles all tag and data + * storage @sa TagStore. 
Buffering handles all misses and writes/writebacks + * @sa MissQueue. Coherence handles all coherence policy details @sa + * UniCoherence, SimpleMultiCoherence. + */ +template +class TaggedPrefetcher : public Prefetcher +{ + protected: + + Buffering* mq; + TagStore* tags; + + Tick latency; + int degree; + + public: + + TaggedPrefetcher(int size, bool pageStop, bool serialSquash, + bool cacheCheckPush, bool onlyData, + Tick latency, int degree); + + ~TaggedPrefetcher() {} + + void calculatePrefetch(Packet * &pkt, std::list &addresses, + std::list &delays); +}; + +#endif // __MEM_CACHE_PREFETCH_TAGGED_PREFETCHER_HH__ diff --git a/src/mem/cache/prefetch/tagged_prefetcher_impl.hh b/src/mem/cache/prefetch/tagged_prefetcher_impl.hh index 7bdabbe142..9e46ba8937 100644 --- a/src/mem/cache/prefetch/tagged_prefetcher_impl.hh +++ b/src/mem/cache/prefetch/tagged_prefetcher_impl.hh @@ -49,10 +49,10 @@ TaggedPrefetcher(int size, bool pageStop, bool serialSquash, template void TaggedPrefetcher:: -calculatePrefetch(MemReqPtr &req, std::list &addresses, +calculatePrefetch(Packet * &pkt, std::list &addresses, std::list &delays) { - Addr blkAddr = req->paddr & ~(Addr)(this->blkSize-1); + Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1); for (int d=1; d <= degree; d++) { Addr newAddr = blkAddr + d*(this->blkSize); diff --git a/src/mem/cache/tags/base_tags.cc b/src/mem/cache/tags/base_tags.cc new file mode 100644 index 0000000000..1537373004 --- /dev/null +++ b/src/mem/cache/tags/base_tags.cc @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Ron Dreslinski + */ + +/** + * @file + * Definitions of BaseTags. 
+ */
+
+#include "mem/cache/tags/base_tags.hh"
+
+#include "mem/cache/base_cache.hh"
+#include "cpu/smt.hh" //maxThreadsPerCPU
+#include "sim/sim_exit.hh"
+
+using namespace std;
+/** Remember the parent cache and take a copy of its name (later returned by name()). */
+void
+BaseTags::setCache(BaseCache *_cache)
+{
+    cache = _cache;
+    objName = cache->name();
+}
+/** Register the tag-store statistics, each prefixed with @p name, plus the exit callback. */
+void
+BaseTags::regStats(const string &name)
+{
+    using namespace Stats;
+    replacements
+        .init(maxThreadsPerCPU)
+        .name(name + ".replacements")
+        .desc("number of replacements")
+        .flags(total)
+        ;
+
+    tagsInUse
+        .name(name + ".tagsinuse")
+        .desc("Cycle average of tags in use")
+        ;
+
+    totalRefs
+        .name(name + ".total_refs")
+        .desc("Total number of references to valid blocks.")
+        ;
+
+    sampledRefs
+        .name(name + ".sampled_refs")
+        .desc("Sample count of references to valid blocks.")
+        ;
+
+    avgRefs
+        .name(name + ".avg_refs")
+        .desc("Average number of references to valid blocks.")
+        ;
+    // avgRefs is a Stats::Formula: bind it to the ratio of the two scalars above
+    avgRefs = totalRefs/sampledRefs;
+
+    warmupCycle
+        .name(name + ".warmup_cycle")
+        .desc("Cycle when the warmup percentage was hit.")
+        ;
+    // BaseTagsCallback::process() calls cleanupRefs() on this object
+    registerExitCallback(new BaseTagsCallback(this));
+}
diff --git a/src/mem/cache/tags/base_tags.hh b/src/mem/cache/tags/base_tags.hh
new file mode 100644
index 0000000000..b7b0c7ef03
--- /dev/null
+++ b/src/mem/cache/tags/base_tags.hh
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Erik Hallnor
+ * Ron Dreslinski
+ */
+
+/**
+ * @file
+ * Declaration of a common base class for cache tagstore objects.
+ */
+
+#ifndef __BASE_TAGS_HH__
+#define __BASE_TAGS_HH__
+
+#include <string>
+#include "base/statistics.hh"
+#include "base/callback.hh"
+
+class BaseCache;
+
+/**
+ * A common base class of Cache tagstore objects.
+ */
+class BaseTags
+{
+  protected:
+    /** Pointer to the parent cache. */
+    BaseCache *cache;
+
+    /** Local copy of the parent cache name.  Used for DPRINTF. */
+    std::string objName;
+
+    /**
+     * The number of tags that need to be touched to meet the warmup
+     * percentage.
+     */
+    int warmupBound;
+    /** Marked true when the cache is warmed up. */
+    bool warmedUp;
+
+    // Statistics
+    /**
+     * @addtogroup CacheStatistics
+     * @{
+     */
+
+    /** Number of replacements of valid blocks per thread. */
+    Stats::Vector<> replacements;
+    /** Per cycle average of the number of tags that hold valid data. */
+    Stats::Average<> tagsInUse;
+
+    /** The total number of references to a block before it is replaced. */
+    Stats::Scalar<> totalRefs;
+
+    /**
+     * The number of reference counts sampled. This is different from
+     * replacements because we sample all the valid blocks when the simulator
+     * exits.
+     */
+    Stats::Scalar<> sampledRefs;
+
+    /**
+     * Average number of references to a block before it was replaced.
+     * @todo This should change to an average stat once we have them.
+     */
+    Stats::Formula avgRefs;
+
+    /** The cycle that the warmup percentage was hit. */
+    Stats::Scalar<> warmupCycle;
+    /**
+     * @}
+     */
+
+  public:
+
+    /**
+     * Destructor.
+     */
+    virtual ~BaseTags() {}
+
+    /**
+     * Set the parent cache back pointer. Also copies the cache name to
+     * objName.
+     * @param _cache Pointer to parent cache.
+     */
+    void setCache(BaseCache *_cache);
+
+    /**
+     * Return the parent cache name.
+     * @return the parent cache name.
+     */
+    const std::string &name() const
+    {
+        return objName;
+    }
+
+    /**
+     * Register local statistics.
+     * @param name The name to precede each statistic name.
+     */
+    void regStats(const std::string &name);
+
+    /**
+     * Average in the reference count for valid blocks when the simulation
+     * exits.
+     */
+    virtual void cleanupRefs() {}
+};
+/** Callback whose process() forwards to BaseTags::cleanupRefs() on the wrapped tag store. */
+class BaseTagsCallback : public Callback
+{
+    BaseTags *tags;
+  public:
+    BaseTagsCallback(BaseTags *t) : tags(t) {}
+    virtual void process() { tags->cleanupRefs(); };
+};
+
+#endif //__BASE_TAGS_HH__
diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc
new file mode 100644
index 0000000000..66d91b35b6
--- /dev/null
+++ b/src/mem/cache/tags/fa_lru.cc
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Definitions a fully associative LRU tagstore. + */ + +#include + +#include + +#include "mem/cache/tags/fa_lru.hh" +#include "base/intmath.hh" + +using namespace std; + +FALRU::FALRU(int _blkSize, int _size, int hit_latency) + : blkSize(_blkSize), size(_size), + numBlks(size/blkSize), hitLatency(hit_latency) +{ + if (!isPowerOf2(blkSize)) + fatal("cache block size (in bytes) `%d' must be a power of two", + blkSize); + if (!(hitLatency > 0)) + fatal("Access latency in cycles must be at least one cycle"); + if (!isPowerOf2(size)) + fatal("Cache Size must be power of 2 for now"); + + // Track all cache sizes from 128K up by powers of 2 + numCaches = floorLog2(size) - 17; + if (numCaches >0){ + cacheBoundaries = new FALRUBlk *[numCaches]; + cacheMask = (1 << numCaches) - 1; + } else { + cacheMask = 0; + } + + warmedUp = false; + warmupBound = size/blkSize; + + blks = new FALRUBlk[numBlks]; + head = &(blks[0]); + tail = &(blks[numBlks-1]); + + head->prev = NULL; + head->next = &(blks[1]); + head->inCache = cacheMask; + + tail->prev = &(blks[numBlks-2]); + tail->next = NULL; + tail->inCache = 0; + + int index = (1 << 17) / blkSize; + int j = 0; + int flags = cacheMask; + for (int i = 1; i < numBlks-1; i++) { + blks[i].inCache = flags; + if (i == index - 1){ + cacheBoundaries[j] = &(blks[i]); + flags &= ~ (1<tag == blkAddr && blk->isValid(); +} + +void +FALRU::invalidateBlk(int asid, 
Addr addr) +{ + Addr blkAddr = blkAlign(addr); + FALRUBlk* blk = (*tagHash.find(blkAddr)).second; + if (blk) { + assert(blk->tag == blkAddr); + blk->status = 0; + blk->isTouched = false; + tagsInUse--; + } +} + +FALRUBlk* +FALRU::findBlock(Addr addr, int asid, int &lat, int *inCache) +{ + accesses++; + int tmp_in_cache = 0; + Addr blkAddr = blkAlign(addr); + FALRUBlk* blk = hashLookup(blkAddr); + + if (blk && blk->isValid()) { + assert(blk->tag == blkAddr); + tmp_in_cache = blk->inCache; + for (int i = 0; i < numCaches; i++) { + if (1<inCache) { + hits[i]++; + } else { + misses[i]++; + } + } + hits[numCaches]++; + if (blk != head){ + moveToHead(blk); + } + } else { + blk = NULL; + for (int i = 0; i < numCaches+1; ++i) { + misses[i]++; + } + } + if (inCache) { + *inCache = tmp_in_cache; + } + + lat = hitLatency; + //assert(check()); + return blk; +} + +FALRUBlk* +FALRU::findBlock(Packet * &pkt, int &lat, int *inCache) +{ + Addr addr = pkt->paddr; + + accesses++; + int tmp_in_cache = 0; + Addr blkAddr = blkAlign(addr); + FALRUBlk* blk = hashLookup(blkAddr); + + if (blk && blk->isValid()) { + assert(blk->tag == blkAddr); + tmp_in_cache = blk->inCache; + for (int i = 0; i < numCaches; i++) { + if (1<inCache) { + hits[i]++; + } else { + misses[i]++; + } + } + hits[numCaches]++; + if (blk != head){ + moveToHead(blk); + } + } else { + blk = NULL; + for (int i = 0; i < numCaches+1; ++i) { + misses[i]++; + } + } + if (inCache) { + *inCache = tmp_in_cache; + } + + lat = hitLatency; + //assert(check()); + return blk; +} + +FALRUBlk* +FALRU::findBlock(Addr addr, int asid) const +{ + Addr blkAddr = blkAlign(addr); + FALRUBlk* blk = hashLookup(blkAddr); + + if (blk && blk->isValid()) { + assert(blk->tag == blkAddr); + } else { + blk = NULL; + } + return blk; +} + +FALRUBlk* +FALRU::findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks) +{ + FALRUBlk * blk = tail; + assert(blk->inCache == 0); + moveToHead(blk); + tagHash.erase(blk->tag); + 
tagHash[blkAlign(pkt->paddr)] = blk; + if (blk->isValid()) { + int thread_num = (blk->xc) ? blk->xc->getThreadNum() : 0; + replacements[thread_num]++; + } else { + tagsInUse++; + blk->isTouched = true; + if (!warmedUp && tagsInUse.value() >= warmupBound) { + warmedUp = true; + warmupCycle = curTick; + } + } + //assert(check()); + return blk; +} + +void +FALRU::moveToHead(FALRUBlk *blk) +{ + int updateMask = blk->inCache ^ cacheMask; + for (int i = 0; i < numCaches; i++){ + if ((1<inCache &= ~(1<prev; + } else if (cacheBoundaries[i] == blk) { + cacheBoundaries[i] = blk->prev; + } + } + blk->inCache = cacheMask; + if (blk != head) { + if (blk == tail){ + assert(blk->next == NULL); + tail = blk->prev; + tail->next = NULL; + } else { + blk->prev->next = blk->next; + blk->next->prev = blk->prev; + } + blk->next = head; + blk->prev = NULL; + head->prev = blk; + head = blk; + } +} + +bool +FALRU::check() +{ + FALRUBlk* blk = head; + int size = 0; + int boundary = 1<<17; + int j = 0; + int flags = cacheMask; + while (blk) { + size += blkSize; + if (blk->inCache != flags) { + return false; + } + if (size == boundary && blk != tail) { + if (cacheBoundaries[j] != blk) { + return false; + } + flags &=~(1 << j); + boundary = boundary<<1; + ++j; + } + blk = blk->next; + } + return true; +} diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh new file mode 100644 index 0000000000..7855f84550 --- /dev/null +++ b/src/mem/cache/tags/fa_lru.hh @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Declaration of a fully associative LRU tag store. + */ + +#ifndef __FA_LRU_HH__ +#define __FA_LRU_HH__ + +#include + +#include "mem/cache/cache_blk.hh" +#include "mem/packet.hh" +#include "base/hashmap.hh" +#include "mem/cache/tags/base_tags.hh" + +/** + * A fully associative cache block. + */ +class FALRUBlk : public CacheBlk +{ +public: + /** The previous block in LRU order. */ + FALRUBlk *prev; + /** The next block in LRU order. 
*/ + FALRUBlk *next; + /** Has this block been touched? */ + bool isTouched; + + /** + * A bit mask of the sizes of cache that this block is resident in. + * Each bit represents a power of 2 in MB size cache. + * If bit 0 is set, this block is in a 1MB cache + * If bit 2 is set, this block is in a 4MB cache, etc. + * There is one bit for each cache smaller than the full size (default + * 16MB). + */ + int inCache; +}; + +/** + * A fully associative LRU cache. Keeps statistics for accesses to a number of + * cache sizes at once. + */ +class FALRU : public BaseTags +{ + public: + /** Typedef the block type used in this class. */ + typedef FALRUBlk BlkType; + /** Typedef a list of pointers to the local block type. */ + typedef std::list BlkList; + protected: + /** The block size of the cache. */ + const int blkSize; + /** The size of the cache. */ + const int size; + /** The number of blocks in the cache. */ + const int numBlks; // calculated internally + /** The hit latency of the cache. */ + const int hitLatency; + + /** Array of pointers to blocks at the cache size boundaries. */ + FALRUBlk **cacheBoundaries; + /** A mask for the FALRUBlk::inCache bits. */ + int cacheMask; + /** The number of different size caches being tracked. */ + int numCaches; + + /** The cache blocks. */ + FALRUBlk *blks; + + /** The MRU block. */ + FALRUBlk *head; + /** The LRU block. */ + FALRUBlk *tail; + + /** Hash table type mapping addresses to cache block pointers. */ + typedef m5::hash_map > hash_t; + /** Iterator into the address hash table. */ + typedef hash_t::const_iterator tagIterator; + + /** The address hash table. */ + hash_t tagHash; + + /** + * Find the cache block for the given address. + * @param addr The address to find. + * @return The cache block of the address, if any. + */ + FALRUBlk * hashLookup(Addr addr) const; + + /** + * Move a cache block to the MRU position. + * @param blk The block to promote. 
+ */ + void moveToHead(FALRUBlk *blk); + + /** + * Check to make sure all the cache boundaries are still where they should + * be. Used for debugging. + * @return True if everything is correct. + */ + bool check(); + + /** + * @defgroup FALRUStats Fully Associative LRU specific statistics + * The FA lru stack lets us track multiple cache sizes at once. These + * statistics track the hits and misses for different cache sizes. + * @{ + */ + + /** Hits in each cache size >= 128K. */ + Stats::Vector<> hits; + /** Misses in each cache size >= 128K. */ + Stats::Vector<> misses; + /** Total number of accesses. */ + Stats::Scalar<> accesses; + + /** + * @} + */ + +public: + /** + * Construct and initialize this cache tagstore. + * @param blkSize The block size of the cache. + * @param size The size of the cache. + * @param hit_latency The hit latency of the cache. + */ + FALRU(int blkSize, int size, int hit_latency); + + /** + * Register the stats for this object. + * @param name The name to prepend to the stats name. + */ + void regStats(const std::string &name); + + /** + * Return true if the address is found in the cache. + * @param asid The address space ID. + * @param addr The address to look for. + * @return True if the address is in the cache. + */ + bool probe(int asid, Addr addr) const; + + /** + * Invalidate the cache block that contains the given addr. + * @param asid The address space ID. + * @param addr The address to invalidate. + */ + void invalidateBlk(int asid, Addr addr); + + /** + * Find the block in the cache and update the replacement data. Returns + * the access latency and the in cache flags as a side effect + * @param addr The address to look for. + * @param asid The address space ID. + * @param lat The latency of the access. + * @param inCache The FALRUBlk::inCache flags. + * @return Pointer to the cache block. 
+ */ + FALRUBlk* findBlock(Addr addr, int asid, int &lat, int *inCache = 0); + + /** + * Find the block in the cache and update the replacement data. Returns + * the access latency and the in cache flags as a side effect + * @param req The req whose block to find + * @param lat The latency of the access. + * @param inCache The FALRUBlk::inCache flags. + * @return Pointer to the cache block. + */ + FALRUBlk* findBlock(Packet * &pkt, int &lat, int *inCache = 0); + + /** + * Find the block in the cache, do not update the replacement data. + * @param addr The address to look for. + * @param asid The address space ID. + * @return Pointer to the cache block. + */ + FALRUBlk* findBlock(Addr addr, int asid) const; + + /** + * Find a replacement block for the address provided. + * @param req The request to a find a replacement candidate for. + * @param writebacks List for any writebacks to be performed. + * @param compress_blocks List of blocks to compress, for adaptive comp. + * @return The block to place the replacement in. + */ + FALRUBlk* findReplacement(Packet * &pkt, PacketList* & writebacks, + BlkList &compress_blocks); + + /** + * Return the hit latency of this cache. + * @return The hit latency. + */ + int getHitLatency() const + { + return hitLatency; + } + + /** + * Return the block size of this cache. + * @return The block size. + */ + int getBlockSize() + { + return blkSize; + } + + /** + * Return the subblock size of this cache, always the block size. + * @return The block size. + */ + int getSubBlockSize() + { + return blkSize; + } + + /** + * Align an address to the block size. + * @param addr the address to align. + * @return The aligned address. + */ + Addr blkAlign(Addr addr) const + { + return (addr & ~(Addr)(blkSize-1)); + } + + /** + * Generate the tag from the addres. For fully associative this is just the + * block address. + * @param addr The address to get the tag from. + * @param blk ignored here + * @return The tag. 
+ */ + Addr extractTag(Addr addr, FALRUBlk *blk) const + { + return blkAlign(addr); + } + + /** + * Return the set of an address. Only one set in a fully associative cache. + * @param addr The address to get the set from. + * @return 0. + */ + int extractSet(Addr addr) const + { + return 0; + } + + /** + * Calculate the block offset of an address. + * @param addr the address to get the offset of. + * @return the block offset. + */ + int extractBlkOffset(Addr addr) const + { + return (addr & (Addr)(blkSize-1)); + } + + /** + * Regenerate the block address from the tag and the set. + * @param tag The tag of the block. + * @param set The set the block belongs to. + * @return the block address. + */ + Addr regenerateBlkAddr(Addr tag, int set) const + { + return (tag); + } + + /** + * Read the data out of the internal storage of a cache block. FALRU + * currently doesn't support data storage. + * @param blk The cache block to read. + * @param data The buffer to read the data into. + * @return The data from the cache block. + */ + void readData(FALRUBlk *blk, uint8_t *data) + { + } + + /** + * Write data into the internal storage of a cache block. FALRU + * currently doesn't support data storage. + * @param blk The cache block to be written. + * @param data The data to write. + * @param size The number of bytes to write. + * @param writebacks A list for any writebacks to be performed. May be + * needed when writing to a compressed block. + */ + void writeData(FALRUBlk *blk, uint8_t *data, int size, + PacketList* &writebacks) + { + } + + /** + * Unimplemented. Perform a cache block copy from block aligned addresses. + * @param source The block aligned source address. + * @param dest The block aligned destination adddress. + * @param asid The address space ID. + * @param writebacks List for any generated writeback requests. + */ + void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) + { + } + + /** + * Unimplemented. 
+ */ + void fixCopy(Packet * &pkt, PacketList* &writebacks) + { + } + +}; + +#endif diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc new file mode 100644 index 0000000000..a574adaa32 --- /dev/null +++ b/src/mem/cache/tags/iic.cc @@ -0,0 +1,869 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Definitions of the Indirect Index Cache tagstore. 
+ */ + +#include +#include +#include + +#include + +#include "mem/cache/base_cache.hh" +#include "mem/cache/tags/iic.hh" +#include "base/intmath.hh" +#include "sim/root.hh" // for curTick + +#include "base/trace.hh" // for DPRINTF + + +using namespace std; + +/** Track the number of accesses to each cache set. */ +#define PROFILE_IIC 1 + +IIC::IIC(IIC::Params ¶ms) : + hashSets(params.numSets), blkSize(params.blkSize), assoc(params.assoc), + hitLatency(params.hitLatency), subSize(params.subblockSize), + numSub(blkSize/subSize), + trivialSize((floorLog2(params.size/subSize)*numSub)/8), + tagShift(floorLog2(blkSize)), blkMask(blkSize - 1), + subShift(floorLog2(subSize)), subMask(numSub - 1), + hashDelay(params.hashDelay), + numBlocks(params.size/subSize), + numTags(hashSets * assoc + params.size/blkSize -1), + numSecondary(params.size/blkSize), + tagNull(numTags), + primaryBound(hashSets * assoc) +{ + int i; + + // Check parameters + if (blkSize < 4 || !isPowerOf2(blkSize)) { + fatal("Block size must be at least 4 and a power of 2"); + } + if (hashSets <= 0 || !isPowerOf2(hashSets)) { + fatal("# of hashsets must be non-zero and a power of 2"); + } + if (assoc <= 0) { + fatal("associativity must be greater than zero"); + } + if (hitLatency <= 0) { + fatal("access latency must be greater than zero"); + } + if (numSub*subSize != blkSize) { + fatal("blocksize must be evenly divisible by subblock size"); + } + + // debug stuff + freeSecond = numSecondary; + + warmedUp = false; + warmupBound = params.size/blkSize; + + // Replacement Policy Initialization + repl = params.rp; + repl->setIIC(this); + + //last_miss_time = 0 + + // allocate data reference counters + dataReferenceCount = new int[numBlocks]; + memset(dataReferenceCount, 0, numBlocks*sizeof(int)); + + // Allocate storage for both internal data and block fast access data. + // We allocate it as one large chunk to reduce overhead and to make + // deletion easier. 
+ int data_index = 0; + dataStore = new uint8_t[(numBlocks + numTags) * blkSize]; + dataBlks = new uint8_t*[numBlocks]; + for (i = 0; i < numBlocks; ++i) { + dataBlks[i] = &dataStore[data_index]; + freeDataBlock(i); + data_index += subSize; + } + + assert(data_index == numBlocks * subSize); + + // allocate and init tag store + tagStore = new IICTag[numTags]; + + int blkIndex = 0; + // allocate and init sets + sets = new IICSet[hashSets]; + for (i = 0; i < hashSets; ++i) { + sets[i].assoc = assoc; + sets[i].tags = new IICTag*[assoc]; + sets[i].chain_ptr = tagNull; + + for (int j = 0; j < assoc; ++j) { + IICTag *tag = &tagStore[blkIndex++]; + tag->chain_ptr = tagNull; + tag->data_ptr.resize(numSub); + tag->size = blkSize; + tag->trivialData = new uint8_t[trivialSize]; + tag->numData = 0; + sets[i].tags[j] = tag; + tag->set = i; + tag->data = &dataStore[data_index]; + data_index += blkSize; + } + } + + assert(blkIndex == primaryBound); + + for (i = primaryBound; i < tagNull; i++) { + tagStore[i].chain_ptr = i+1; + //setup data ptrs to subblocks + tagStore[i].data_ptr.resize(numSub); + tagStore[i].size = blkSize; + tagStore[i].trivialData = new uint8_t[trivialSize]; + tagStore[i].numData = 0; + tagStore[i].set = 0; + tagStore[i].data = &dataStore[data_index]; + data_index += blkSize; + } + freelist = primaryBound; +} + +IIC::~IIC() +{ + delete [] dataReferenceCount; + delete [] dataStore; + delete [] tagStore; + delete [] sets; +} + +/* register cache stats */ +void +IIC::regStats(const string &name) +{ + using namespace Stats; + + BaseTags::regStats(name); + + hitHashDepth.init(0, 20, 1); + missHashDepth.init(0, 20, 1); + setAccess.init(0, hashSets, 1); + + /** IIC Statistics */ + hitHashDepth + .name(name + ".hit_hash_depth_dist") + .desc("Dist. of Hash lookup depths") + .flags(pdf) + ; + + missHashDepth + .name(name + ".miss_hash_depth_dist") + .desc("Dist. 
of Hash lookup depths") + .flags(pdf) + ; + + repl->regStats(name); + + if (PROFILE_IIC) + setAccess + .name(name + ".set_access_dist") + .desc("Dist. of Accesses across sets") + .flags(pdf) + ; + + missDepthTotal + .name(name + ".miss_depth_total") + .desc("Total of miss depths") + ; + + hashMiss + .name(name + ".hash_miss") + .desc("Total of misses in hash table") + ; + + hitDepthTotal + .name(name + ".hit_depth_total") + .desc("Total of hit depths") + ; + + hashHit + .name(name + ".hash_hit") + .desc("Total of hites in hash table") + ; +} + +// probe cache for presence of given block. +bool +IIC::probe(int asid, Addr addr) const +{ + return (findBlock(addr,asid) != NULL); +} + +IICTag* +IIC::findBlock(Addr addr, int asid, int &lat) +{ + Addr tag = extractTag(addr); + unsigned set = hash(addr); + int set_lat; + + unsigned long chain_ptr; + + if (PROFILE_IIC) + setAccess.sample(set); + + IICTag *tag_ptr = sets[set].findTag(asid, tag, chain_ptr); + set_lat = 1; + if (tag_ptr == NULL && chain_ptr != tagNull) { + int secondary_depth; + tag_ptr = secondaryChain(asid, tag, chain_ptr, &secondary_depth); + set_lat += secondary_depth; + // set depth for statistics fix this later!!! 
egh + sets[set].depth = set_lat; + + if (tag_ptr != NULL) { + /* need to move tag into primary table */ + // need to preserve chain: fix this egh + sets[set].tags[assoc-1]->chain_ptr = tag_ptr->chain_ptr; + tagSwap(tag_ptr - tagStore, sets[set].tags[assoc-1] - tagStore); + tag_ptr = sets[set].findTag(asid, tag, chain_ptr); + assert(tag_ptr!=NULL); + } + + } + set_lat = set_lat * hashDelay + hitLatency; + if (tag_ptr != NULL) { + // IIC replacement: if this is not the first element of + // list, reorder + sets[set].moveToHead(tag_ptr); + + hitHashDepth.sample(sets[set].depth); + hashHit++; + hitDepthTotal += sets[set].depth; + tag_ptr->status |= BlkReferenced; + lat = set_lat; + if (tag_ptr->whenReady > curTick && tag_ptr->whenReady - curTick > set_lat) { + lat = tag_ptr->whenReady - curTick; + } + + tag_ptr->refCount += 1; + } + else { + // fall through: cache block not found, not a hit... + missHashDepth.sample(sets[set].depth); + hashMiss++; + missDepthTotal += sets[set].depth; + lat = set_lat; + } + return tag_ptr; +} + +IICTag* +IIC::findBlock(Packet * &pkt, int &lat) +{ + Addr addr = pkt->paddr; + int asid = pkt->req->asid; + + Addr tag = extractTag(addr); + unsigned set = hash(addr); + int set_lat; + + unsigned long chain_ptr; + + if (PROFILE_IIC) + setAccess.sample(set); + + IICTag *tag_ptr = sets[set].findTag(asid, tag, chain_ptr); + set_lat = 1; + if (tag_ptr == NULL && chain_ptr != tagNull) { + int secondary_depth; + tag_ptr = secondaryChain(asid, tag, chain_ptr, &secondary_depth); + set_lat += secondary_depth; + // set depth for statistics fix this later!!! 
egh + sets[set].depth = set_lat; + + if (tag_ptr != NULL) { + /* need to move tag into primary table */ + // need to preserve chain: fix this egh + sets[set].tags[assoc-1]->chain_ptr = tag_ptr->chain_ptr; + tagSwap(tag_ptr - tagStore, sets[set].tags[assoc-1] - tagStore); + tag_ptr = sets[set].findTag(asid, tag, chain_ptr); + assert(tag_ptr!=NULL); + } + + } + set_lat = set_lat * hashDelay + hitLatency; + if (tag_ptr != NULL) { + // IIC replacement: if this is not the first element of + // list, reorder + sets[set].moveToHead(tag_ptr); + + hitHashDepth.sample(sets[set].depth); + hashHit++; + hitDepthTotal += sets[set].depth; + tag_ptr->status |= BlkReferenced; + lat = set_lat; + if (tag_ptr->whenReady > curTick && tag_ptr->whenReady - curTick > set_lat) { + lat = tag_ptr->whenReady - curTick; + } + + tag_ptr->refCount += 1; + } + else { + // fall through: cache block not found, not a hit... + missHashDepth.sample(sets[set].depth); + hashMiss++; + missDepthTotal += sets[set].depth; + lat = set_lat; + } + return tag_ptr; +} + +IICTag* +IIC::findBlock(Addr addr, int asid) const +{ + Addr tag = extractTag(addr); + unsigned set = hash(addr); + + unsigned long chain_ptr; + + IICTag *tag_ptr = sets[set].findTag(asid, tag, chain_ptr); + if (tag_ptr == NULL && chain_ptr != tagNull) { + int secondary_depth; + tag_ptr = secondaryChain(asid, tag, chain_ptr, &secondary_depth); + } + return tag_ptr; +} + + +IICTag* +IIC::findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks) +{ + DPRINTF(IIC, "Finding Replacement for %x\n", pkt->paddr); + unsigned set = hash(pkt->paddr); + IICTag *tag_ptr; + unsigned long *tmp_data = new unsigned long[numSub]; + + // Get a enough subblocks for a full cache line + for (int i = 0; i < numSub; ++i){ + tmp_data[i] = getFreeDataBlock(writebacks); + assert(dataReferenceCount[tmp_data[i]]==0); + } + + tag_ptr = getFreeTag(set, writebacks); + + tag_ptr->set = set; + for (int i=0; i< numSub; ++i) { + tag_ptr->data_ptr[i] = 
tmp_data[i]; + dataReferenceCount[tag_ptr->data_ptr[i]]++; + } + tag_ptr->numData = numSub; + assert(tag_ptr - tagStore < primaryBound); // make sure it is in primary + tag_ptr->chain_ptr = tagNull; + sets[set].moveToHead(tag_ptr); + delete [] tmp_data; + + list tag_indexes; + repl->doAdvance(tag_indexes); + while (!tag_indexes.empty()) { + if (!tagStore[tag_indexes.front()].isCompressed()) { + compress_blocks.push_back(&tagStore[tag_indexes.front()]); + } + tag_indexes.pop_front(); + } + + tag_ptr->re = (void*)repl->add(tag_ptr-tagStore); + + return tag_ptr; +} + +void +IIC::freeReplacementBlock(PacketList* & writebacks) +{ + IICTag *tag_ptr; + unsigned long data_ptr; + /* consult replacement policy */ + tag_ptr = &tagStore[repl->getRepl()]; + assert(tag_ptr->isValid()); + + DPRINTF(Cache, "Replacing %x in IIC: %s\n", + regenerateBlkAddr(tag_ptr->tag,0), + tag_ptr->isModified() ? "writeback" : "clean"); + /* write back replaced block data */ + if (tag_ptr && (tag_ptr->isValid())) { + int thread_num = (tag_ptr->xc) ? 
tag_ptr->xc->getThreadNum() : 0; + replacements[thread_num]++; + totalRefs += tag_ptr->refCount; + ++sampledRefs; + tag_ptr->refCount = 0; + + if (tag_ptr->isModified()) { + Packet * writeback = + buildWritebackReq(regenerateBlkAddr(tag_ptr->tag, 0), + tag_ptr->req->asid, tag_ptr->xc, blkSize, + (cache->doData())?tag_ptr->data:0, + tag_ptr->size); + writebacks.push_back(writeback); + } + } + + // free the data blocks + for (int i = 0; i < tag_ptr->numData; ++i) { + data_ptr = tag_ptr->data_ptr[i]; + assert(dataReferenceCount[data_ptr]>0); + if (--dataReferenceCount[data_ptr] == 0) { + freeDataBlock(data_ptr); + } + } + freeTag(tag_ptr); +} + +unsigned long +IIC::getFreeDataBlock(PacketList* & writebacks) +{ + struct IICTag *tag_ptr; + unsigned long data_ptr; + + tag_ptr = NULL; + /* find data block */ + while (blkFreelist.empty()) { + freeReplacementBlock(writebacks); + } + + data_ptr = blkFreelist.front(); + blkFreelist.pop_front(); + DPRINTF(IICMore,"Found free data at %d\n",data_ptr); + return data_ptr; +} + + + +IICTag* +IIC::getFreeTag(int set, PacketList* & writebacks) +{ + unsigned long tag_index; + IICTag *tag_ptr; + // Add new tag + tag_ptr = sets[set].findFree(); + // if no free in primary, and secondary exists + if (!tag_ptr && numSecondary) { + // need to spill a tag into secondary storage + while (freelist == tagNull) { + // get replacements until one is in secondary + freeReplacementBlock(writebacks); + } + + tag_index = freelist; + freelist = tagStore[freelist].chain_ptr; + freeSecond--; + + assert(tag_index != tagNull); + tagSwap(tag_index, sets[set].tags[assoc-1] - tagStore); + tagStore[tag_index].chain_ptr = sets[set].chain_ptr; + sets[set].chain_ptr = tag_index; + + tag_ptr = sets[set].tags[assoc-1]; + } + DPRINTF(IICMore,"Found free tag at %d\n",tag_ptr - tagStore); + tagsInUse++; + if (!warmedUp && tagsInUse.value() >= warmupBound) { + warmedUp = true; + warmupCycle = curTick; + } + + return tag_ptr; +} + +void +IIC::freeTag(IICTag *tag_ptr) +{ 
+ unsigned long tag_index, tmp_index; + // Fix tag_ptr + if (tag_ptr) { + // we have a tag to clear + DPRINTF(IICMore,"Freeing Tag for %x\n", + regenerateBlkAddr(tag_ptr->tag,0)); + tagsInUse--; + tag_ptr->status = 0; + tag_ptr->numData = 0; + tag_ptr->re = NULL; + tag_index = tag_ptr - tagStore; + if (tag_index >= primaryBound) { + // tag_ptr points to secondary store + assert(tag_index < tagNull); // remove this?? egh + if (tag_ptr->chain_ptr == tagNull) { + // need to fix chain list + unsigned tmp_set = hash(tag_ptr->tag << tagShift); + if (sets[tmp_set].chain_ptr == tag_index) { + sets[tmp_set].chain_ptr = tagNull; + } else { + tmp_index = sets[tmp_set].chain_ptr; + while (tmp_index != tagNull + && tagStore[tmp_index].chain_ptr != tag_index) { + tmp_index = tagStore[tmp_index].chain_ptr; + } + assert(tmp_index != tagNull); + tagStore[tmp_index].chain_ptr = tagNull; + } + tag_ptr->chain_ptr = freelist; + freelist = tag_index; + freeSecond++; + } else { + // copy next chained entry to this tag location + tmp_index = tag_ptr->chain_ptr; + tagSwap(tmp_index, tag_index); + tagStore[tmp_index].chain_ptr = freelist; + freelist = tmp_index; + freeSecond++; + } + } else { + // tag_ptr in primary hash table + assert(tag_index < primaryBound); + tag_ptr->status = 0; + unsigned tmp_set = hash(tag_ptr->tag << tagShift); + if (sets[tmp_set].chain_ptr != tagNull) { // collapse chain + tmp_index = sets[tmp_set].chain_ptr; + tagSwap(tag_index, tmp_index); + tagStore[tmp_index].chain_ptr = freelist; + freelist = tmp_index; + freeSecond++; + sets[tmp_set].chain_ptr = tag_ptr->chain_ptr; + sets[tmp_set].moveToTail(tag_ptr); + } + } + } +} + +void +IIC::freeDataBlock(unsigned long data_ptr) +{ + assert(dataReferenceCount[data_ptr] == 0); + DPRINTF(IICMore, "Freeing data at %d\n", data_ptr); + blkFreelist.push_front(data_ptr); +} + +/** Use a simple modulo hash. 
*/ +#define SIMPLE_HASH 0 + +unsigned +IIC::hash(Addr addr) const { +#if SIMPLE_HASH + return extractTag(addr) % iic_hash_size; +#else + Addr tag, mask, x, y; + tag = extractTag(addr); + mask = hashSets-1; /* assumes iic_hash_size is a power of 2 */ + x = tag & mask; + y = (tag >> (int)(::log(hashSets)/::log(2))) & mask; + assert (x < hashSets && y < hashSets); + return x ^ y; +#endif +} + + +void +IICSet::moveToHead(IICTag *tag) +{ + if (tags[0] == tag) + return; + + // write 'next' block into blks[i], moving up from MRU toward LRU + // until we overwrite the block we moved to head. + + // start by setting up to write 'blk' into blks[0] + int i = 0; + IICTag *next = tag; + + do { + assert(i < assoc); + // swap blks[i] and next + IICTag *tmp = tags[i]; + tags[i] = next; + next = tmp; + ++i; + } while (next != tag); +} + +void +IICSet::moveToTail(IICTag *tag) +{ + if (tags[assoc-1] == tag) + return; + + // write 'next' block into blks[i], moving up from MRU toward LRU + // until we overwrite the block we moved to head. + + // start by setting up to write 'blk' into blks[0] + int i = assoc - 1; + IICTag *next = tag; + + do { + assert(i >= 0); + // swap blks[i] and next + IICTag *tmp = tags[i]; + tags[i] = next; + next = tmp; + --i; + } while (next != tag); +} + +void +IIC::tagSwap(unsigned long index1, unsigned long index2) +{ + DPRINTF(IIC,"Swapping tag[%d]=%x for tag[%d]=%x\n",index1, + tagStore[index1].tag<fixTag(tagStore[index1].re, index2, index1); + if (tagStore[index2].isValid()) + repl->fixTag(tagStore[index2].re, index1, index2); +} + + +IICTag * +IIC::secondaryChain(int asid, Addr tag, unsigned long chain_ptr, + int *_depth) const +{ + int depth = 0; + while (chain_ptr != tagNull) { + DPRINTF(IIC,"Searching secondary at %d for %x\n", chain_ptr, + tag<isCompressed()) { + // decompress the data here. + } +} + +void +IIC::compressBlock(unsigned long index) +{ + IICTag *tag_ptr = &tagStore[index]; + if (!tag_ptr->isCompressed()) { + // Compress the data here. 
+ } +} + +void +IIC::invalidateBlk(int asid, Addr addr) +{ + IICTag* tag_ptr = findBlock(addr, asid); + if (tag_ptr) { + for (int i = 0; i < tag_ptr->numData; ++i) { + dataReferenceCount[tag_ptr->data_ptr[i]]--; + if (dataReferenceCount[tag_ptr->data_ptr[i]] == 0) { + freeDataBlock(tag_ptr->data_ptr[i]); + } + } + repl->removeEntry(tag_ptr->re); + freeTag(tag_ptr); + } +} + +void +IIC::readData(IICTag *blk, uint8_t *data){ + assert(cache->doData()); + assert(blk->size <= trivialSize || blk->numData > 0); + int data_size = blk->size; + if (data_size > trivialSize) { + for (int i = 0; i < blk->numData; ++i){ + memcpy(data+i*subSize, + &(dataBlks[blk->data_ptr[i]][0]), + (data_size>subSize)?subSize:data_size); + data_size -= subSize; + } + } else { + memcpy(data,blk->trivialData,data_size); + } +} + +void +IIC::writeData(IICTag *blk, uint8_t *write_data, int size, + PacketList* & writebacks){ + assert(cache->doData()); + assert(size < blkSize || !blk->isCompressed()); + DPRINTF(IIC, "Writing %d bytes to %x\n", size, + blk->tag< blk->numData) { + // need to allocate more data blocks + for (int i = blk->numData; i < num_subs; ++i){ + blk->data_ptr[i] = getFreeDataBlock(writebacks); + dataReferenceCount[blk->data_ptr[i]] += 1; + } + } else if (num_subs < blk->numData){ + // can free data blocks + for (int i=num_subs; i < blk->numData; ++i){ + // decrement reference count and compare to zero + /** + * @todo + * Make this work with copying. 
+ */ + if (--dataReferenceCount[blk->data_ptr[i]] == 0) { + freeDataBlock(blk->data_ptr[i]); + } + } + } + + blk->numData = num_subs; + blk->size = size; + assert(size <= trivialSize || blk->numData > 0); + if (size > trivialSize){ + for (int i = 0; i < blk->numData; ++i){ + memcpy(&dataBlks[blk->data_ptr[i]][0], write_data + i*subSize, + (size>subSize)?subSize:size); + size -= subSize; + } + } else { + memcpy(blk->trivialData,write_data,size); + } +} + + +/** + * @todo This code can break if the src is evicted to get a tag for the dest. + */ +void +IIC::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) +{ + IICTag *dest_tag = findBlock(dest, asid); + + if (dest_tag) { + for (int i = 0; i < dest_tag->numData; ++i) { + if (--dataReferenceCount[dest_tag->data_ptr[i]] == 0) { + freeDataBlock(dest_tag->data_ptr[i]); + } + } + // Reset replacement entry + } else { + dest_tag = getFreeTag(hash(dest), writebacks); + dest_tag->re = (void*) repl->add(dest_tag - tagStore); + dest_tag->set = hash(dest); + dest_tag->tag = extractTag(dest); + dest_tag->req->asid = asid; + dest_tag->status = BlkValid | BlkWritable; + } + // Find the source tag here since it might move if we need to find a + // tag for the destination. + IICTag *src_tag = findBlock(source, asid); + assert(src_tag); + assert(!cache->doData() || src_tag->size <= trivialSize + || src_tag->numData > 0); + // point dest to source data and inc counter + for (int i = 0; i < src_tag->numData; ++i) { + dest_tag->data_ptr[i] = src_tag->data_ptr[i]; + ++dataReferenceCount[dest_tag->data_ptr[i]]; + } + + // Maintain fast access data. + memcpy(dest_tag->data, src_tag->data, blkSize); + + dest_tag->xc = src_tag->xc; + dest_tag->size = src_tag->size; + dest_tag->numData = src_tag->numData; + if (src_tag->numData == 0) { + // Data is stored in the trivial data, just copy it. 
+ memcpy(dest_tag->trivialData, src_tag->trivialData, src_tag->size); + } + + dest_tag->status |= BlkDirty; + if (dest_tag->size < blkSize) { + dest_tag->status |= BlkCompressed; + } else { + dest_tag->status &= ~BlkCompressed; + } +} + +void +IIC::fixCopy(Packet * &pkt, PacketList* &writebacks) +{ + // if reference counter is greater than 1, do copy + // else do write + Addr blk_addr = blkAlign(pkt->paddr); + IICTag* blk = findBlock(blk_addr, pkt->req->asid); + + if (blk->numData > 0 && dataReferenceCount[blk->data_ptr[0]] != 1) { + // copy the data + // Mark the block as referenced so it doesn't get replaced. + blk->status |= BlkReferenced; + for (int i = 0; i < blk->numData; ++i){ + unsigned long new_data = getFreeDataBlock(writebacks); + // Need to refresh pointer + /** + * @todo Remove this refetch once we change IIC to pointer based + */ + blk = findBlock(blk_addr, pkt->req->asid); + assert(blk); + if (cache->doData()) { + memcpy(&(dataBlks[new_data][0]), + &(dataBlks[blk->data_ptr[i]][0]), + subSize); + } + dataReferenceCount[blk->data_ptr[i]]--; + dataReferenceCount[new_data]++; + blk->data_ptr[i] = new_data; + } + } +} + +void +IIC::cleanupRefs() +{ + for (int i = 0; i < numTags; ++i) { + if (tagStore[i].isValid()) { + totalRefs += tagStore[i].refCount; + ++sampledRefs; + } + } +} diff --git a/src/mem/cache/tags/iic.hh b/src/mem/cache/tags/iic.hh new file mode 100644 index 0000000000..ef3f03c534 --- /dev/null +++ b/src/mem/cache/tags/iic.hh @@ -0,0 +1,574 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Declaration of the Indirect Index Cache (IIC) tags store. + */ + +#ifndef __IIC_HH__ +#define __IIC_HH__ + +#include +#include + +#include "mem/cache/cache_blk.hh" +#include "mem/cache/tags/repl/repl.hh" +#include "mem/packet.hh" +#include "base/statistics.hh" +#include "mem/cache/tags/base_tags.hh" + +class BaseCache; // Forward declaration + +/** + * IIC cache blk. 
+ */ +class IICTag : public CacheBlk +{ + public: + /** + * Copy the contents of the given IICTag into this one. Only the first + * rhs.numData data pointers are copied, and trivialData is copied as a + * pointer (the bytes it refers to are shared, not duplicated). + * Self-assignment is a no-op. + * @param rhs The tag to copy. + * @return const reference to this tag. + */ + const IICTag& operator=(const IICTag& rhs) + { + // Guard against self-assignment: clearing data_ptr below would + // otherwise discard the very entries we are about to read. + if (this == &rhs) + return *this; + CacheBlk::operator=(rhs); + chain_ptr = rhs.chain_ptr; + re = rhs.re; + set = rhs.set; + trivialData = rhs.trivialData; + numData = rhs.numData; + data_ptr.clear(); + for (int i = 0; i < rhs.numData; ++i) { + data_ptr.push_back(rhs.data_ptr[i]); + } + return *this; + } + + /** Hash chain pointer into secondary store. */ + unsigned long chain_ptr; + /** Data array pointers for each subblock. */ + std::vector<unsigned long> data_ptr; + /** Replacement Entry pointer. */ + void *re; + /** + * An array to store small compressed data. Conceptually the same size + * as the unused data array pointers. + */ + uint8_t *trivialData; + /** + * The number of allocated subblocks. + */ + int numData; +}; + +/** + * A hash set for the IIC primary lookup table. + */ +class IICSet{ + public: + /** The associativity of the primary table. */ + int assoc; + + /** The number of hash chains followed when finding the last block. */ + int depth; + /** The current number of blocks on the chain. */ + int size; + + /** Tag pointer into the secondary tag storage. */ + unsigned long chain_ptr; + + /** The LRU list of the primary table. MRU is at 0 index. */ + IICTag ** tags; + + /** + * Find the addr in this set, return the chain pointer to the secondary if + * it isn't found. + * @param asid The address space ID. + * @param tag The address to find. + * @param chain_ptr The chain pointer to start the search of the secondary + * @return Pointer to the tag, NULL if not found. + */ + IICTag* findTag(int asid, Addr tag, unsigned long &chain_ptr) + { + depth = 1; + for (int i = 0; i < assoc; ++i) { + if (tags[i]->tag == tag && tags[i]->isValid()) { + return tags[i]; + } + } + chain_ptr = this->chain_ptr; + return 0; + } + + /** + * Find an unused tag in this set.
+ * @return Pointer to the unused tag, NULL if none are free. + */ + IICTag* findFree() + { + for (int i = 0; i < assoc; ++i) { + if (!tags[i]->isValid()) { + return tags[i]; + } + } + return 0; + } + + /** + * Move a tag to the head of the LRU list + * @param tag The tag to move. + */ + void moveToHead(IICTag *tag); + + /** + * Move a tag to the tail (LRU) of the LRU list + * @param tag The tag to move. + */ + void moveToTail(IICTag *tag); +}; + +/** + * The IIC tag store. This is a hardware-realizable, fully-associative tag + * store that uses software replacement, e.g. Gen. + */ +class IIC : public BaseTags +{ + public: + /** Typedef of the block type used in this class. */ + typedef IICTag BlkType; + /** Typedef for list of pointers to the local block type. */ + typedef std::list BlkList; + protected: + /** The number of set in the primary table. */ + const int hashSets; + /** The block size in bytes. */ + const int blkSize; + /** The associativity of the primary table. */ + const int assoc; + /** The base hit latency. */ + const int hitLatency; + /** The subblock size, used for compression. */ + const int subSize; + + /** The number of subblocks */ + const int numSub; + /** The number of bytes used by data pointers */ + const int trivialSize; + + /** The amount to shift address to get the tag. */ + const int tagShift; + /** The mask to get block offset bits. */ + const unsigned blkMask; + + /** The amount to shift to get the subblock number. */ + const int subShift; + /** The mask to get the correct subblock number. */ + const unsigned subMask; + + /** The latency of a hash lookup. */ + const int hashDelay; + /** The number of data blocks. */ + const int numBlocks; + /** The total number of tags in primary and secondary. */ + const int numTags; + /** The number of tags in the secondary tag store. */ + const int numSecondary; + + /** The Null tag pointer. */ + const int tagNull; + /** The last tag in the primary table. 
*/ + const int primaryBound; + + /** All of the tags */ + IICTag *tagStore; + /** + * Pointer to the head of the secondary freelist (maintained with chain + * pointers. + */ + unsigned long freelist; + /** + * The data block freelist. + */ + std::list blkFreelist; + + /** The primary table. */ + IICSet *sets; + + /** The replacement policy. */ + Repl *repl; + + /** An array of data reference counters. */ + int *dataReferenceCount; + + /** The data blocks. */ + uint8_t *dataStore; + + /** Storage for the fast access data of each cache block. */ + uint8_t **dataBlks; + + /** + * Count of the current number of free secondary tags. + * Used for debugging. + */ + int freeSecond; + + // IIC Statistics + /** + * @addtogroup IICStatistics IIC Statistics + * @{ + */ + + /** Hash hit depth of cache hits. */ + Stats::Distribution<> hitHashDepth; + /** Hash depth for cache misses. */ + Stats::Distribution<> missHashDepth; + /** Count of accesses to each hash set. */ + Stats::Distribution<> setAccess; + + /** The total hash depth for every miss. */ + Stats::Scalar<> missDepthTotal; + /** The total hash depth for all hits. */ + Stats::Scalar<> hitDepthTotal; + /** The number of hash misses. */ + Stats::Scalar<> hashMiss; + /** The number of hash hits. */ + Stats::Scalar<> hashHit; + /** @} */ + + public: + /** + * Collection of parameters for the IIC. + */ + class Params { + public: + /** The size in bytes of the cache. */ + int size; + /** The number of sets in the primary table. */ + int numSets; + /** The block size in bytes. */ + int blkSize; + /** The associativity of the primary table. */ + int assoc; + /** The number of cycles for each hash lookup. */ + int hashDelay; + /** The number of cycles to read the data. */ + int hitLatency; + /** The replacement policy. */ + Repl *rp; + /** The subblock size in bytes. */ + int subblockSize; + }; + + /** + * Construct and initialize this tag store. + * @param params The IIC parameters. 
+ * @todo + * Should make a way to have less tags in the primary than blks in the + * cache. Also should be able to specify number of secondary blks. + */ + IIC(Params ¶ms); + + /** + * Destructor. + */ + virtual ~IIC(); + + /** + * Register the statistics. + * @param name The name to prepend to the statistic descriptions. + */ + void regStats(const std::string &name); + + /** + * Regenerate the block address from the tag. + * @param tag The tag of the block. + * @param set Not needed for the iic. + * @return The block address. + */ + Addr regenerateBlkAddr(Addr tag, int set) { + return (((Addr)tag << tagShift)); + } + + /** + * Return the block size. + * @return The block size. + */ + int getBlockSize() + { + return blkSize; + } + + /** + * Return the subblock size. + * @return The subblock size. + */ + int getSubBlockSize() + { + return subSize; + } + + /** + * Return the hit latency. + * @return the hit latency. + */ + int getHitLatency() const + { + return hitLatency; + } + + /** + * Generate the tag from the address. + * @param addr The address to a get a tag for. + * @param blk Ignored here. + * @return the tag. + */ + Addr extractTag(Addr addr, IICTag *blk) const + { + return (addr >> tagShift); + } + + /** + * Generate the tag from the address. + * @param addr The address to a get a tag for. + * @return the tag. + */ + Addr extractTag(Addr addr) const + { + return (addr >> tagShift); + } + + /** + * Return the set, always 0 for IIC. + * @return 0. + */ + int extractSet(Addr addr) const + { + return 0; + } + + /** + * Get the block offset of an address. + * @param addr The address to get the offset of. + * @return the block offset of the address. + */ + int extractBlkOffset(Addr addr) const + { + return (addr & blkMask); + } + + /** + * Align an address to the block size. + * @param addr the address to align. + * @return The block address. 
+ */ + Addr blkAlign(Addr addr) const + { + return (addr & ~(Addr)blkMask); + } + + /** + * Check for the address in the tagstore. + * @param asid The address space ID. + * @param addr The address to find. + * @return true if it is found. + */ + bool probe(int asid, Addr addr) const; + + /** + * Swap the position of two tags. + * @param index1 The first tag location. + * @param index2 The second tag location. + */ + void tagSwap(unsigned long index1, unsigned long index2); + + /** + * Clear the reference bit of the tag and return its old value. + * @param index The pointer of the tag to manipulate. + * @return The previous state of the reference bit. + */ + bool clearRef(unsigned long index) + { + bool tmp = tagStore[index].isReferenced(); + tagStore[index].status &= ~BlkReferenced; + return tmp; + } + + /** + * Decompress a block if it is compressed. + * @param index The tag store index for the block to uncompress. + */ + void decompressBlock(unsigned long index); + + /** + * Try and compress a block if it is not already compressed. + * @param index The tag store index for the block to compress. + */ + void compressBlock(unsigned long index); + + /** + * Invalidate the block containing the address. + * @param asid The address space ID. + * @param addr The address to invalidate. + */ + void invalidateBlk(int asid, Addr addr); + + /** + * Find the block and update the replacement data. This call also returns + * the access latency as a side effect. + * @param addr The address to find. + * @param asid The address space ID. + * @param lat The access latency. + * @return A pointer to the block found, if any. + */ + IICTag* findBlock(Addr addr, int asid, int &lat); + + /** + * Find the block and update the replacement data. This call also returns + * the access latency as a side effect. + * @param req The req whose block to find + * @param lat The access latency. + * @return A pointer to the block found, if any. 
+ */ + IICTag* findBlock(Packet * &pkt, int &lat); + + /** + * Find the block, do not update the replacement data. + * @param addr The address to find. + * @param asid The address space ID. + * @return A pointer to the block found, if any. + */ + IICTag* findBlock(Addr addr, int asid) const; + + /** + * Find a replacement block for the address provided. + * @param req The request to a find a replacement candidate for. + * @param writebacks List for any writebacks to be performed. + * @param compress_blocks List of blocks to compress, for adaptive comp. + * @return The block to place the replacement in. + */ + IICTag* findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks); + + /** + * Read the data from the internal storage of the given cache block. + * @param blk The block to read the data from. + * @param data The buffer to read the data into. + * @return The cache block's data. + */ + void readData(IICTag *blk, uint8_t *data); + + /** + * Write the data into the internal storage of the given cache block. + * @param blk The block to write to. + * @param data The data to write. + * @param size The number of bytes to write. + * @param writebacks A list for any writebacks to be performed. May be + * needed when writing to a compressed block. + */ + void writeData(IICTag *blk, uint8_t *data, int size, + PacketList* & writebacks); + + /** + * Perform a block aligned copy from the source address to the destination. + * @param source The block-aligned source address. + * @param dest The block-aligned destination address. + * @param asid The address space DI. + * @param writebacks List for any generated writeback requests. + */ + void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks); + + /** + * If a block is currently marked copy on write, copy it before writing. + * @param req The write request. + * @param writebacks List for any generated writeback requests. 
+ */ + void fixCopy(Packet * &pkt, PacketList* &writebacks); + + /** + * Called at end of simulation to complete average block reference stats. + */ + virtual void cleanupRefs(); +private: + /** + * Return the hash of the address. + * @param addr The address to hash. + * @return the hash of the address. + */ + unsigned hash(Addr addr) const; + + /** + * Search for a block in the secondary tag store. Returns the number of + * hash lookups as a side effect. + * @param asid The address space ID. + * @param tag The tag to match. + * @param chain_ptr The first entry to search. + * @param depth The number of hash lookups made while searching. + * @return A pointer to the block if found. + */ + IICTag *secondaryChain(int asid, Addr tag, unsigned long chain_ptr, + int *depth) const; + + /** + * Free the resources associated with the next replacement block. + * @param writebacks A list of any writebacks to perform. + */ + void freeReplacementBlock(PacketList* & writebacks); + + /** + * Return the pointer to a free data block. + * @param writebacks A list of any writebacks to perform. + * @return A pointer to a free data block. + */ + unsigned long getFreeDataBlock(PacketList* & writebacks); + + /** + * Get a free tag in the given hash set. + * @param set The hash set to search. + * @param writebacks A list of any writebacks to perform. + * @return a pointer to a free tag. + */ + IICTag* getFreeTag(int set, PacketList* & writebacks); + + /** + * Free the resources associated with the given tag. + * @param tag_ptr The tag to free. + */ + void freeTag(IICTag *tag_ptr); + + /** + * Mark the given data block as being available. + * @param data_ptr The data block to free. 
+ */ + void freeDataBlock(unsigned long data_ptr); +}; +#endif // __IIC_HH__ + diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc new file mode 100644 index 0000000000..0fe88fd087 --- /dev/null +++ b/src/mem/cache/tags/lru.cc @@ -0,0 +1,310 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Definitions of LRU tag store. 
+ */ + +#include + +#include "mem/cache/base_cache.hh" +#include "base/intmath.hh" +#include "mem/cache/tags/lru.hh" +#include "sim/root.hh" + +using namespace std; + +LRUBlk* +CacheSet::findBlk(int asid, Addr tag) const +{ + for (int i = 0; i < assoc; ++i) { + if (blks[i]->tag == tag && blks[i]->isValid()) { + return blks[i]; + } + } + return 0; +} + + +void +CacheSet::moveToHead(LRUBlk *blk) +{ + // nothing to do if blk is already head + if (blks[0] == blk) + return; + + // write 'next' block into blks[i], moving up from MRU toward LRU + // until we overwrite the block we moved to head. + + // start by setting up to write 'blk' into blks[0] + int i = 0; + LRUBlk *next = blk; + + do { + assert(i < assoc); + // swap blks[i] and next + LRUBlk *tmp = blks[i]; + blks[i] = next; + next = tmp; + ++i; + } while (next != blk); +} + + +// create and initialize a LRU/MRU cache structure +LRU::LRU(int _numSets, int _blkSize, int _assoc, int _hit_latency) : + numSets(_numSets), blkSize(_blkSize), assoc(_assoc), hitLatency(_hit_latency) +{ + // Check parameters + if (blkSize < 4 || !isPowerOf2(blkSize)) { + fatal("Block size must be at least 4 and a power of 2"); + } + if (numSets <= 0 || !isPowerOf2(numSets)) { + fatal("# of sets must be non-zero and a power of 2"); + } + if (assoc <= 0) { + fatal("associativity must be greater than zero"); + } + if (hitLatency <= 0) { + fatal("access latency must be greater than zero"); + } + + LRUBlk *blk; + int i, j, blkIndex; + + blkMask = blkSize - 1; + setShift = floorLog2(blkSize); + setMask = numSets - 1; + tagShift = setShift + floorLog2(numSets); + warmedUp = false; + /** @todo Make warmup percentage a parameter.
*/ + warmupBound = numSets * assoc; + + sets = new CacheSet[numSets]; + blks = new LRUBlk[numSets * assoc]; + // allocate data storage in one big chunk + dataBlks = new uint8_t[numSets*assoc*blkSize]; + + blkIndex = 0; // index into blks array + for (i = 0; i < numSets; ++i) { + sets[i].assoc = assoc; + + sets[i].blks = new LRUBlk*[assoc]; + + // link in the data blocks + for (j = 0; j < assoc; ++j) { + // locate next cache block + blk = &blks[blkIndex]; + blk->data = &dataBlks[blkSize*blkIndex]; + ++blkIndex; + + // invalidate new cache block + blk->status = 0; + + //EGH Fix Me : do we need to initialize blk? + + // Setting the tag to j is just to prevent long chains in the hash + // table; won't matter because the block is invalid + blk->tag = j; + blk->whenReady = 0; + blk->req->asid = -1; + blk->isTouched = false; + blk->size = blkSize; + sets[i].blks[j]=blk; + blk->set = i; + } + } +} + +// release everything the constructor allocated +LRU::~LRU() +{ + delete [] dataBlks; + delete [] blks; + // Each set holds a pointer array allocated with new[] in the + // constructor; free those before the sets themselves. + for (int i = 0; i < numSets; ++i) { + delete [] sets[i].blks; + } + delete [] sets; +} + +// probe cache for presence of given block.
+bool +LRU::probe(int asid, Addr addr) const +{ + // return(findBlock(Read, addr, asid) != 0); + Addr tag = extractTag(addr); + unsigned myset = extractSet(addr); + + LRUBlk *blk = sets[myset].findBlk(asid, tag); + + return (blk != NULL); // true if in cache +} + +LRUBlk* +LRU::findBlock(Addr addr, int asid, int &lat) +{ + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + LRUBlk *blk = sets[set].findBlk(asid, tag); + lat = hitLatency; + if (blk != NULL) { + // move this block to head of the MRU list + sets[set].moveToHead(blk); + if (blk->whenReady > curTick + && blk->whenReady - curTick > hitLatency) { + lat = blk->whenReady - curTick; + } + blk->refCount += 1; + } + + return blk; +} + +LRUBlk* +LRU::findBlock(Packet * &pkt, int &lat) +{ + // Same lookup and replacement update as the address-based overload; + // only the address and asid come from the packet. + return findBlock(pkt->paddr, pkt->req->asid, lat); +} + +LRUBlk* +LRU::findBlock(Addr addr, int asid) const +{ + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + LRUBlk *blk = sets[set].findBlk(asid, tag); + return blk; +} + +LRUBlk* +LRU::findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks) +{ + unsigned set = extractSet(pkt->paddr); + // grab a replacement candidate + LRUBlk *blk = sets[set].blks[assoc-1]; + sets[set].moveToHead(blk); + if (blk->isValid()) { + int thread_num = (blk->xc) ? 
blk->xc->getThreadNum() : 0; + replacements[thread_num]++; + totalRefs += blk->refCount; + ++sampledRefs; + blk->refCount = 0; + } else if (!blk->isTouched) { + tagsInUse++; + blk->isTouched = true; + if (!warmedUp && tagsInUse.value() >= warmupBound) { + warmedUp = true; + warmupCycle = curTick; + } + } + + return blk; +} + +void +LRU::invalidateBlk(int asid, Addr addr) +{ + LRUBlk *blk = findBlock(addr, asid); + if (blk) { + blk->status = 0; + blk->isTouched = false; + tagsInUse--; + } +} + +void +LRU::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) +{ + assert(source == blkAlign(source)); + assert(dest == blkAlign(dest)); + LRUBlk *source_blk = findBlock(source, asid); + assert(source_blk); + LRUBlk *dest_blk = findBlock(dest, asid); + if (dest_blk == NULL) { + // Need to do a replacement + Packet * pkt = new Packet(); + pkt->paddr = dest; + BlkList dummy_list; + dest_blk = findReplacement(pkt, writebacks, dummy_list); + // findReplacement only reads the address out of this temporary + // packet, so release it here rather than leaking it (the pointer + // is overwritten below on the writeback path). + delete pkt; + pkt = NULL; + if (dest_blk->isValid() && dest_blk->isModified()) { + // Need to writeback data. + pkt = buildWritebackReq(regenerateBlkAddr(dest_blk->tag, + dest_blk->set), + dest_blk->req->asid, + dest_blk->xc, + blkSize, + (cache->doData())?dest_blk->data:0, + dest_blk->size); + writebacks.push_back(pkt); + } + dest_blk->tag = extractTag(dest); + dest_blk->req->asid = asid; + /** + * @todo Do we need to pass in the execution context, or can we + * assume its the same? + */ + assert(source_blk->xc); + dest_blk->xc = source_blk->xc; + } + /** + * @todo Can't assume the status once we have coherence on copies. + */ + + // Set this block as readable, writeable, and dirty.
+ dest_blk->status = 7; + if (cache->doData()) { + memcpy(dest_blk->data, source_blk->data, blkSize); + } +} + +void +LRU::cleanupRefs() +{ + for (int i = 0; i < numSets*assoc; ++i) { + if (blks[i].isValid()) { + totalRefs += blks[i].refCount; + ++sampledRefs; + } + } +} diff --git a/src/mem/cache/tags/lru.hh b/src/mem/cache/tags/lru.hh new file mode 100644 index 0000000000..9b4a557772 --- /dev/null +++ b/src/mem/cache/tags/lru.hh @@ -0,0 +1,327 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Declaration of a LRU tag store. + */ + +#ifndef __LRU_HH__ +#define __LRU_HH__ + +#include + +#include "mem/cache/cache_blk.hh" // base class +#include "mem/packet.hh" // for inlined functions +#include +#include "mem/cache/tags/base_tags.hh" + +class BaseCache; + +/** + * LRU cache block. + */ +class LRUBlk : public CacheBlk { + public: + /** Has this block been touched? Used to aid calculation of warmup time. */ + bool isTouched; +}; + +/** + * An associative set of cache blocks. + */ +class CacheSet +{ + public: + /** The associativity of this set. */ + int assoc; + + /** Cache blocks in this set, maintained in LRU order 0 = MRU. */ + LRUBlk **blks; + + /** + * Find a block matching the tag in this set. + * @param asid The address space ID. + * @param tag The Tag to find. + * @return Pointer to the block if found. + */ + LRUBlk* findBlk(int asid, Addr tag) const; + + /** + * Move the given block to the head of the list. + * @param blk The block to move. + */ + void moveToHead(LRUBlk *blk); +}; + +/** + * A LRU cache tag store. + */ +class LRU : public BaseTags +{ + public: + /** Typedef the block type used in this tag store. */ + typedef LRUBlk BlkType; + /** Typedef for a list of pointers to the local block class. */ + typedef std::list BlkList; + protected: + /** The number of sets in the cache. */ + const int numSets; + /** The number of bytes in a block. 
*/ + const int blkSize; + /** The associativity of the cache. */ + const int assoc; + /** The hit latency. */ + const int hitLatency; + + /** The cache sets. */ + CacheSet *sets; + + /** The cache blocks. */ + LRUBlk *blks; + /** The data blocks, 1 per cache block. */ + uint8_t *dataBlks; + + /** The amount to shift the address to get the set. */ + int setShift; + /** The amount to shift the address to get the tag. */ + int tagShift; + /** Mask out all bits that aren't part of the set index. */ + unsigned setMask; + /** Mask out all bits that aren't part of the block offset. */ + unsigned blkMask; + +public: + /** + * Construct and initialize this tag store. + * @param _numSets The number of sets in the cache. + * @param _blkSize The number of bytes in a block. + * @param _assoc The associativity of the cache. + * @param _hit_latency The latency in cycles for a hit. + */ + LRU(int _numSets, int _blkSize, int _assoc, int _hit_latency); + + /** + * Destructor + */ + virtual ~LRU(); + + /** + * Return the block size. + * @return the block size. + */ + int getBlockSize() + { + return blkSize; + } + + /** + * Return the subblock size. In the case of LRU it is always the block + * size. + * @return The block size. + */ + int getSubBlockSize() + { + return blkSize; + } + + /** + * Search for the address in the cache. + * @param asid The address space ID. + * @param addr The address to find. + * @return True if the address is in the cache. + */ + bool probe(int asid, Addr addr) const; + + /** + * Invalidate the block containing the given address. + * @param asid The address space ID. + * @param addr The address to invalidate. + */ + void invalidateBlk(int asid, Addr addr); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param req The request whose block to find. + * @param lat The access latency. + * @return Pointer to the cache block if found. 
+ */ + LRUBlk* findBlock(Packet * &pkt, int &lat); + + /** + * Finds the given address in the cache and updates replacement data. + * Returns the access latency as a side effect. + * @param addr The address to find. + * @param asid The address space ID. + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + LRUBlk* findBlock(Addr addr, int asid, int &lat); + + /** + * Finds the given address in the cache without updating replacement data. + * @param addr The address to find. + * @param asid The address space ID. + * @return Pointer to the cache block if found. + */ + LRUBlk* findBlock(Addr addr, int asid) const; + + /** + * Find a replacement block for the address provided. + * @param pkt The packet to find a replacement candidate for. + * @param writebacks List for any writebacks to be performed. + * @param compress_blocks List of blocks to compress, for adaptive comp. + * @return The block to place the replacement in. + */ + LRUBlk* findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks); + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @return The tag of the address. + */ + Addr extractTag(Addr addr) const + { + return (addr >> tagShift); + } + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @param blk Ignored. + * @return The tag of the address. + */ + Addr extractTag(Addr addr, LRUBlk *blk) const + { + return (addr >> tagShift); + } + + /** + * Calculate the set index from the address. + * @param addr The address to get the set from. + * @return The set index of the address. + */ + int extractSet(Addr addr) const + { + return ((addr >> setShift) & setMask); + } + + /** + * Get the block offset from an address. + * @param addr The address to get the offset of. + * @return The block offset.
+ */ + int extractBlkOffset(Addr addr) const + { + return (addr & blkMask); + } + + /** + * Align an address to the block size. + * @param addr the address to align. + * @return The block address. + */ + Addr blkAlign(Addr addr) const + { + return (addr & ~(Addr)blkMask); + } + + /** + * Regenerate the block address from the tag. + * @param tag The tag of the block. + * @param set The set of the block. + * @return The block address. + */ + Addr regenerateBlkAddr(Addr tag, unsigned set) const + { + return ((tag << tagShift) | ((Addr)set << setShift)); + } + + /** + * Return the hit latency. + * @return the hit latency. + */ + int getHitLatency() const + { + return hitLatency; + } + + /** + * Read the data out of the internal storage of the given cache block. + * @param blk The cache block to read. + * @param data The buffer to read the data into. + * @note Copies blk->size bytes into data; nothing is returned. + */ + void readData(LRUBlk *blk, uint8_t *data) + { + memcpy(data, blk->data, blk->size); + } + + /** + * Write data into the internal storage of the given cache block. Since + * LRU does not store data differently, this just needs to update the size. + * @param blk The cache block to write. + * @param data The data to write (not copied here; only the size is recorded). + * @param size The number of bytes to write. + * @param writebacks A list for any writebacks to be performed. May be + * needed when writing to a compressed block. + */ + void writeData(LRUBlk *blk, uint8_t *data, int size, + PacketList* & writebacks) + { + assert(size <= blkSize); + blk->size = size; + } + + /** + * Perform a block aligned copy from the source address to the destination. + * @param source The block-aligned source address. + * @param dest The block-aligned destination address. + * @param asid The address space ID. + * @param writebacks List for any generated writeback requests. + */ + void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks); + + /** + * No impl.
+ */ + void fixCopy(Packet * &pkt, PacketList* &writebacks) + { + } + + /** + * Called at end of simulation to complete average block reference stats. + */ + virtual void cleanupRefs(); +}; + +#endif diff --git a/src/mem/cache/tags/repl/gen.cc b/src/mem/cache/tags/repl/gen.cc new file mode 100644 index 0000000000..ec1c2aaf39 --- /dev/null +++ b/src/mem/cache/tags/repl/gen.cc @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Steve Reinhardt + */ + +/** + * @file + * Definitions of the Generational replacement policy. + */ + +#include + +#include "base/misc.hh" +#include "mem/cache/tags/iic.hh" +#include "mem/cache/tags/repl/gen.hh" +#include "sim/builder.hh" +#include "sim/host.hh" + +using namespace std; + +GenRepl::GenRepl(const string &_name, + int _num_pools, + int _fresh_res, + int _pool_res) // fix this, should be set by cache + : Repl(_name) +{ + num_pools = _num_pools; + fresh_res = _fresh_res; + pool_res = _pool_res; + num_entries = 0; + num_pool_entries = 0; + misses = 0; + pools = new GenPool[num_pools+1]; +} + +GenRepl::~GenRepl() +{ + delete [] pools; +} + +unsigned long +GenRepl::getRepl() +{ + unsigned long tmp; + GenReplEntry *re; + int i; + int num_seen = 0; + if (!(num_pool_entries>0)) { + fatal("No blks available to replace"); + } + num_entries--; + num_pool_entries--; + for (i = 0; i < num_pools; i++) { + while ((re = pools[i].pop())) { + num_seen++; + // Remove invalidated entries + if (!re->valid) { + delete re; + continue; + } + if (iic->clearRef(re->tag_ptr)) { + pools[(((i+1)== num_pools)? 
i :i+1)].push(re, misses); + } + else { + tmp = re->tag_ptr; + delete re; + + repl_pool.sample(i); + + return tmp; + } + } + } + fatal("No replacement found"); + return 0xffffffff; +} + +unsigned long * +GenRepl::getNRepl(int n) +{ + unsigned long *tmp; + GenReplEntry *re; + int i; + if (!(num_pool_entries>(n-1))) { + fatal("Not enough blks available to replace"); + } + num_entries -= n; + num_pool_entries -= n; + tmp = new unsigned long[n]; /* array of cache_blk pointers */ + int blk_index = 0; + for (i = 0; i < num_pools && blk_index < n; i++) { + while (blk_index < n && (re = pools[i].pop())) { + // Remove invalidated entries + if (!re->valid) { + delete re; + continue; + } + if (iic->clearRef(re->tag_ptr)) { + pools[(((i+1)== num_pools)? i :i+1)].push(re, misses); + } + else { + tmp[blk_index] = re->tag_ptr; + blk_index++; + delete re; + repl_pool.sample(i); + } + } + } + if (blk_index >= n) + return tmp; + /* search the fresh pool */ + + fatal("No N replacements found"); + return NULL; +} + +void +GenRepl::doAdvance(std::list<unsigned long> &demoted) +{ + int i; + int num_seen = 0; + GenReplEntry *re; + misses++; // misses serve as this policy's internal clock + for (i=0; i<num_pools; i++) { + while (misses-pools[i].oldest > pool_res && (re = pools[i].pop())!=NULL) { + if (iic->clearRef(re->tag_ptr)) { + pools[(((i+1)== num_pools)? i :i+1)].push(re, misses); + /** @todo Not really demoted, but use it for now. */ + demoted.push_back(re->tag_ptr); + advance_pool.sample(i); + } + else { + pools[(((i-1)<0)?i:i-1)].push(re, misses); + demoted.push_back(re->tag_ptr); + demote_pool.sample(i); + } + } + num_seen += pools[i].size; + } + while (misses-pools[num_pools].oldest > fresh_res + && (re = pools[num_pools].pop())!=NULL) { + num_pool_entries++; + if (iic->clearRef(re->tag_ptr)) { + pools[num_pools/2].push(re, misses); + /** @todo Not really demoted, but use it for now.
*/ + demoted.push_back(re->tag_ptr); + advance_pool.sample(num_pools); + } + else { + pools[num_pools/2-1].push(re, misses); + demoted.push_back(re->tag_ptr); + demote_pool.sample(num_pools); + } + } +} + +void* +GenRepl::add(unsigned long tag_index) +{ + GenReplEntry *re = new GenReplEntry; + re->tag_ptr = tag_index; + re->valid = true; + pools[num_pools].push(re, misses); + num_entries++; + return (void*)re; +} + +void +GenRepl::regStats(const string name) +{ + using namespace Stats; + + /** GEN statistics */ + repl_pool + .init(0, 16, 1) + .name(name + ".repl_pool_dist") + .desc("Dist. of Repl. across pools") + .flags(pdf) + ; + + advance_pool + .init(0, 16, 1) + .name(name + ".advance_pool_dist") + .desc("Dist. of Repl. across pools") + .flags(pdf) + ; + + demote_pool + .init(0, 16, 1) + .name(name + ".demote_pool_dist") + .desc("Dist. of Repl. across pools") + .flags(pdf) + ; +} + +int +GenRepl::fixTag(void* _re, unsigned long old_index, unsigned long new_index) +{ + GenReplEntry *re = (GenReplEntry*)_re; + assert(re->valid); + if (re->tag_ptr == old_index) { + re->tag_ptr = new_index; + return 1; + } + fatal("Repl entry: tag ptrs do not match"); + return 0; +} + +bool +GenRepl::findTagPtr(unsigned long index) +{ + for (int i = 0; i < num_pools + 1; ++i) { + list::const_iterator iter = pools[i].entries.begin(); + list::const_iterator end = pools[i].entries.end(); + for (; iter != end; ++iter) { + if ((*iter)->valid && (*iter)->tag_ptr == index) { + return true; + } + } + } + return false; +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(GenRepl) + + Param num_pools; + Param fresh_res; + Param pool_res; + +END_DECLARE_SIM_OBJECT_PARAMS(GenRepl) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(GenRepl) + + INIT_PARAM(num_pools, "capacity in bytes"), + INIT_PARAM(fresh_res, "associativity"), + INIT_PARAM(pool_res, "block size in bytes") + +END_INIT_SIM_OBJECT_PARAMS(GenRepl) + + +CREATE_SIM_OBJECT(GenRepl) +{ + return new GenRepl(getInstanceName(), 
num_pools, fresh_res, pool_res); +} + +REGISTER_SIM_OBJECT("GenRepl", GenRepl) + +#endif // DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/tags/repl/gen.hh b/src/mem/cache/tags/repl/gen.hh new file mode 100644 index 0000000000..c1ceb3f4eb --- /dev/null +++ b/src/mem/cache/tags/repl/gen.hh @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Erik Hallnor + */ + +/** + * @file + * Declarations of generational replacement policy + */ + +#ifndef __GEN_HH__ +#define __GEN_HH__ + +#include <list> + +#include "base/statistics.hh" +#include "mem/cache/tags/repl/repl.hh" + +/** + * Generational Replacement entry. + */ +class GenReplEntry +{ + public: + /** Valid flag, used to quickly invalidate bogus entries. */ + bool valid; + /** The difference between this entry and the previous in the pool. */ + int delta; + /** Pointer to the corresponding tag in the IIC. */ + unsigned long tag_ptr; +}; + +/** + * Generational replacement pool + */ +class GenPool +{ + public: + /** The time the last entry was added. */ + Tick newest; + /** The time the oldest entry was added. */ + Tick oldest; + /** List of the replacement entries in this pool. */ + std::list<GenReplEntry*> entries; + + /** The number of entries in this pool. */ + int size; + + /** + * Simple constructor. + */ + GenPool() { + newest = 0; + oldest = 0; + size = 0; + } + + /** + * Add an entry to this pool. + * @param re The entry to add. + * @param now The current time. + */ + void push(GenReplEntry *re, Tick now) { + ++size; + if (!entries.empty()) { + re->delta = now - newest; + newest = now; + } else { + re->delta = 0; + newest = oldest = now; + } + entries.push_back(re); + } + + /** + * Remove an entry from the pool. + * @return The entry at the front of the list, or NULL if the pool is empty. + */ + GenReplEntry* pop() { + GenReplEntry *tmp = NULL; + if (!entries.empty()) { + --size; + tmp = entries.front(); + entries.pop_front(); + oldest += tmp->delta; + } + return tmp; + } + + /** + * Return the entry at the front of the list without removing it. + * @return the entry at the front of the list, or NULL if the pool is empty. + */ + GenReplEntry* top() { + return entries.empty() ? NULL : entries.front(); + } + + /** + * Destructor. Deletes every entry still held by the pool. + */ + ~GenPool() { + while (!entries.empty()) { + GenReplEntry *tmp = entries.front(); + entries.pop_front(); + delete tmp; + } + } +}; + +/** + * Generational replacement policy for use with the IIC.
+ * @todo update to use STL and for efficiency + */ +class GenRepl : public Repl +{ + public: + /** The array of pools. */ + GenPool *pools; + /** The number of pools. */ + int num_pools; + /** The amount of time to stay in the fresh pool. */ + int fresh_res; + /** The amount of time to stay in the normal pools. */ + int pool_res; + /** The number of entries added and not yet replaced. */ + int num_entries; + /** The number of entries currently in the pools. */ + int num_pool_entries; + /** The number of misses. Used as the internal time. */ + Tick misses; + + // Statistics + + /** + * @addtogroup CacheStatistics + * @{ + */ + /** The number of replacements from each pool. */ + Stats::Distribution<> repl_pool; + /** The number of advances out of each pool. */ + Stats::Distribution<> advance_pool; + /** The number of demotions from each pool. */ + Stats::Distribution<> demote_pool; + /** + * @} + */ + + /** + * Constructs and initializes this replacement policy. + * @param name The name of the policy. + * @param num_pools The number of pools to use. + * @param fresh_res The amount of time to wait in the fresh pool. + * @param pool_res The amount of time to wait in the normal pools. + */ + GenRepl(const std::string &name, int num_pools, + int fresh_res, int pool_res); + + /** + * Destructor. + */ + ~GenRepl(); + + /** + * Returns the tag pointer of the cache block to replace. + * @return The tag to replace. + */ + virtual unsigned long getRepl(); + + /** + * Return an array of N tag pointers to replace. + * @param n The number of tag pointers to return. + * @return An array of tag pointers to replace. + */ + virtual unsigned long *getNRepl(int n); + + /** + * Update replacement data; tags moved between pools are appended to demoted. + */ + virtual void doAdvance(std::list<unsigned long> &demoted); + + /** + * Add a tag to the replacement policy and return a pointer to the + * replacement entry. + * @param tag_index The tag to add. + * @return The replacement entry. + */ + virtual void* add(unsigned long tag_index); + + /** + * Register statistics.
+ * @param name The name to prepend to statistic descriptions. + */ + virtual void regStats(const std::string name); + + /** + * Update the tag pointer to when the tag moves. + * @param re The replacement entry of the tag. + * @param old_index The old tag pointer. + * @param new_index The new tag pointer. + * @return 1 if successful, 0 otherwise. + */ + virtual int fixTag(void *re, unsigned long old_index, + unsigned long new_index); + + /** + * Remove this entry from the replacement policy. + * @param re The replacement entry to remove + */ + virtual void removeEntry(void *re) + { + ((GenReplEntry*)re)->valid = false; + } + + protected: + /** + * Debug function to verify that there is only one repl entry per tag. + * @param index The tag index to check. + */ + bool findTagPtr(unsigned long index); +}; + +#endif /* __GEN_HH__ */ diff --git a/src/mem/cache/tags/repl/repl.cc b/src/mem/cache/tags/repl/repl.cc new file mode 100644 index 0000000000..ce781eb9f7 --- /dev/null +++ b/src/mem/cache/tags/repl/repl.cc @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Nathan Binkert + */ + +/** + * Definitions of the base replacement class. + */ + +#include "sim/param.hh" +#include "mem/cache/tags/repl/repl.hh" + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +DEFINE_SIM_OBJECT_CLASS_NAME("Repl", Repl) + +#endif //DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/tags/repl/repl.hh b/src/mem/cache/tags/repl/repl.hh new file mode 100644 index 0000000000..7c289a5c1d --- /dev/null +++ b/src/mem/cache/tags/repl/repl.hh @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Steve Reinhardt + * Nathan Binkert + */ + +/** + * @file + * Declaration of a base replacement policy class. + */ + +#ifndef __REPL_HH__ +#define __REPL_HH__ + +#include +#include + +#include "cpu/smt.hh" +#include "sim/host.hh" +#include "sim/sim_object.hh" + + +class IIC; + +/** + * A pure virtual base class that defines the interface of a replacement + * policy. 
+ */ +class Repl : public SimObject +{ + public: + /** Pointer to the IIC using this policy. */ + IIC *iic; + + /** + * Construct and initialize this policy. + * @param name The instance name of this policy. + */ + Repl (const std::string &name) + : SimObject(name) + { + iic = NULL; + } + + /** + * Set the back pointer to the IIC. + * @param iic_ptr Pointer to the IIC. + */ + void setIIC(IIC *iic_ptr) + { + iic = iic_ptr; + } + + /** + * Returns the tag pointer of the cache block to replace. + * @return The tag to replace. + */ + virtual unsigned long getRepl() = 0; + + /** + * Return an array of N tag pointers to replace. + * @param n The number of tag pointers to return. + * @return An array of tag pointers to replace. + */ + virtual unsigned long *getNRepl(int n) = 0; + + /** + * Update replacement data + */ + virtual void doAdvance(std::list<unsigned long> &demoted) = 0; + + /** + * Add a tag to the replacement policy and return a pointer to the + * replacement entry. + * @param tag_index The tag to add. + * @return The replacement entry. + */ + virtual void* add(unsigned long tag_index) = 0; + + /** + * Register statistics. + * @param name The name to prepend to statistic descriptions. + */ + virtual void regStats(const std::string name) = 0; + + /** + * Update the tag pointer when the tag moves. + * @param re The replacement entry of the tag. + * @param old_index The old tag pointer. + * @param new_index The new tag pointer. + * @return 1 if successful, 0 otherwise. + */ + virtual int fixTag(void *re, unsigned long old_index, + unsigned long new_index) = 0; + + /** + * Remove this entry from the replacement policy.
+ * @param re The replacement entry to remove + */ + virtual void removeEntry(void *re) = 0; +}; + +#endif /* SMT_REPL_HH */ diff --git a/src/mem/cache/tags/split.cc b/src/mem/cache/tags/split.cc new file mode 100644 index 0000000000..9d9036abb2 --- /dev/null +++ b/src/mem/cache/tags/split.cc @@ -0,0 +1,478 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Lisa Hsu + */ + +/** + * @file + * Definitions of split cache tag store. + */ + +#include +#include +#include + +#include "base/cprintf.hh" +#include "base/intmath.hh" +#include "base/output.hh" +#include "base/trace.hh" +#include "mem/cache/base_cache.hh" +#include "mem/cache/tags/split.hh" +#include "mem/cache/tags/split_lifo.hh" +#include "mem/cache/tags/split_lru.hh" + + +using namespace std; +using namespace TheISA; + +// create and initialize a partitioned cache structure +Split::Split(int _numSets, int _blkSize, int total_ways, int LRU1_assoc, + bool _lifo, bool _two_queue, int _hit_latency) : + numSets(_numSets), blkSize(_blkSize), lifo(_lifo), hitLatency(_hit_latency) +{ + DPRINTF(Split, "new split cache!!\n"); + + DPRINTF(Split, "lru has %d numSets, %d blkSize, %d assoc, and %d hit_latency\n", + numSets, blkSize, LRU1_assoc, hitLatency); + + lru = new SplitLRU(_numSets, _blkSize, LRU1_assoc, _hit_latency, 1); + + if (total_ways - LRU1_assoc == 0) { + lifo_net = NULL; + lru_net = NULL; + } else { + if (lifo) { + DPRINTF(Split, "Other partition is a LIFO with size %d in bytes. it gets %d ways\n", + (total_ways - LRU1_assoc)*_numSets*_blkSize, (total_ways - LRU1_assoc)); + lifo_net = new SplitLIFO(_blkSize, (total_ways - LRU1_assoc)*_numSets*_blkSize, + (total_ways - LRU1_assoc), _hit_latency, _two_queue, 2); + lru_net = NULL; + } + else { + DPRINTF(Split, "other LRU gets %d ways\n", total_ways - LRU1_assoc); + lru_net = new SplitLRU(_numSets, _blkSize, total_ways - LRU1_assoc, _hit_latency, 2); + lifo_net = NULL; + } + } + + blkMask = blkSize - 1; + + if (!isPowerOf2(total_ways)) + warn("total cache ways/columns %d should be power of 2", + total_ways); + + warmedUp = false; + /** @todo Make warmup percentage a parameter. 
*/ + warmupBound = numSets * total_ways; + +} + +Split::~Split() +{ + delete lru; + if (lifo) + delete lifo_net; + else + delete lru_net; +} + +void +Split::regStats(const string &name) +{ + using namespace Stats; + + BaseTags::regStats(name); + + usedEvictDist.init(0,3000,40); + unusedEvictDist.init(0,3000,40); + useByCPUCycleDist.init(0,35,1); + + nic_repl + .name(name + ".nic_repl") + .desc("number of replacements in the nic partition") + .precision(0) + ; + + cpu_repl + .name(name + ".cpu_repl") + .desc("number of replacements in the cpu partition") + .precision(0) + ; + + lru->regStats(name + ".lru"); + + if (lifo && lifo_net) { + lifo_net->regStats(name + ".lifo_net"); + } else if (lru_net) { + lru_net->regStats(name + ".lru_net"); + } + + nicUsedWhenEvicted + .name(name + ".nicUsedWhenEvicted") + .desc("number of NIC blks that were used before evicted") + ; + + nicUsedTotLatency + .name(name + ".nicUsedTotLatency") + .desc("total cycles before eviction of used NIC blks") + ; + + nicUsedTotEvicted + .name(name + ".nicUsedTotEvicted") + .desc("total number of used NIC blks evicted") + ; + + nicUsedAvgLatency + .name(name + ".nicUsedAvgLatency") + .desc("avg number of cycles a used NIC blk is in cache") + .precision(0) + ; + nicUsedAvgLatency = nicUsedTotLatency / nicUsedTotEvicted; + + usedEvictDist + .name(name + ".usedEvictDist") + .desc("distribution of used NIC blk eviction times") + .flags(pdf | cdf) + ; + + nicUnusedWhenEvicted + .name(name + ".nicUnusedWhenEvicted") + .desc("number of NIC blks that were unused when evicted") + ; + + nicUnusedTotLatency + .name(name + ".nicUnusedTotLatency") + .desc("total cycles before eviction of unused NIC blks") + ; + + nicUnusedTotEvicted + .name(name + ".nicUnusedTotEvicted") + .desc("total number of unused NIC blks evicted") + ; + + nicUnusedAvgLatency + .name(name + ".nicUnusedAvgLatency") + .desc("avg number of cycles an unused NIC blk is in cache") + .precision(0) + ; + nicUnusedAvgLatency = nicUnusedTotLatency 
/ nicUnusedTotEvicted; + + unusedEvictDist + .name(name + ".unusedEvictDist") + .desc("distribution of unused NIC blk eviction times") + .flags(pdf | cdf) + ; + + nicUseByCPUCycleTotal + .name(name + ".nicUseByCPUCycleTotal") + .desc("total latency of NIC blks til usage time") + ; + + nicBlksUsedByCPU + .name(name + ".nicBlksUsedByCPU") + .desc("total number of NIC blks used") + ; + + nicAvgUsageByCPULatency + .name(name + ".nicAvgUsageByCPULatency") + .desc("average number of cycles before a NIC blk that is used gets used") + .precision(0) + ; + nicAvgUsageByCPULatency = nicUseByCPUCycleTotal / nicBlksUsedByCPU; + + useByCPUCycleDist + .name(name + ".useByCPUCycleDist") + .desc("the distribution of cycle time in cache before NIC blk is used") + .flags(pdf | cdf) + ; + + cpuUsedBlks + .name(name + ".cpuUsedBlks") + .desc("number of cpu blks that were used before evicted") + ; + + cpuUnusedBlks + .name(name + ".cpuUnusedBlks") + .desc("number of cpu blks that were unused before evicted") + ; + + nicAvgLatency + .name(name + ".nicAvgLatency") + .desc("avg number of cycles a NIC blk is in cache before evicted") + .precision(0) + ; + nicAvgLatency = (nicUnusedTotLatency + nicUsedTotLatency) / + (nicUnusedTotEvicted + nicUsedTotEvicted); + + NR_CP_hits + .name(name + ".NR_CP_hits") + .desc("NIC requests hitting in CPU Partition") + ; + + NR_NP_hits + .name(name + ".NR_NP_hits") + .desc("NIC requests hitting in NIC Partition") + ; + + CR_CP_hits + .name(name + ".CR_CP_hits") + .desc("CPU requests hitting in CPU partition") + ; + + CR_NP_hits + .name(name + ".CR_NP_hits") + .desc("CPU requests hitting in NIC partition") + ; + +} + +// probe cache for presence of given block. 
+bool +Split::probe(int asid, Addr addr) const +{ + bool success = lru->probe(asid, addr); + if (!success) { + if (lifo && lifo_net) + success = lifo_net->probe(asid, addr); + else if (lru_net) + success = lru_net->probe(asid, addr); + } + + return success; +} + +SplitBlk* +Split::findBlock(Packet * &pkt, int &lat) +{ + + Addr aligned = blkAlign(pkt->paddr); + + if (memHash.count(aligned)) { + memHash[aligned]++; + } else if (pkt->nic_pkt) { + memHash[aligned] = 1; + } + + SplitBlk *blk = lru->findBlock(pkt->paddr, pkt->req->asid, lat); + if (blk) { + if (pkt->nic_pkt) { + NR_CP_hits++; + } else { + CR_CP_hits++; + } + } else { + if (lifo && lifo_net) { + blk = lifo_net->findBlock(pkt->paddr, pkt->req->asid, lat); + + } else if (lru_net) { + blk = lru_net->findBlock(pkt->paddr, pkt->req->asid, lat); + } + if (blk) { + if (pkt->nic_pkt) { + NR_NP_hits++; + } else { + CR_NP_hits++; + } + } + } + + if (blk) { + Tick latency = curTick - blk->ts; + if (blk->isNIC) { + if (!blk->isUsed && !pkt->nic_pkt) { + useByCPUCycleDist.sample(latency); + nicUseByCPUCycleTotal += latency; + nicBlksUsedByCPU++; + } + } + blk->isUsed = true; + + if (pkt->nic_pkt) { + DPRINTF(Split, "found block in partition %d\n", blk->part); + } + } + return blk; +} + +SplitBlk* +Split::findBlock(Addr addr, int asid, int &lat) +{ + SplitBlk *blk = lru->findBlock(addr, asid, lat); + if (!blk) { + if (lifo && lifo_net) { + blk = lifo_net->findBlock(addr, asid, lat); + } else if (lru_net) { + blk = lru_net->findBlock(addr, asid, lat); + } + } + + return blk; +} + +SplitBlk* +Split::findBlock(Addr addr, int asid) const +{ + SplitBlk *blk = lru->findBlock(addr, asid); + if (!blk) { + if (lifo && lifo_net) { + blk = lifo_net->findBlock(addr, asid); + } else if (lru_net) { + blk = lru_net->findBlock(addr, asid); + } + } + + return blk; +} + +SplitBlk* +Split::findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks) +{ + SplitBlk *blk; + + if (pkt->nic_pkt) { + DPRINTF(Split, 
"finding a replacement for nic_req\n"); + nic_repl++; + if (lifo && lifo_net) + blk = lifo_net->findReplacement(pkt, writebacks, + compress_blocks); + else if (lru_net) + blk = lru_net->findReplacement(pkt, writebacks, + compress_blocks); + // in this case, this is an LRU only cache, it's non partitioned + else + blk = lru->findReplacement(pkt, writebacks, compress_blocks); + } else { + DPRINTF(Split, "finding replacement for cpu_req\n"); + blk = lru->findReplacement(pkt, writebacks, + compress_blocks); + cpu_repl++; + } + + Tick latency = curTick - blk->ts; + if (blk->isNIC) { + if (blk->isUsed) { + nicUsedWhenEvicted++; + usedEvictDist.sample(latency); + nicUsedTotLatency += latency; + nicUsedTotEvicted++; + } else { + nicUnusedWhenEvicted++; + unusedEvictDist.sample(latency); + nicUnusedTotLatency += latency; + nicUnusedTotEvicted++; + } + } else { + if (blk->isUsed) { + cpuUsedBlks++; + } else { + cpuUnusedBlks++; + } + } + + // blk attributes for the new blk coming IN + blk->ts = curTick; + blk->isNIC = (pkt->nic_pkt) ? 
true : false; + + return blk; +} + +void +Split::invalidateBlk(int asid, Addr addr) +{ + SplitBlk *blk = lru->findBlock(addr, asid); + if (!blk) { + if (lifo && lifo_net) + blk = lifo_net->findBlock(addr, asid); + else if (lru_net) + blk = lru_net->findBlock(addr, asid); + + if (!blk) + return; + } + + blk->status = 0; + blk->isTouched = false; + tagsInUse--; +} + +void +Split::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) +{ + if (lru->probe(asid, source)) + lru->doCopy(source, dest, asid, writebacks); + else { + if (lifo && lifo_net) + lifo_net->doCopy(source, dest, asid, writebacks); + else if (lru_net) + lru_net->doCopy(source, dest, asid, writebacks); + } +} + +void +Split::cleanupRefs() +{ + lru->cleanupRefs(); + if (lifo && lifo_net) + lifo_net->cleanupRefs(); + else if (lru_net) + lru_net->cleanupRefs(); + + ofstream memPrint(simout.resolve("memory_footprint.txt").c_str(), + ios::trunc); + + // this shouldn't be here but it happens at the end, which is what i want + memIter end = memHash.end(); + for (memIter iter = memHash.begin(); iter != end; ++iter) { + ccprintf(memPrint, "%8x\t%d\n", (*iter).first, (*iter).second); + } +} + +Addr +Split::regenerateBlkAddr(Addr tag, int set) const +{ + if (lifo_net) + return lifo_net->regenerateBlkAddr(tag, set); + else + return lru->regenerateBlkAddr(tag, set); +} + +Addr +Split::extractTag(Addr addr, SplitBlk *blk) const +{ + if (blk->part == 2) { + if (lifo_net) + return lifo_net->extractTag(addr); + else if (lru_net) + return lru_net->extractTag(addr); + else + panic("this shouldn't happen"); + } else + return lru->extractTag(addr); +} + diff --git a/src/mem/cache/tags/split.hh b/src/mem/cache/tags/split.hh new file mode 100644 index 0000000000..6f2441597d --- /dev/null +++ b/src/mem/cache/tags/split.hh @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Lisa Hsu + */ + +/** + * @file + * Declaration of a split/partitioned tag store. + */ + +#ifndef __SPLIT_HH__ +#define __SPLIT_HH__ + +#include + +#include "mem/cache/cache_blk.hh" // base class +#include "mem/cache/tags/split_blk.hh" +#include "mem/packet.hh" // for inlined functions +#include +#include "mem/cache/tags/base_tags.hh" +#include "base/hashmap.hh" + +class BaseCache; +class SplitLRU; +class SplitLIFO; + +/** + * A cache tag store. 
+ */ +class Split : public BaseTags +{ + public: + /** Typedef the block type used in this tag store. */ + typedef SplitBlk BlkType; + /** Typedef for a list of pointers to the local block class. */ + typedef std::list BlkList; + protected: + /** The number of sets in the cache. */ + const int numSets; + /** The number of bytes in a block. */ + const int blkSize; + /** Whether the 2nd partition (for the nic) is LIFO or not */ + const bool lifo; + /** The hit latency. */ + const int hitLatency; + + Addr blkMask; + + /** Number of NIC requests that hit in the NIC partition */ + Stats::Scalar<> NR_NP_hits; + /** Number of NIC requests that hit in the CPU partition */ + Stats::Scalar<> NR_CP_hits; + /** Number of CPU requests that hit in the NIC partition */ + Stats::Scalar<> CR_NP_hits; + /** Number of CPU requests that hit in the CPU partition */ + Stats::Scalar<> CR_CP_hits; + /** The number of nic replacements (i.e. misses) */ + Stats::Scalar<> nic_repl; + /** The number of cpu replacements (i.e. 
misses) */ + Stats::Scalar<> cpu_repl; + + //For latency studies + /** the number of NIC blks that were used before evicted */ + Stats::Scalar<> nicUsedWhenEvicted; + /** the total latency of used NIC blocks in the cache */ + Stats::Scalar<> nicUsedTotLatency; + /** the total number of used NIC blocks evicted */ + Stats::Scalar<> nicUsedTotEvicted; + /** the average number of cycles a used NIC blk is in the cache */ + Stats::Formula nicUsedAvgLatency; + /** the Distribution of used NIC blk eviction times */ + Stats::Distribution<> usedEvictDist; + + /** the number of NIC blks that were unused before evicted */ + Stats::Scalar<> nicUnusedWhenEvicted; + /** the total latency of unused NIC blks in the cache */ + Stats::Scalar<> nicUnusedTotLatency; + /** the total number of unused NIC blocks evicted */ + Stats::Scalar<> nicUnusedTotEvicted; + /** the average number of cycles an unused NIC blk is in the cache */ + Stats::Formula nicUnusedAvgLatency; + /** the Distribution of unused NIC blk eviction times */ + Stats::Distribution<> unusedEvictDist; + + /** The total latency of NIC blocks to 1st usage time by CPU */ + Stats::Scalar<> nicUseByCPUCycleTotal; + /** The total number of NIC blocks used */ + Stats::Scalar<> nicBlksUsedByCPU; + /** the average number of cycles before a NIC blk that is used gets used by CPU */ + Stats::Formula nicAvgUsageByCPULatency; + /** the Distribution of cycles time before a NIC blk is used by CPU*/ + Stats::Distribution<> useByCPUCycleDist; + + /** the number of CPU blks that were used before evicted */ + Stats::Scalar<> cpuUsedBlks; + /** the number of CPU blks that were unused before evicted */ + Stats::Scalar<> cpuUnusedBlks; + + /** the avg number of cycles before a NIC blk is evicted */ + Stats::Formula nicAvgLatency; + + typedef m5::hash_map > hash_t; + typedef hash_t::const_iterator memIter; + hash_t memHash; + + + private: + SplitLRU *lru; + SplitLRU *lru_net; + SplitLIFO *lifo_net; + + public: + /** + * Construct and initialize 
this tag store. + * @param _numSets The number of sets in the cache. + * @param _blkSize The number of bytes in a block. + * @param _assoc The associativity of the cache. + * @param _hit_latency The latency in cycles for a hit. + */ + Split(int _numSets, int _blkSize, int total_ways, int LRU1_assoc, + bool _lifo, bool _two_queue, int _hit_latency); + + /** + * Destructor + */ + virtual ~Split(); + + /** + * Register the stats for this object + * @param name The name to prepend to the stats name. + */ + void regStats(const std::string &name); + + /** + * Return the block size. + * @return the block size. + */ + int getBlockSize() + { + return blkSize; + } + + /** + * Return the subblock size. In the case of Split it is always the block + * size. + * @return The block size. + */ + int getSubBlockSize() + { + return blkSize; + } + + /** + * Search for the address in the cache. + * @param asid The address space ID. + * @param addr The address to find. + * @return True if the address is in the cache. + */ + bool probe(int asid, Addr addr) const; + + /** + * Invalidate the block containing the given address. + * @param asid The address space ID. + * @param addr The address to invalidate. + */ + void invalidateBlk(int asid, Addr addr); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param addr The address to find. + * @param asid The address space ID. + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Addr addr, int asid, int &lat); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param req The memory request whose block to find + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Packet * &pkt, int &lat); + + /** + * Finds the given address in the cache, do not update replacement data. 
+ * @param addr The address to find. + * @param asid The address space ID. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Addr addr, int asid) const; + + /** + * Find a replacement block for the address provided. + * @param req The request to a find a replacement candidate for. + * @param writebacks List for any writebacks to be performed. + * @param compress_blocks List of blocks to compress, for adaptive comp. + * @return The block to place the replacement in. + */ + SplitBlk* findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks); + + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @param blk The block to find the partition it's in + * @return The tag of the address. + */ + Addr extractTag(Addr addr, SplitBlk *blk) const; + + /** + * Calculate the set index from the address. + * @param addr The address to get the set from. + * @return The set index of the address. + */ + int extractSet(Addr addr) const + { + panic("should never call this!\n"); + } + + /** + * Get the block offset from an address. + * @param addr The address to get the offset of. + * @return The block offset. + */ + int extractBlkOffset(Addr addr) const + { + return (addr & blkMask); + } + + /** + * Align an address to the block size. + * @param addr the address to align. + * @return The block address. + */ + Addr blkAlign(Addr addr) const + { + return (addr & ~(Addr) (blkMask)); + } + + /** + * Regenerate the block address from the tag. + * @param tag The tag of the block. + * @param set The set of the block. + * @return The block address. + */ + Addr regenerateBlkAddr(Addr tag, int set) const; + + /** + * Return the hit latency. + * @return the hit latency. + */ + int getHitLatency() const + { + return hitLatency; + } + + /** + * Read the data out of the internal storage of the given cache block. + * @param blk The cache block to read. 
+ * @param data The buffer to read the data into. + * @return The cache block's data. + */ + void readData(SplitBlk *blk, uint8_t *data) + { + memcpy(data, blk->data, blk->size); + } + + /** + * Write data into the internal storage of the given cache block. Since in + * Split does not store data differently this just needs to update the size. + * @param blk The cache block to write. + * @param data The data to write. + * @param size The number of bytes to write. + * @param writebacks A list for any writebacks to be performed. May be + * needed when writing to a compressed block. + */ + void writeData(SplitBlk *blk, uint8_t *data, int size, + PacketList* & writebacks) + { + assert(size <= blkSize); + blk->size = size; + } + + /** + * Perform a block aligned copy from the source address to the destination. + * @param source The block-aligned source address. + * @param dest The block-aligned destination address. + * @param asid The address space DI. + * @param writebacks List for any generated writeback requests. + */ + void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks); + + /** + * No impl. + */ + void fixCopy(Packet * &pkt, PacketList* &writebacks) + { + } + + /** + * Called at end of simulation to complete average block reference stats. + */ + virtual void cleanupRefs(); +}; + +#endif diff --git a/src/mem/cache/tags/split_blk.hh b/src/mem/cache/tags/split_blk.hh new file mode 100644 index 0000000000..f38516180a --- /dev/null +++ b/src/mem/cache/tags/split_blk.hh @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Lisa Hsu + */ + +/** + * @file + * Declaration of partitioned tag store cache block class. + */ + +#ifndef __SPLIT_BLK_HH__ +#define __SPLIT_BLK_HH__ + +#include "mem/cache/cache_blk.hh" // base class + +/** + * Split cache block. + */ +class SplitBlk : public CacheBlk { + public: + /** Has this block been touched? Used to aid calculation of warmup time. */ + bool isTouched; + /** Has this block been used after being brought in? 
(for LIFO partition) */ + bool isUsed; + /** is this blk a NIC block? (i.e. requested by the NIC) */ + bool isNIC; + /** timestamp of the arrival of this block into the cache */ + Tick ts; + /** the previous block in the LIFO partition (brought in before than me) */ + SplitBlk *prev; + /** the next block in the LIFO partition (brought in later than me) */ + SplitBlk *next; + /** which partition this block is in */ + int part; + + SplitBlk() + : isTouched(false), isUsed(false), isNIC(false), ts(0), prev(NULL), next(NULL), + part(0) + {} +}; + +#endif + diff --git a/src/mem/cache/tags/split_lifo.cc b/src/mem/cache/tags/split_lifo.cc new file mode 100644 index 0000000000..f2c37c80d3 --- /dev/null +++ b/src/mem/cache/tags/split_lifo.cc @@ -0,0 +1,405 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Lisa Hsu + */ + +/** + * @file + * Definitions of LIFO tag store usable in a partitioned cache. + */ + +#include + +#include "mem/cache/base_cache.hh" +#include "base/intmath.hh" +#include "mem/cache/tags/split_lifo.hh" +#include "sim/root.hh" +#include "base/trace.hh" + +using namespace std; + +SplitBlk* +LIFOSet::findBlk(int asid, Addr tag) const +{ + for (SplitBlk *blk = firstIn; blk != NULL; blk = blk->next) { + if (blk->tag == tag && blk->isValid()) { + return blk; + } + } + return NULL; +} + +void +LIFOSet::moveToLastIn(SplitBlk *blk) +{ + if (blk == lastIn) + return; + + if (blk == firstIn) { + blk->next->prev = NULL; + } else { + blk->prev->next = blk->next; + blk->next->prev = blk->prev; + } + blk->next = NULL; + blk->prev = lastIn; + lastIn->next = blk; + + lastIn = blk; +} + +void +LIFOSet::moveToFirstIn(SplitBlk *blk) +{ + if (blk == firstIn) + return; + + if (blk == lastIn) { + blk->prev->next = NULL; + } else { + blk->next->prev = blk->prev; + blk->prev->next = blk->next; + } + + blk->prev = NULL; + blk->next = firstIn; + firstIn->prev = blk; + + firstIn = blk; +} + +// create and initialize a LIFO cache structure +SplitLIFO::SplitLIFO(int _blkSize, int _size, int _ways, int _hit_latency, bool two_Queue, int _part) : + blkSize(_blkSize), size(_size), numBlks(_size/_blkSize), numSets((_size/_ways)/_blkSize), ways(_ways), + hitLatency(_hit_latency), twoQueue(two_Queue), part(_part) +{ + if 
(!isPowerOf2(blkSize)) + fatal("cache block size (in bytes) must be a power of 2"); + if (!(hitLatency > 0)) + fatal("access latency in cycles must be at least on cycle"); + if (_ways == 0) + fatal("if instantiating a splitLIFO, needs non-zero size!"); + + + SplitBlk *blk; + int i, j, blkIndex; + + setShift = floorLog2(blkSize); + blkMask = blkSize - 1; + setMask = numSets - 1; + tagShift = setShift + floorLog2(numSets); + + warmedUp = false; + /** @todo Make warmup percentage a parameter. */ + warmupBound = size/blkSize; + + // allocate data blocks + blks = new SplitBlk[numBlks]; + sets = new LIFOSet[numSets]; + dataBlks = new uint8_t[size]; + +/* + // these start off point to same blk + top = &(blks[0]); + head = top; +*/ + + blkIndex = 0; + for (i=0; i < numSets; ++i) { + sets[i].ways = ways; + sets[i].lastIn = &blks[blkIndex]; + sets[i].firstIn = &blks[blkIndex + ways - 1]; + + /* 3 cases: if there is 1 way, if there are 2 ways, or if there are 3+. + in the case of 1 way, last in and first out point to the same blocks, + and the next and prev pointers need to be assigned specially. 
and so on + */ + /* deal with the first way */ + blk = &blks[blkIndex]; + blk->prev = &blks[blkIndex + 1]; + blk->next = NULL; + blk->data = &dataBlks[blkSize*blkIndex]; + blk->size = blkSize; + blk->part = part; + blk->set = i; + ++blkIndex; + + /* if there are "middle" ways, do them here */ + if (ways > 2) { + for (j=1; j < ways-1; ++j) { + blk = &blks[blkIndex]; + blk->data = &dataBlks[blkSize*blkIndex]; + blk->prev = &blks[blkIndex+1]; + blk->next = &blks[blkIndex-1]; + blk->data = &(dataBlks[blkSize*blkIndex]); + blk->size = blkSize; + blk->part = part; + blk->set = i; + ++blkIndex; + } + } + + /* do the final way here, depending on whether the final way is the only + way or not + */ + if (ways > 1) { + blk = &blks[blkIndex]; + blk->prev = NULL; + blk->next = &blks[blkIndex - 1]; + blk->data = &dataBlks[blkSize*blkIndex]; + blk->size = blkSize; + blk->part = part; + blk->set = i; + ++blkIndex; + } else { + blk->prev = NULL; + } + } + assert(blkIndex == numBlks); +} + +SplitLIFO::~SplitLIFO() +{ + delete [] blks; + delete [] sets; + delete [] dataBlks; +} + +void +SplitLIFO::regStats(const std::string &name) +{ + BaseTags::regStats(name); + + hits + .name(name + ".hits") + .desc("number of hits on this partition") + .precision(0) + ; + + misses + .name(name + ".misses") + .desc("number of misses in this partition") + .precision(0) + ; + + invalidations + .name(name + ".invalidations") + .desc("number of invalidations in this partition") + .precision(0) + ; +} + +// probe cache for presence of given block. 
+bool +SplitLIFO::probe(int asid, Addr addr) const +{ + Addr tag = extractTag(addr); + unsigned myset = extractSet(addr); + + SplitBlk* blk = sets[myset].findBlk(asid, tag); + return (blk != NULL); +} + +SplitBlk* +SplitLIFO::findBlock(Addr addr, int asid, int &lat) +{ + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + SplitBlk *blk = sets[set].findBlk(asid, tag); + + lat = hitLatency; + + if (blk) { + DPRINTF(Split, "Found LIFO blk %#x in set %d, with tag %#x\n", + addr, set, tag); + hits++; + + if (blk->whenReady > curTick && blk->whenReady - curTick > hitLatency) + lat = blk->whenReady - curTick; + blk->refCount +=1; + + if (twoQueue) { + blk->isUsed = true; + sets[set].moveToFirstIn(blk); + } else { + sets[set].moveToLastIn(blk); + } + } + + return blk; +} + +SplitBlk* +SplitLIFO::findBlock(Packet * &pkt, int &lat) +{ + Addr addr = pkt->paddr; + int asid = pkt->req->asid; + + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + SplitBlk *blk = sets[set].findBlk(asid, tag); + + if (blk) { + DPRINTF(Split, "Found LIFO blk %#x in set %d, with tag %#x\n", + addr, set, tag); + hits++; + + if (twoQueue) { + blk->isUsed = true; + sets[set].moveToFirstIn(blk); + } else { + sets[set].moveToLastIn(blk); + } + } + lat = hitLatency; + + return blk; +} + +SplitBlk* +SplitLIFO::findBlock(Addr addr, int asid) const +{ + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + SplitBlk *blk = sets[set].findBlk(asid, tag); + + return blk; +} + +SplitBlk* +SplitLIFO::findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks) +{ + unsigned set = extractSet(pkt->paddr); + + SplitBlk *firstIn = sets[set].firstIn; + SplitBlk *lastIn = sets[set].lastIn; + + SplitBlk *blk; + if (twoQueue && firstIn->isUsed) { + blk = firstIn; + blk->isUsed = false; + sets[set].moveToLastIn(blk); + } else { + int withValue = sets[set].withValue; + if (withValue == ways) { + blk = lastIn; + } else { + blk = &(sets[set].firstIn[ways - 
++withValue]); + } + } + + DPRINTF(Split, "just assigned %#x addr into LIFO, replacing %#x status %#x\n", + pkt->paddr, regenerateBlkAddr(blk->tag, set), blk->status); + if (blk->isValid()) { + int thread_num = (blk->xc) ? blk->xc->getThreadNum() : 0; + replacements[thread_num]++; + totalRefs += blk->refCount; + ++sampledRefs; + blk->refCount = 0; + } else { + tagsInUse++; + blk->isTouched = true; + if (!warmedUp && tagsInUse.value() >= warmupBound) { + warmedUp = true; + warmupCycle = curTick; + } + } + + misses++; + + return blk; +} + +void +SplitLIFO::invalidateBlk(int asid, Addr addr) +{ + SplitBlk *blk = findBlock(addr, asid); + if (blk) { + blk->status = 0; + blk->isTouched = false; + tagsInUse--; + invalidations++; + } +} + +void +SplitLIFO::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) +{ + assert(source == blkAlign(source)); + assert(dest == blkAlign(dest)); + SplitBlk *source_blk = findBlock(source, asid); + assert(source_blk); + SplitBlk *dest_blk = findBlock(dest, asid); + if (dest_blk == NULL) { + // Need to do a replacement + Packet * pkt = new Packet(); + pkt->paddr = dest; + BlkList dummy_list; + dest_blk = findReplacement(pkt, writebacks, dummy_list); + if (dest_blk->isValid() && dest_blk->isModified()) { + // Need to writeback data. + pkt = buildWritebackReq(regenerateBlkAddr(dest_blk->tag, + dest_blk->set), + dest_blk->req->asid, + dest_blk->xc, + blkSize, + (cache->doData())?dest_blk->data:0, + dest_blk->size); + writebacks.push_back(pkt); + } + dest_blk->tag = extractTag(dest); + dest_blk->req->asid = asid; + /** + * @todo Do we need to pass in the execution context, or can we + * assume its the same? + */ + assert(source_blk->xc); + dest_blk->xc = source_blk->xc; + } + /** + * @todo Can't assume the status once we have coherence on copies. + */ + + // Set this block as readable, writeable, and dirty. 
+ dest_blk->status = 7; + if (cache->doData()) { + memcpy(dest_blk->data, source_blk->data, blkSize); + } +} + +void +SplitLIFO::cleanupRefs() +{ + for (int i = 0; i < numBlks; ++i) { + if (blks[i].isValid()) { + totalRefs += blks[i].refCount; + ++sampledRefs; + } + } +} diff --git a/src/mem/cache/tags/split_lifo.hh b/src/mem/cache/tags/split_lifo.hh new file mode 100644 index 0000000000..c50eaa53db --- /dev/null +++ b/src/mem/cache/tags/split_lifo.hh @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Lisa Hsu + */ + +/** + * @file + * Declaration of a LIFO tag store usable in a partitioned cache. + */ + +#ifndef __SPLIT_LIFO_HH__ +#define __SPLIT_LIFO_HH__ + +#include + +#include "mem/cache/cache_blk.hh" // base class +#include "mem/cache/tags/split_blk.hh" +#include "mem/packet.hh" // for inlined functions +#include "base/hashmap.hh" +#include +#include "mem/cache/tags/base_tags.hh" + +class BaseCache; + +/** + * A LIFO set of cache blks + */ +class LIFOSet { + public: + /** the number of blocks in this set */ + int ways; + + /** Cache blocks in this set, maintained in LIFO order where + 0 = Last in (head) */ + SplitBlk *lastIn; + SplitBlk *firstIn; + + /** has the initial "filling" of this set finished? i.e., have you had + * 'ways' number of compulsory misses in this set yet? if withValue == ways, + * then yes. withValue is meant to be the number of blocks in the set that have + * gone through their first compulsory miss. + */ + int withValue; + + /** + * Find a block matching the tag in this set. + * @param asid The address space ID. + * @param tag the Tag you are looking for + * @return Pointer to the block, if found, NULL otherwise + */ + SplitBlk* findBlk(int asid, Addr tag) const; + + void moveToLastIn(SplitBlk *blk); + void moveToFirstIn(SplitBlk *blk); + + LIFOSet() + : ways(-1), lastIn(NULL), firstIn(NULL), withValue(0) + {} +}; + +/** + * A LIFO cache tag store. 
+ */ +class SplitLIFO : public BaseTags +{ + public: + /** Typedef the block type used in this tag store. */ + typedef SplitBlk BlkType; + /** Typedef for a list of pointers to the local block class. */ + typedef std::list BlkList; + protected: + /** The number of bytes in a block. */ + const int blkSize; + /** the size of the cache in bytes */ + const int size; + /** the number of blocks in the cache */ + const int numBlks; + /** the number of sets in the cache */ + const int numSets; + /** the number of ways in the cache */ + const int ways; + /** The hit latency. */ + const int hitLatency; + /** whether this is a "2 queue" replacement @sa moveToLastIn @sa moveToFirstIn */ + const bool twoQueue; + /** indicator for which partition this is */ + const int part; + + /** The cache blocks. */ + SplitBlk *blks; + /** The Cache sets */ + LIFOSet *sets; + /** The data blocks, 1 per cache block. */ + uint8_t *dataBlks; + + /** The amount to shift the address to get the set. */ + int setShift; + /** The amount to shift the address to get the tag. */ + int tagShift; + /** Mask out all bits that aren't part of the set index. */ + unsigned setMask; + /** Mask out all bits that aren't part of the block offset. */ + unsigned blkMask; + + + /** the number of hit in this partition */ + Stats::Scalar<> hits; + /** the number of blocks brought into this partition (i.e. misses) */ + Stats::Scalar<> misses; + /** the number of invalidations in this partition */ + Stats::Scalar<> invalidations; + +public: + /** + * Construct and initialize this tag store. + * @param _numSets The number of sets in the cache. + * @param _blkSize The number of bytes in a block. + * @param _assoc The associativity of the cache. + * @param _hit_latency The latency in cycles for a hit. 
+ */ + SplitLIFO(int _blkSize, int _size, int _ways, int _hit_latency, bool twoQueue, int _part); + + /** + * Destructor + */ + virtual ~SplitLIFO(); + + /** + * Register the statistics for this object + * @param name The name to precede the stat + */ + void regStats(const std::string &name); + + /** + * Return the block size. + * @return the block size. + */ + int getBlockSize() + { + return blkSize; + } + + /** + * Return the subblock size. In the case of LIFO it is always the block + * size. + * @return The block size. + */ + int getSubBlockSize() + { + return blkSize; + } + + /** + * Search for the address in the cache. + * @param asid The address space ID. + * @param addr The address to find. + * @return True if the address is in the cache. + */ + bool probe(int asid, Addr addr) const; + + /** + * Invalidate the block containing the given address. + * @param asid The address space ID. + * @param addr The address to invalidate. + */ + void invalidateBlk(int asid, Addr addr); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param addr The address to find. + * @param asid The address space ID. + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Addr addr, int asid, int &lat); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param req The req whose block to find + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Packet * &pkt, int &lat); + + /** + * Finds the given address in the cache, do not update replacement data. + * @param addr The address to find. + * @param asid The address space ID. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Addr addr, int asid) const; + + /** + * Find a replacement block for the address provided. 
+ * @param req The request to a find a replacement candidate for. + * @param writebacks List for any writebacks to be performed. + * @param compress_blocks List of blocks to compress, for adaptive comp. + * @return The block to place the replacement in. + */ + SplitBlk* findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks); + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @return The tag of the address. + */ + Addr extractTag(Addr addr) const + { + return (addr >> tagShift); + } + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @param blk Ignored + * @return The tag of the address. + */ + Addr extractTag(Addr addr, SplitBlk *blk) const + { + return (addr >> tagShift); + } + + /** + * Calculate the set index from the address. + * @param addr The address to get the set from. + * @return The set index of the address. + */ + int extractSet(Addr addr) const + { + return ((addr >> setShift) & setMask); + } + + /** + * Get the block offset from an address. + * @param addr The address to get the offset of. + * @return The block offset. + */ + int extractBlkOffset(Addr addr) const + { + return (addr & blkMask); + } + + /** + * Align an address to the block size. + * @param addr the address to align. + * @return The block address. + */ + Addr blkAlign(Addr addr) const + { + return (addr & ~(Addr)blkMask); + } + + /** + * Regenerate the block address from the tag. + * @param tag The tag of the block. + * @param set The set of the block. + * @return The block address. + */ + Addr regenerateBlkAddr(Addr tag, unsigned set) const + { + return ((tag << tagShift) | ((Addr)set << setShift)); + } + + /** + * Return the hit latency. + * @return the hit latency. + */ + int getHitLatency() const + { + return hitLatency; + } + + /** + * Read the data out of the internal storage of the given cache block. + * @param blk The cache block to read. 
+ * @param data The buffer to read the data into. + * @return The cache block's data. + */ + void readData(SplitBlk *blk, uint8_t *data) + { + memcpy(data, blk->data, blk->size); + } + + /** + * Write data into the internal storage of the given cache block. Since in + * LIFO does not store data differently this just needs to update the size. + * @param blk The cache block to write. + * @param data The data to write. + * @param size The number of bytes to write. + * @param writebacks A list for any writebacks to be performed. May be + * needed when writing to a compressed block. + */ + void writeData(SplitBlk *blk, uint8_t *data, int size, + PacketList* & writebacks) + { + assert(size <= blkSize); + blk->size = size; + } + + /** + * Perform a block aligned copy from the source address to the destination. + * @param source The block-aligned source address. + * @param dest The block-aligned destination address. + * @param asid The address space DI. + * @param writebacks List for any generated writeback requests. + */ + void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks); + + /** + * No impl. + */ + void fixCopy(Packet * &pkt, PacketList* &writebacks) + { + } + + /** + * Called at end of simulation to complete average block reference stats. + */ + virtual void cleanupRefs(); +}; + +#endif diff --git a/src/mem/cache/tags/split_lru.cc b/src/mem/cache/tags/split_lru.cc new file mode 100644 index 0000000000..ea5b92d6f2 --- /dev/null +++ b/src/mem/cache/tags/split_lru.cc @@ -0,0 +1,331 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Lisa Hsu + */ + +/** + * @file + * Definitions of LRU tag store for a partitioned cache. 
+ */ + +#include <string> + +#include "mem/cache/base_cache.hh" +#include "base/intmath.hh" +#include "mem/cache/tags/split_lru.hh" +#include "sim/root.hh" + +using namespace std; + +SplitBlk* +SplitCacheSet::findBlk(int asid, Addr tag) const +{ + for (int i = 0; i < assoc; ++i) { + if (blks[i]->tag == tag && blks[i]->isValid()) { + return blks[i]; + } + } + return 0; +} + + +void +SplitCacheSet::moveToHead(SplitBlk *blk) +{ + // nothing to do if blk is already head + if (blks[0] == blk) + return; + + // write 'next' block into blks[i], moving up from MRU toward LRU + // until we overwrite the block we moved to head. + + // start by setting up to write 'blk' into blks[0] + int i = 0; + SplitBlk *next = blk; + + do { + assert(i < assoc); + // swap blks[i] and next + SplitBlk *tmp = blks[i]; + blks[i] = next; + next = tmp; + ++i; + } while (next != blk); +} + + +// create and initialize a LRU/MRU cache structure +SplitLRU::SplitLRU(int _numSets, int _blkSize, int _assoc, int _hit_latency, int _part) : + numSets(_numSets), blkSize(_blkSize), assoc(_assoc), hitLatency(_hit_latency), part(_part) +{ + // Check parameters + if (blkSize < 4 || !isPowerOf2(blkSize)) { + fatal("Block size must be at least 4 and a power of 2"); + } + if (numSets <= 0 || !isPowerOf2(numSets)) { + fatal("# of sets must be non-zero and a power of 2"); + } + if (assoc <= 0) { + fatal("associativity must be greater than zero"); + } + if (hitLatency <= 0) { + fatal("access latency must be greater than zero"); + } + + SplitBlk *blk; + int i, j, blkIndex; + + blkMask = blkSize - 1; + setShift = floorLog2(blkSize); + setMask = numSets - 1; + tagShift = setShift + floorLog2(numSets); + warmedUp = false; + /** @todo Make warmup percentage a parameter.
*/ + warmupBound = numSets * assoc; + + sets = new SplitCacheSet[numSets]; + blks = new SplitBlk[numSets * assoc]; + // allocate data storage in one big chunk + dataBlks = new uint8_t[numSets*assoc*blkSize]; + + blkIndex = 0; // index into blks array + for (i = 0; i < numSets; ++i) { + sets[i].assoc = assoc; + + sets[i].blks = new SplitBlk*[assoc]; + + // link in the data blocks + for (j = 0; j < assoc; ++j) { + // locate next cache block + blk = &blks[blkIndex]; + blk->data = &dataBlks[blkSize*blkIndex]; + ++blkIndex; + + // invalidate new cache block + blk->status = 0; + + //EGH Fix Me : do we need to initialize blk? + + // Setting the tag to j is just to prevent long chains in the hash + // table; won't matter because the block is invalid + blk->tag = j; + blk->whenReady = 0; + blk->req->asid = -1; + blk->isTouched = false; + blk->size = blkSize; + sets[i].blks[j]=blk; + blk->set = i; + blk->part = part; + } + } +} + +SplitLRU::~SplitLRU() +{ + delete [] dataBlks; + delete [] blks; + delete [] sets; +} + +void +SplitLRU::regStats(const std::string &name) +{ + BaseTags::regStats(name); + + hits + .name(name + ".hits") + .desc("number of hits on this partition") + .precision(0) + ; + + misses + .name(name + ".misses") + .desc("number of misses in this partition") + .precision(0) + ; +} + +// probe cache for presence of given block. 
+bool +SplitLRU::probe(int asid, Addr addr) const +{ + // return(findBlock(Read, addr, asid) != 0); + Addr tag = extractTag(addr); + unsigned myset = extractSet(addr); + + SplitBlk *blk = sets[myset].findBlk(asid, tag); + + return (blk != NULL); // true if in cache +} + +SplitBlk* +SplitLRU::findBlock(Addr addr, int asid, int &lat) +{ + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + SplitBlk *blk = sets[set].findBlk(asid, tag); + lat = hitLatency; + if (blk != NULL) { + // move this block to head of the MRU list + sets[set].moveToHead(blk); + if (blk->whenReady > curTick && blk->whenReady - curTick > hitLatency){ + lat = blk->whenReady - curTick; + } + blk->refCount += 1; + hits++; + } + + return blk; +} + +SplitBlk* +SplitLRU::findBlock(Packet * &pkt, int &lat) +{ + Addr addr = pkt->paddr; + int asid = pkt->req->asid; + + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + SplitBlk *blk = sets[set].findBlk(asid, tag); + lat = hitLatency; + if (blk != NULL) { + // move this block to head of the MRU list + sets[set].moveToHead(blk); + if (blk->whenReady > curTick && blk->whenReady - curTick > hitLatency){ + lat = blk->whenReady - curTick; + } + blk->refCount += 1; + hits++; + } + + return blk; +} + +SplitBlk* +SplitLRU::findBlock(Addr addr, int asid) const +{ + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + SplitBlk *blk = sets[set].findBlk(asid, tag); + return blk; +} + +SplitBlk* +SplitLRU::findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks) +{ + unsigned set = extractSet(pkt->paddr); + // grab a replacement candidate + SplitBlk *blk = sets[set].blks[assoc-1]; + sets[set].moveToHead(blk); + if (blk->isValid()) { + int thread_num = (blk->xc) ? 
blk->xc->getThreadNum() : 0; + replacements[thread_num]++; + totalRefs += blk->refCount; + ++sampledRefs; + blk->refCount = 0; + } else if (!blk->isTouched) { + tagsInUse++; + blk->isTouched = true; + if (!warmedUp && tagsInUse.value() >= warmupBound) { + warmedUp = true; + warmupCycle = curTick; + } + } + + misses++; + + return blk; +} + +void +SplitLRU::invalidateBlk(int asid, Addr addr) +{ + SplitBlk *blk = findBlock(addr, asid); + if (blk) { + blk->status = 0; + blk->isTouched = false; + tagsInUse--; + } +} + +void +SplitLRU::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) +{ + assert(source == blkAlign(source)); + assert(dest == blkAlign(dest)); + SplitBlk *source_blk = findBlock(source, asid); + assert(source_blk); + SplitBlk *dest_blk = findBlock(dest, asid); + if (dest_blk == NULL) { + // Need to do a replacement + Packet * pkt = new Packet(); + pkt->paddr = dest; + BlkList dummy_list; + dest_blk = findReplacement(pkt, writebacks, dummy_list); + if (dest_blk->isValid() && dest_blk->isModified()) { + // Need to writeback data. + pkt = buildWritebackReq(regenerateBlkAddr(dest_blk->tag, + dest_blk->set), + dest_blk->req->asid, + dest_blk->xc, + blkSize, + (cache->doData())?dest_blk->data:0, + dest_blk->size); + writebacks.push_back(pkt); + } + dest_blk->tag = extractTag(dest); + dest_blk->req->asid = asid; + /** + * @todo Do we need to pass in the execution context, or can we + * assume its the same? + */ + assert(source_blk->xc); + dest_blk->xc = source_blk->xc; + } + /** + * @todo Can't assume the status once we have coherence on copies. + */ + + // Set this block as readable, writeable, and dirty. 
+ dest_blk->status = 7; + if (cache->doData()) { + memcpy(dest_blk->data, source_blk->data, blkSize); + } +} + +void +SplitLRU::cleanupRefs() +{ + for (int i = 0; i < numSets*assoc; ++i) { + if (blks[i].isValid()) { + totalRefs += blks[i].refCount; + ++sampledRefs; + } + } +} diff --git a/src/mem/cache/tags/split_lru.hh b/src/mem/cache/tags/split_lru.hh new file mode 100644 index 0000000000..1c0fc8600a --- /dev/null +++ b/src/mem/cache/tags/split_lru.hh @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Lisa Hsu + */ + +/** + * @file + * Declaration of a LRU tag store for a partitioned cache. + */ + +#ifndef __SPLIT_LRU_HH__ +#define __SPLIT_LRU_HH__ + +#include <list> + +#include "mem/cache/cache_blk.hh" // base class +#include "mem/cache/tags/split_blk.hh" +#include "mem/packet.hh" // for inlined functions +#include <assert.h> +#include "mem/cache/tags/base_tags.hh" + +class BaseCache; + +/** + * An associative set of cache blocks. + */ + +class SplitCacheSet +{ + public: + /** The associativity of this set. */ + int assoc; + + /** Cache blocks in this set, maintained in LRU order 0 = MRU. */ + SplitBlk **blks; + + /** + * Find a block matching the tag in this set. + * @param asid The address space ID. + * @param tag The Tag to find. + * @return Pointer to the block if found. + */ + SplitBlk* findBlk(int asid, Addr tag) const; + + /** + * Move the given block to the head of the list. + * @param blk The block to move. + */ + void moveToHead(SplitBlk *blk); +}; + +/** + * A LRU cache tag store. + */ +class SplitLRU : public BaseTags +{ + public: + /** Typedef the block type used in this tag store. */ + typedef SplitBlk BlkType; + /** Typedef for a list of pointers to the local block class. */ + typedef std::list<SplitBlk*> BlkList; + protected: + /** The number of sets in the cache. */ + const int numSets; + /** The number of bytes in a block. */ + const int blkSize; + /** The associativity of the cache.
*/ + const int assoc; + /** The hit latency. */ + const int hitLatency; + /** indicator for which partition this is */ + const int part; + + /** The cache sets. */ + SplitCacheSet *sets; + + /** The cache blocks. */ + SplitBlk *blks; + /** The data blocks, 1 per cache block. */ + uint8_t *dataBlks; + + /** The amount to shift the address to get the set. */ + int setShift; + /** The amount to shift the address to get the tag. */ + int tagShift; + /** Mask out all bits that aren't part of the set index. */ + unsigned setMask; + /** Mask out all bits that aren't part of the block offset. */ + unsigned blkMask; + + /** number of hits in this partition */ + Stats::Scalar<> hits; + /** number of blocks brought into this partition (i.e. misses) */ + Stats::Scalar<> misses; + +public: + /** + * Construct and initialize this tag store. + * @param _numSets The number of sets in the cache. + * @param _blkSize The number of bytes in a block. + * @param _assoc The associativity of the cache. + * @param _hit_latency The latency in cycles for a hit. + */ + SplitLRU(int _numSets, int _blkSize, int _assoc, int _hit_latency, int _part); + + /** + * Destructor + */ + virtual ~SplitLRU(); + + /** + * Register the statistics for this object + * @param name The name to precede the stat + */ + void regStats(const std::string &name); + + /** + * Return the block size. + * @return the block size. + */ + int getBlockSize() + { + return blkSize; + } + + /** + * Return the subblock size. In the case of LRU it is always the block + * size. + * @return The block size. + */ + int getSubBlockSize() + { + return blkSize; + } + + /** + * Search for the address in the cache. + * @param asid The address space ID. + * @param addr The address to find. + * @return True if the address is in the cache. + */ + bool probe(int asid, Addr addr) const; + + /** + * Invalidate the block containing the given address. + * @param asid The address space ID. + * @param addr The address to invalidate. 
+ */ + void invalidateBlk(int asid, Addr addr); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param addr The address to find. + * @param asid The address space ID. + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Addr addr, int asid, int &lat); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param req The req whose block to find. + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Packet * &pkt, int &lat); + + /** + * Finds the given address in the cache, do not update replacement data. + * @param addr The address to find. + * @param asid The address space ID. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Addr addr, int asid) const; + + /** + * Find a replacement block for the address provided. + * @param req The request to a find a replacement candidate for. + * @param writebacks List for any writebacks to be performed. + * @param compress_blocks List of blocks to compress, for adaptive comp. + * @return The block to place the replacement in. + */ + SplitBlk* findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks); + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @return The tag of the address. + */ + Addr extractTag(Addr addr) const + { + return (addr >> tagShift); + } + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @param blk Ignored. + * @return The tag of the address. + */ + Addr extractTag(Addr addr, SplitBlk *blk) const + { + return (addr >> tagShift); + } + + /** + * Calculate the set index from the address. + * @param addr The address to get the set from. + * @return The set index of the address. 
+ */ + int extractSet(Addr addr) const + { + return ((addr >> setShift) & setMask); + } + + /** + * Get the block offset from an address. + * @param addr The address to get the offset of. + * @return The block offset. + */ + int extractBlkOffset(Addr addr) const + { + return (addr & blkMask); + } + + /** + * Align an address to the block size. + * @param addr the address to align. + * @return The block address. + */ + Addr blkAlign(Addr addr) const + { + return (addr & ~(Addr)blkMask); + } + + /** + * Regenerate the block address from the tag. + * @param tag The tag of the block. + * @param set The set of the block. + * @return The block address. + */ + Addr regenerateBlkAddr(Addr tag, unsigned set) const + { + return ((tag << tagShift) | ((Addr)set << setShift)); + } + + /** + * Return the hit latency. + * @return the hit latency. + */ + int getHitLatency() const + { + return hitLatency; + } + + /** + * Read the data out of the internal storage of the given cache block. + * @param blk The cache block to read. + * @param data The buffer to read the data into. + * @return The cache block's data. + */ + void readData(SplitBlk *blk, uint8_t *data) + { + memcpy(data, blk->data, blk->size); + } + + /** + * Write data into the internal storage of the given cache block. Since in + * LRU does not store data differently this just needs to update the size. + * @param blk The cache block to write. + * @param data The data to write. + * @param size The number of bytes to write. + * @param writebacks A list for any writebacks to be performed. May be + * needed when writing to a compressed block. + */ + void writeData(SplitBlk *blk, uint8_t *data, int size, + PacketList* & writebacks) + { + assert(size <= blkSize); + blk->size = size; + } + + /** + * Perform a block aligned copy from the source address to the destination. + * @param source The block-aligned source address. + * @param dest The block-aligned destination address. + * @param asid The address space DI. 
+ * @param writebacks List for any generated writeback requests. + */ + void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks); + + /** + * No impl. + */ + void fixCopy(Packet * &pkt, PacketList* &writebacks) + { + } + + /** + * Called at end of simulation to complete average block reference stats. + */ + virtual void cleanupRefs(); +}; + +#endif From fc281d0b64fca8d2809ec462148acb7cf0461ea5 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Wed, 28 Jun 2006 14:35:00 -0400 Subject: [PATCH 074/152] Backing in more changsets, getting closer to compile base_cache.cc compiles, continuing on src/SConscript: Add in compilation flags for cache files src/mem/cache/base_cache.cc: src/mem/cache/base_cache.hh: Back in more fixes, now base_cache compiles src/mem/cache/cache.hh: src/mem/cache/cache_blk.hh: src/mem/cache/cache_impl.hh: src/mem/cache/coherence/coherence_protocol.cc: src/mem/cache/miss/blocking_buffer.cc: src/mem/cache/miss/blocking_buffer.hh: src/mem/cache/miss/miss_queue.cc: src/mem/cache/miss/miss_queue.hh: src/mem/cache/miss/mshr.cc: src/mem/cache/miss/mshr.hh: src/mem/cache/miss/mshr_queue.cc: src/mem/cache/miss/mshr_queue.hh: src/mem/cache/prefetch/base_prefetcher.cc: src/mem/cache/tags/fa_lru.cc: src/mem/cache/tags/iic.cc: src/mem/cache/tags/lru.cc: src/mem/cache/tags/split_lifo.cc: src/mem/cache/tags/split_lru.cc: src/mem/packet.cc: src/mem/packet.hh: src/mem/request.hh: Backing in more changsets, getting closer to compile --HG-- extra : convert_revision : ac2dcda39f8d27baffc4db1df17b9a1fcce5b6ed --- src/SConscript | 25 +++++++++ src/mem/cache/base_cache.cc | 52 ++++++++++--------- src/mem/cache/base_cache.hh | 29 +++++++---- src/mem/cache/cache.hh | 7 ++- src/mem/cache/cache_blk.hh | 4 -- src/mem/cache/cache_impl.hh | 26 +++++----- src/mem/cache/coherence/coherence_protocol.cc | 4 +- src/mem/cache/miss/blocking_buffer.cc | 22 ++++---- src/mem/cache/miss/blocking_buffer.hh | 7 ++- src/mem/cache/miss/miss_queue.cc | 36 ++++++------- 
src/mem/cache/miss/miss_queue.hh | 4 +- src/mem/cache/miss/mshr.cc | 11 ++-- src/mem/cache/miss/mshr.hh | 2 +- src/mem/cache/miss/mshr_queue.cc | 6 +-- src/mem/cache/miss/mshr_queue.hh | 4 +- src/mem/cache/prefetch/base_prefetcher.cc | 4 +- src/mem/cache/tags/fa_lru.cc | 4 +- src/mem/cache/tags/iic.cc | 4 +- src/mem/cache/tags/lru.cc | 4 +- src/mem/cache/tags/split_lifo.cc | 4 +- src/mem/cache/tags/split_lru.cc | 4 +- src/mem/config/cache.hh | 42 +++++++++++++++ src/mem/packet.cc | 13 +++++ src/mem/packet.hh | 22 +++++++- src/mem/request.hh | 3 ++ 25 files changed, 225 insertions(+), 118 deletions(-) create mode 100644 src/mem/config/cache.hh diff --git a/src/SConscript b/src/SConscript index 124f88708a..ff41e59316 100644 --- a/src/SConscript +++ b/src/SConscript @@ -101,6 +101,31 @@ base_sources = Split(''' mem/physical.cc mem/port.cc + mem/cache/base_cache.cc + mem/cache/cache.cc + mem/cache/cache_builder.cc + mem/cache/coherence/coherence_protocol.cc + mem/cache/coherence/uni_coherence.cc + mem/cache/miss/blocking_buffer.cc + mem/cache/miss/miss_queue.cc + mem/cache/miss/mshr.cc + mem/cache/miss/mshr_queue.cc + mem/cache/prefetch/base_prefetcher.cc + mem/cache/prefetch/ghb_prefetcher.cc + mem/cache/prefetch/prefetcher.cc + mem/cache/prefetch/stride_prefetcher.cc + mem/cache/prefetch/tagged_prefetcher.cc + mem/cache/tags/base_tags.cc + mem/cache/tags/cache_tags.cc + mem/cache/tags/fa_lru.cc + mem/cache/tags/iic/cc + mem/cache/tags/lru.cc + mem/cache/tags/repl/gen.cc + mem/cache/tags/repl/repl.cc + mem/cache/tags/split.cc + mem/cache/tags/split_lifo.cc + mem/cache/tags/split_lru.cc + sim/builder.cc sim/debug.cc sim/eventq.cc diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index 10a49edb1f..89e23ce318 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -45,11 +45,11 @@ BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache, { blocked = false; //Start ports at null if more than one is created we 
should panic - cpuSidePort = NULL; - memSidePort = NULL; + //cpuSidePort = NULL; + //memSidePort = NULL; } -bool +void BaseCache::CachePort::recvStatusChange(Port::Status status) { cache->recvStatusChange(status, isCpuSide); @@ -121,12 +121,16 @@ BaseCache::getPort(const std::string &if_name) void BaseCache::regStats() { + Request temp_req; + Packet::Command temp_cmd = Packet::ReadReq; + Packet temp_pkt(&temp_req, temp_cmd, 0); //@todo FIx command strings so this isn't neccessary + using namespace Stats; // Hit statistics for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); hits[access_idx] .init(maxThreadsPerCPU) @@ -141,20 +145,20 @@ BaseCache::regStats() .desc("number of demand (read+write) hits") .flags(total) ; - demandHits = hits[Read] + hits[Write]; + demandHits = hits[Packet::ReadReq] + hits[Packet::WriteReq]; overallHits .name(name() + ".overall_hits") .desc("number of overall hits") .flags(total) ; - overallHits = demandHits + hits[Soft_Prefetch] + hits[Hard_Prefetch] - + hits[Writeback]; + overallHits = demandHits + hits[Packet::SoftPFReq] + hits[Packet::HardPFReq] + + hits[Packet::Writeback]; // Miss statistics for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); misses[access_idx] .init(maxThreadsPerCPU) @@ -169,20 +173,20 @@ BaseCache::regStats() .desc("number of demand (read+write) misses") .flags(total) ; - demandMisses = misses[Read] + misses[Write]; + demandMisses = misses[Packet::ReadReq] + misses[Packet::WriteReq]; overallMisses .name(name() + ".overall_misses") .desc("number of overall misses") .flags(total) ; - 
overallMisses = demandMisses + misses[Soft_Prefetch] + - misses[Hard_Prefetch] + misses[Writeback]; + overallMisses = demandMisses + misses[Packet::SoftPFReq] + + misses[Packet::HardPFReq] + misses[Packet::Writeback]; // Miss latency statistics for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); missLatency[access_idx] .init(maxThreadsPerCPU) @@ -197,20 +201,20 @@ BaseCache::regStats() .desc("number of demand (read+write) miss cycles") .flags(total) ; - demandMissLatency = missLatency[Read] + missLatency[Write]; + demandMissLatency = missLatency[Packet::ReadReq] + missLatency[Packet::WriteReq]; overallMissLatency .name(name() + ".overall_miss_latency") .desc("number of overall miss cycles") .flags(total) ; - overallMissLatency = demandMissLatency + missLatency[Soft_Prefetch] + - missLatency[Hard_Prefetch]; + overallMissLatency = demandMissLatency + missLatency[Packet::SoftPFReq] + + missLatency[Packet::HardPFReq]; // access formulas for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); accesses[access_idx] .name(name() + "." + cstr + "_accesses") @@ -237,8 +241,8 @@ BaseCache::regStats() // miss rate formulas for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); missRate[access_idx] .name(name() + "." 
+ cstr + "_miss_rate") @@ -265,8 +269,8 @@ BaseCache::regStats() // miss latency formulas for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); avgMissLatency[access_idx] .name(name() + "." + cstr + "_avg_miss_latency") diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index 0170b02494..977e0ae297 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -47,6 +47,7 @@ #include "mem/packet.hh" #include "mem/port.hh" #include "mem/request.hh" +#include "sim/eventq.hh" /** * Reasons for Caches to be Blocked. @@ -82,7 +83,7 @@ class BaseCache : public MemObject public: CachePort(const std::string &_name, BaseCache *_cache, bool _isCpuSide); - private: + protected: virtual bool recvTiming(Packet *pkt); virtual Tick recvAtomic(Packet *pkt); @@ -96,6 +97,7 @@ class BaseCache : public MemObject virtual int deviceBlockSize(); + public: void setBlocked(); void clearBlocked(); @@ -110,10 +112,10 @@ class BaseCache : public MemObject Packet *pkt; CachePort *cachePort; - CacheResponseEvent(Packet *pkt, CachePort *cachePort); + CacheEvent(Packet *pkt, CachePort *cachePort); void process(); const char *description(); - } + }; protected: CachePort *cpuSidePort; @@ -124,7 +126,7 @@ class BaseCache : public MemObject private: //To be defined in cache_impl.hh not in base class - virtual bool doTimingAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide); + virtual bool doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide); virtual Tick doAtomicAccess(Packet *pkt, bool isCpuSide); virtual void doFunctionalAccess(Packet *pkt, bool isCpuSide); virtual void recvStatusChange(Port::Status status, bool isCpuSide); @@ -275,12 +277,14 @@ class BaseCache : public MemObject * of this cache. 
* @param params The parameter object for this BaseCache. */ - BaseCache(const std::string &name, HierParams *hier_params, Params &params) - : BaseMem(name, hier_params, params.hitLatency, params.addrRange), - blocked(0), blockedSnoop(0), masterRequests(0), slaveRequests(0), - topLevelCache(false), blkSize(params.blkSize), + BaseCache(const std::string &name, Params &params) + : MemObject(name), blocked(0), blockedSnoop(0), masterRequests(0), + slaveRequests(0), topLevelCache(false), blkSize(params.blkSize), missCount(params.maxMisses) { + //Start ports at null if more than one is created we should panic + cpuSidePort = NULL; + memSidePort = NULL; } /** @@ -453,8 +457,8 @@ class BaseCache : public MemObject */ void respondToMiss(Packet *pkt, Tick time) { - if (!pkt->isUncacheable()) { - missLatency[pkt->cmd.toIndex()][pkt->thread_num] += time - pkt->time; + if (!pkt->req->isUncacheable()) { + missLatency[pkt->cmdToIndex()][pkt->req->getThreadNum()] += time - pkt->time; } assert("Implement\n" && 0); // si->respond(pkt,time); @@ -475,6 +479,11 @@ class BaseCache : public MemObject * to do for a cache. */ void rangeChange() {} + + void getAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) + { + panic("Unimplimented\n"); + } }; #endif //__BASE_CACHE_HH__ diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index dcb22a99c9..78e87479bb 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -46,7 +46,6 @@ // forward declarations class Bus; -class ExecContext; /** * A template-policy based cache. The behavior of the cache can be altered by @@ -209,11 +208,11 @@ class Cache : public BaseCache /** * Aquash all requests associated with specified thread. * intended for use by I-cache. - * @param thread_number The thread to squash. + * @param req->getThreadNum()ber The thread to squash.
*/ - void squash(int thread_number) + void squash(int threadNum) { - missQueue->squash(thread_number); + missQueue->squash(threadNum); } /** diff --git a/src/mem/cache/cache_blk.hh b/src/mem/cache/cache_blk.hh index cf1bd20e2e..02fdd7a514 100644 --- a/src/mem/cache/cache_blk.hh +++ b/src/mem/cache/cache_blk.hh @@ -37,7 +37,6 @@ #include "sim/root.hh" // for Tick #include "arch/isa_traits.hh" // for Addr -#include "cpu/exec_context.hh" /** * Cache block status bit assignments @@ -88,9 +87,6 @@ class CacheBlk /** Which curTick will this block be accessable */ Tick whenReady; - /** Save the exec context so that writebacks can use them. */ - ExecContext *xc; - /** * The set this block belongs to. * @todo Move this into subclasses when we fix CacheTags to use them. diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 3dd8d74cde..3dc95af68f 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -175,7 +175,7 @@ Cache::access(Packet &pkt) //We are determining prefetches on access stream, call prefetcher prefetcher->handleMiss(pkt, curTick); } - if (!pkt->isUncacheable()) { + if (!pkt->req->isUncacheable()) { if (pkt->cmd.isInvalidate() && !pkt->cmd.isRead() && !pkt->cmd.isWrite()) { //Upgrade or Invalidate @@ -220,7 +220,7 @@ Cache::access(Packet &pkt) pkt->paddr & ~((Addr)blkSize - 1), pkt->pc); if (blk) { // Hit - hits[pkt->cmd.toIndex()][pkt->thread_num]++; + hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; // clear dirty bit if write through if (!pkt->cmd.isNoResponse()) respond(pkt, curTick+lat); @@ -228,8 +228,8 @@ Cache::access(Packet &pkt) } // Miss - if (!pkt->isUncacheable()) { - misses[pkt->cmd.toIndex()][pkt->thread_num]++; + if (!pkt->req->isUncacheable()) { + misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; /** @todo Move miss count code into BaseCache */ if (missCount) { --missCount; @@ -248,8 +248,8 @@ Cache::getPacket() { Packet * pkt = missQueue->getPacket(); if (pkt) { - if (!pkt->isUncacheable()) 
{ - if (pkt->cmd == Hard_Prefetch) misses[Hard_Prefetch][pkt->thread_num]++; + if (!pkt->req->isUncacheable()) { + if (pkt->cmd == Hard_Prefetch) misses[Hard_Prefetch][pkt->req->getThreadNum()]++; BlkType *blk = tags->findBlock(pkt); Packet::Command cmd = coherence->getBusCmd(pkt->cmd, (blk)? blk->status : 0); @@ -272,7 +272,7 @@ Cache::sendResult(MemPktPtr &pkt, bool success) if (pkt->cmd == Upgrade) { handleResponse(pkt); } - } else if (pkt && !pkt->isUncacheable()) { + } else if (pkt && !pkt->req->isUncacheable()) { missQueue->restoreOrigCmd(pkt); } } @@ -394,7 +394,7 @@ Cache::snoop(Packet * &pkt) for (int i=0; ipkt->isUncacheable()) { + if (!mshr->pkt->req->isUncacheable()) { if (pkt->cmd.isRead()) { //Only Upgrades don't get here //Supply the data @@ -469,7 +469,7 @@ Cache::probe(Packet * &pkt, bool update) { MemDebug::cacheProbe(pkt); - if (!pkt->isUncacheable()) { + if (!pkt->req->isUncacheable()) { if (pkt->cmd.isInvalidate() && !pkt->cmd.isRead() && !pkt->cmd.isWrite()) { //Upgrade or Invalidate, satisfy it, don't forward @@ -583,7 +583,7 @@ Cache::probe(Packet * &pkt, bool update) // Can't handle it, return pktuest unsatisfied. return 0; } - if (!pkt->isUncacheable()) { + if (!pkt->req->isUncacheable()) { // Fetch the cache block to fill Packet * busPkt = new MemPkt(); busPkt->paddr = blk_addr; @@ -596,7 +596,7 @@ Cache::probe(Packet * &pkt, bool update) busPkt->req->asid = pkt->req->asid; busPkt->xc = pkt->xc; - busPkt->thread_num = pkt->thread_num; + busPkt->req->setThreadNum() = pkt->req->getThreadNum(); busPkt->time = curTick; lat = mi->sendProbe(busPkt, update); @@ -606,7 +606,7 @@ Cache::probe(Packet * &pkt, bool update) return 0; } - misses[pkt->cmd.toIndex()][pkt->thread_num]++; + misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; CacheBlk::State old_state = (blk) ? 
blk->status : 0; tags->handleFill(blk, busPkt, @@ -631,7 +631,7 @@ Cache::probe(Packet * &pkt, bool update) } if (update) { - hits[pkt->cmd.toIndex()][pkt->thread_num]++; + hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; } else if (pkt->cmd.isWrite()) { // Still need to change data in all locations. return mi->sendProbe(pkt, update); diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc index 107fd2502b..9d5b8ef546 100644 --- a/src/mem/cache/coherence/coherence_protocol.cc +++ b/src/mem/cache/coherence/coherence_protocol.cc @@ -465,7 +465,7 @@ CacheBlk::State CoherenceProtocol::getNewState(const Packet * &pkt, CacheBlk::State oldState) { CacheBlk::State state = oldState & stateMask; - int cmd_idx = pkt->cmd.toIndex(); + int cmd_idx = pkt->cmdToIndex(); assert(0 <= state && state <= stateMax); assert(0 <= cmd_idx && cmd_idx < NUM_MEM_CMDS); @@ -498,7 +498,7 @@ CoherenceProtocol::handleBusRequest(BaseCache *cache, Packet * &pkt, } CacheBlk::State state = blk->status & stateMask; - int cmd_idx = pkt->cmd.toIndex(); + int cmd_idx = pkt->cmdToIndex(); assert(0 <= state && state <= stateMax); assert(0 <= cmd_idx && cmd_idx < NUM_MEM_CMDS); diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc index 621855c3d1..912a0f5bd0 100644 --- a/src/mem/cache/miss/blocking_buffer.cc +++ b/src/mem/cache/miss/blocking_buffer.cc @@ -73,7 +73,7 @@ void BlockingBuffer::handleMiss(Packet * &pkt, int blk_size, Tick time) { Addr blk_addr = pkt->paddr & ~(Addr)(blk_size - 1); - if (pkt->cmd.isWrite() && (pkt->isUncacheable() || !writeAllocate || + if (pkt->cmd.isWrite() && (pkt->req->isUncacheable() || !writeAllocate || pkt->cmd.isNoResponse())) { if (pkt->cmd.isNoResponse()) { wb.allocateAsBuffer(pkt); @@ -93,7 +93,7 @@ BlockingBuffer::handleMiss(Packet * &pkt, int blk_size, Tick time) } else { miss.allocate(pkt->cmd, blk_addr, pkt->req->asid, blk_size, pkt); } - if (!pkt->isUncacheable()) { + if 
(!pkt->req->isUncacheable()) { miss.pkt->flags |= CACHE_LINE_FILL; } cache->setBlocked(Blocked_NoMSHRs); @@ -186,12 +186,12 @@ BlockingBuffer::handleResponse(Packet * &pkt, Tick time) } void -BlockingBuffer::squash(int thread_number) +BlockingBuffer::squash(int req->getThreadNum()ber) { - if (miss.threadNum == thread_number) { + if (miss.setThreadNum() == req->getThreadNum()ber) { Packet * target = miss.getTarget(); miss.popTarget(); - assert(target->thread_num == thread_number); + assert(target->req->setThreadNum() == req->getThreadNum()ber); if (target->completionEvent != NULL) { delete target->completionEvent; } @@ -207,7 +207,7 @@ BlockingBuffer::squash(int thread_number) } void -BlockingBuffer::doWriteback(Addr addr, int asid, ExecContext *xc, +BlockingBuffer::doWriteback(Addr addr, int asid, int size, uint8_t *data, bool compressed) { @@ -224,18 +224,14 @@ BlockingBuffer::doWriteback(Addr addr, int asid, ExecContext *xc, * @todo Need to find a way to charge the writeback to the "correct" * thread. */ - pkt->xc = xc; - if (xc) - pkt->thread_num = xc->getThreadNum(); - else - pkt->thread_num = 0; + pkt->req->setThreadNum() = 0; pkt->cmd = Writeback; if (compressed) { pkt->flags |= COMPRESSED; } - writebacks[pkt->thread_num]++; + writebacks[pkt->req->getThreadNum()]++; wb.allocateAsBuffer(pkt); cache->setMasterRequest(Request_WB, curTick); @@ -247,7 +243,7 @@ BlockingBuffer::doWriteback(Addr addr, int asid, ExecContext *xc, void BlockingBuffer::doWriteback(Packet * &pkt) { - writebacks[pkt->thread_num]++; + writebacks[pkt->req->getThreadNum()]++; wb.allocateAsBuffer(pkt); diff --git a/src/mem/cache/miss/blocking_buffer.hh b/src/mem/cache/miss/blocking_buffer.hh index 52256be742..08814b43e6 100644 --- a/src/mem/cache/miss/blocking_buffer.hh +++ b/src/mem/cache/miss/blocking_buffer.hh @@ -164,9 +164,9 @@ public: /** * Removes all outstanding requests for a given thread number. If a request * has been sent to the bus, this function removes all of its targets. 
- * @param thread_number The thread number of the requests to squash. + * @param req->getThreadNum()ber The thread number of the requests to squash. */ - void squash(int thread_number); + void squash(int req->getThreadNum()ber); /** * Return the current number of outstanding misses. @@ -212,12 +212,11 @@ public: * Perform a writeback of dirty data to the given address. * @param addr The address to write to. * @param asid The address space id. - * @param xc The execution context of the address space. * @param size The number of bytes to write. * @param data The data to write, can be NULL. * @param compressed True if the data is compressed. */ - void doWriteback(Addr addr, int asid, ExecContext *xc, + void doWriteback(Addr addr, int asid, int size, uint8_t *data, bool compressed); /** diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc index 7902fbcee9..d02f27d52a 100644 --- a/src/mem/cache/miss/miss_queue.cc +++ b/src/mem/cache/miss/miss_queue.cc @@ -347,7 +347,7 @@ MissQueue::allocateMiss(Packet * &pkt, int size, Tick time) { MSHR* mshr = mq.allocate(pkt, size); mshr->order = order++; - if (!pkt->isUncacheable() ){//&& !pkt->isNoAllocate()) { + if (!pkt->req->isUncacheable() ){//&& !pkt->isNoAllocate()) { // Mark this as a cache line fill mshr->pkt->flags |= CACHE_LINE_FILL; } @@ -399,13 +399,13 @@ MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time) int size = blkSize; Addr blkAddr = pkt->paddr & ~(Addr)(blkSize-1); MSHR* mshr = NULL; - if (!pkt->isUncacheable()) { + if (!pkt->req->isUncacheable()) { mshr = mq.findMatch(blkAddr, pkt->req->asid); if (mshr) { //@todo remove hw_pf here - mshr_hits[pkt->cmd.toIndex()][pkt->thread_num]++; - if (mshr->threadNum != pkt->thread_num) { - mshr->threadNum = -1; + mshr_hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; + if (mshr->getThreadNum() != pkt->req->getThreadNum()) { + mshr->setThreadNum() = -1; } mq.allocateTarget(mshr, pkt); if (mshr->pkt->isNoAllocate() && 
!pkt->isNoAllocate()) { @@ -424,14 +424,14 @@ MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time) mshr_no_allocate_misses++; } else { - mshr_misses[pkt->cmd.toIndex()][pkt->thread_num]++; + mshr_misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; } } else { //Count uncacheable accesses - mshr_uncacheable[pkt->cmd.toIndex()][pkt->thread_num]++; + mshr_uncacheable[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; size = pkt->size; } - if (pkt->cmd.isWrite() && (pkt->isUncacheable() || !writeAllocate || + if (pkt->cmd.isWrite() && (pkt->req->isUncacheable() || !writeAllocate || pkt->cmd.isNoResponse())) { /** * @todo Add write merging here. @@ -489,7 +489,7 @@ MissQueue::getPacket() pkt = prefetcher->getPacket(); if (pkt) { //Update statistic on number of prefetches issued (hwpf_mshr_misses) - mshr_misses[pkt->cmd.toIndex()][pkt->thread_num]++; + mshr_misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; //It will request the bus for the future, but should clear that immedieatley allocateMiss(pkt, pkt->size, curTick); pkt = mq.getReq(); @@ -582,7 +582,7 @@ MissQueue::handleResponse(Packet * &pkt, Tick time) BlockedCause cause = NUM_BLOCKED_CAUSES; if (pkt->isCacheFill() && !pkt->isNoAllocate()) { - mshr_miss_latency[mshr->originalCmd][pkt->thread_num] += + mshr_miss_latency[mshr->originalCmd][pkt->req->getThreadNum()] += curTick - pkt->time; // targets were handled in the cache tags if (mshr == noTargetMSHR) { @@ -608,11 +608,11 @@ MissQueue::handleResponse(Packet * &pkt, Tick time) } } } else { - if (pkt->isUncacheable()) { - mshr_uncacheable_lat[pkt->cmd][pkt->thread_num] += + if (pkt->req->isUncacheable()) { + mshr_uncacheable_lat[pkt->cmd][pkt->req->getThreadNum()] += curTick - pkt->time; } - if (mshr->hasTargets() && pkt->isUncacheable()) { + if (mshr->hasTargets() && pkt->req->isUncacheable()) { // Should only have 1 target if we had any assert(num_targets == 1); Packet * target = mshr->getTarget(); @@ -660,12 +660,12 @@ 
MissQueue::handleResponse(Packet * &pkt, Tick time) } void -MissQueue::squash(int thread_number) +MissQueue::squash(int req->getThreadNum()ber) { bool unblock = false; BlockedCause cause = NUM_BLOCKED_CAUSES; - if (noTargetMSHR && noTargetMSHR->threadNum == thread_number) { + if (noTargetMSHR && noTargetMSHR->setThreadNum() == req->getThreadNum()ber) { noTargetMSHR = NULL; unblock = true; cause = Blocked_NoTargets; @@ -674,7 +674,7 @@ MissQueue::squash(int thread_number) unblock = true; cause = Blocked_NoMSHRs; } - mq.squash(thread_number); + mq.squash(req->getThreadNum()ber); if (!mq.havePending()) { cache->clearMasterRequest(Request_MSHR); } @@ -704,7 +704,7 @@ MissQueue::doWriteback(Addr addr, int asid, Packet * pkt = buildWritebackReq(addr, asid, size, data, compressed); - writebacks[pkt->thread_num]++; + writebacks[pkt->req->getThreadNum()]++; allocateWrite(pkt, 0, curTick); } @@ -713,7 +713,7 @@ MissQueue::doWriteback(Addr addr, int asid, void MissQueue::doWriteback(Packet * &pkt) { - writebacks[pkt->thread_num]++; + writebacks[pkt->req->getThreadNum()]++; allocateWrite(pkt, 0, curTick); } diff --git a/src/mem/cache/miss/miss_queue.hh b/src/mem/cache/miss/miss_queue.hh index ce827fe812..d459821086 100644 --- a/src/mem/cache/miss/miss_queue.hh +++ b/src/mem/cache/miss/miss_queue.hh @@ -268,9 +268,9 @@ class MissQueue /** * Removes all outstanding requests for a given thread number. If a request * has been sent to the bus, this function removes all of its targets. - * @param thread_number The thread number of the requests to squash. + * @param req->getThreadNum()ber The thread number of the requests to squash. */ - void squash(int thread_number); + void squash(int req->getThreadNum()ber); /** * Return the current number of outstanding misses. 
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 73aeaf6cae..5c3c9fd1d0 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -50,7 +50,7 @@ MSHR::MSHR() { inService = false; ntargets = 0; - threadNum = -1; + setThreadNum() = -1; } void @@ -68,7 +68,7 @@ MSHR::allocate(Packet::Command cmd, Addr _addr, int _asid, int size, pkt->data = new uint8_t[size]; pkt->senderState = this; //Set the time here for latency calculations - //pkt->time = curTick; + pkt->time = curTick; if (target) { pkt->req = target->req; @@ -85,7 +85,7 @@ MSHR::allocateAsBuffer(Packet * &target) { addr = target->paddr; asid = target->req->asid; - threadNum = target->thread_num; + setThreadNum() = target->req->getThreadNum(); pkt = new Packet(); pkt->addr = target->addr; pkt->dest = target->dest; @@ -94,6 +94,7 @@ MSHR::allocateAsBuffer(Packet * &target) pkt->req = target->req; pkt->data = new uint8_t[target->size]; pkt->senderState = this; + pkt->time = curTick; } void @@ -161,14 +162,14 @@ MSHR::dump() "inService: %d thread: %d\n" "Addr: %x asid: %d ntargets %d\n" "Targets:\n", - inService, threadNum, addr, asid, ntargets); + inService, getThreadNum(), addr, asid, ntargets); TargetListIterator tar_it = targets.begin(); for (int i = 0; i < ntargets; i++) { assert(tar_it != targets.end()); ccprintf(cerr, "\t%d: Addr: %x cmd: %d\n", - i, (*tar_it)->paddr, (*tar_it)->cmd.toIndex()); + i, (*tar_it)->paddr, (*tar_it)->cmdToIndex()); tar_it++; } diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index 167aa26cd1..3bd6d36d15 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -66,7 +66,7 @@ class MSHR { /** True if the request has been sent to the bus. */ bool inService; /** Thread number of the miss. */ - int threadNum; + int getThreadNum(); /** The request that is forwarded to the next level of the hierarchy. */ Packet * pkt; /** The number of currently allocated targets. 
*/ diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc index 72c8cc4981..ced43d30af 100644 --- a/src/mem/cache/miss/mshr_queue.cc +++ b/src/mem/cache/miss/mshr_queue.cc @@ -237,18 +237,18 @@ MSHRQueue::markPending(MSHR* mshr, Packet::Command cmd) } void -MSHRQueue::squash(int thread_number) +MSHRQueue::squash(int req->getThreadNum()ber) { MSHR::Iterator i = allocatedList.begin(); MSHR::Iterator end = allocatedList.end(); for (; i != end;) { MSHR *mshr = *i; - if (mshr->threadNum == thread_number) { + if (mshr->setThreadNum() == req->getThreadNum()ber) { while (mshr->hasTargets()) { Packet * target = mshr->getTarget(); mshr->popTarget(); - assert(target->thread_num == thread_number); + assert(target->req->setThreadNum() == req->getThreadNum()ber); if (target->completionEvent != NULL) { delete target->completionEvent; } diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh index 3e1d3f39f0..563368d292 100644 --- a/src/mem/cache/miss/mshr_queue.hh +++ b/src/mem/cache/miss/mshr_queue.hh @@ -190,9 +190,9 @@ class MSHRQueue { /** * Squash outstanding requests with the given thread number. If a request * is in service, just squashes the targets. - * @param thread_number The thread to squash. + * @param req->getThreadNum()ber The thread to squash. */ - void squash(int thread_number); + void squash(int req->getThreadNum()ber); /** * Returns true if the pending list is not empty. 
diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc index 14beef2601..7b2d57cd53 100644 --- a/src/mem/cache/prefetch/base_prefetcher.cc +++ b/src/mem/cache/prefetch/base_prefetcher.cc @@ -132,7 +132,7 @@ BasePrefetcher::getPacket() void BasePrefetcher::handleMiss(Packet * &pkt, Tick time) { - if (!pkt->isUncacheable() && !(pkt->isInstRead() && only_data)) + if (!pkt->req->isUncacheable() && !(pkt->isInstRead() && only_data)) { //Calculate the blk address Addr blkAddr = pkt->paddr & ~(Addr)(blkSize-1); @@ -185,7 +185,7 @@ BasePrefetcher::handleMiss(Packet * &pkt, Tick time) prefetch->xc = pkt->xc; prefetch->data = new uint8_t[blkSize]; prefetch->req->asid = pkt->req->asid; - prefetch->thread_num = pkt->thread_num; + prefetch->req->setThreadNum() = pkt->req->getThreadNum(); prefetch->time = time + (*delay); //@todo ADD LATENCY HERE //... initialize diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc index 66d91b35b6..43ab363095 100644 --- a/src/mem/cache/tags/fa_lru.cc +++ b/src/mem/cache/tags/fa_lru.cc @@ -264,8 +264,8 @@ FALRU::findReplacement(Packet * &pkt, PacketList* &writebacks, tagHash.erase(blk->tag); tagHash[blkAlign(pkt->paddr)] = blk; if (blk->isValid()) { - int thread_num = (blk->xc) ? blk->xc->getThreadNum() : 0; - replacements[thread_num]++; + int req->setThreadNum() = (blk->xc) ? blk->xc->getThreadNum() : 0; + replacements[req->getThreadNum()]++; } else { tagsInUse++; blk->isTouched = true; diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc index a574adaa32..f4641401fc 100644 --- a/src/mem/cache/tags/iic.cc +++ b/src/mem/cache/tags/iic.cc @@ -418,8 +418,8 @@ IIC::freeReplacementBlock(PacketList* & writebacks) tag_ptr->isModified() ? "writeback" : "clean"); /* write back replaced block data */ if (tag_ptr && (tag_ptr->isValid())) { - int thread_num = (tag_ptr->xc) ? tag_ptr->xc->getThreadNum() : 0; - replacements[thread_num]++; + int req->setThreadNum() = (tag_ptr->xc) ? 
tag_ptr->xc->getThreadNum() : 0; + replacements[req->getThreadNum()]++; totalRefs += tag_ptr->refCount; ++sampledRefs; tag_ptr->refCount = 0; diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc index 0fe88fd087..19a52aade7 100644 --- a/src/mem/cache/tags/lru.cc +++ b/src/mem/cache/tags/lru.cc @@ -225,8 +225,8 @@ LRU::findReplacement(Packet * &pkt, PacketList* &writebacks, LRUBlk *blk = sets[set].blks[assoc-1]; sets[set].moveToHead(blk); if (blk->isValid()) { - int thread_num = (blk->xc) ? blk->xc->getThreadNum() : 0; - replacements[thread_num]++; + int req->setThreadNum() = (blk->xc) ? blk->xc->getThreadNum() : 0; + replacements[req->getThreadNum()]++; totalRefs += blk->refCount; ++sampledRefs; blk->refCount = 0; diff --git a/src/mem/cache/tags/split_lifo.cc b/src/mem/cache/tags/split_lifo.cc index f2c37c80d3..c6bb91eff1 100644 --- a/src/mem/cache/tags/split_lifo.cc +++ b/src/mem/cache/tags/split_lifo.cc @@ -317,8 +317,8 @@ SplitLIFO::findReplacement(Packet * &pkt, PacketList* &writebacks, DPRINTF(Split, "just assigned %#x addr into LIFO, replacing %#x status %#x\n", pkt->paddr, regenerateBlkAddr(blk->tag, set), blk->status); if (blk->isValid()) { - int thread_num = (blk->xc) ? blk->xc->getThreadNum() : 0; - replacements[thread_num]++; + int req->setThreadNum() = (blk->xc) ? blk->xc->getThreadNum() : 0; + replacements[req->getThreadNum()]++; totalRefs += blk->refCount; ++sampledRefs; blk->refCount = 0; diff --git a/src/mem/cache/tags/split_lru.cc b/src/mem/cache/tags/split_lru.cc index ea5b92d6f2..4b7f4c1147 100644 --- a/src/mem/cache/tags/split_lru.cc +++ b/src/mem/cache/tags/split_lru.cc @@ -244,8 +244,8 @@ SplitLRU::findReplacement(Packet * &pkt, PacketList* &writebacks, SplitBlk *blk = sets[set].blks[assoc-1]; sets[set].moveToHead(blk); if (blk->isValid()) { - int thread_num = (blk->xc) ? blk->xc->getThreadNum() : 0; - replacements[thread_num]++; + int req->setThreadNum() = (blk->xc) ? 
blk->xc->getThreadNum() : 0; + replacements[req->getThreadNum()]++; totalRefs += blk->refCount; ++sampledRefs; blk->refCount = 0; diff --git a/src/mem/config/cache.hh b/src/mem/config/cache.hh new file mode 100644 index 0000000000..24da040219 --- /dev/null +++ b/src/mem/config/cache.hh @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Nathan Binkert + */ + +/** + * @file + * Central location to configure which cache types we want to build + * into the simulator. In the future, this should probably be + * autogenerated by some sort of configuration script. + */ +#define USE_CACHE_LRU 1 +#define USE_CACHE_FALRU 1 +// #define USE_CACHE_SPLIT 1 +// #define USE_CACHE_SPLIT_LIFO 1 +#define USE_CACHE_IIC 1 + diff --git a/src/mem/packet.cc b/src/mem/packet.cc index 56dd2bdfad..91298df8ce 100644 --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -57,6 +57,19 @@ Packet::cmdString() const } } +const std::string & +Packet::cmdIdxToString(Packet::Command idx) +{ + switch (idx) { + case ReadReq: return ReadReqString; + case WriteReq: return WriteReqString; + case WriteReqNoAck: return WriteReqNoAckString; + case ReadResp: return ReadRespString; + case WriteResp: return WriteRespString; + default: return OtherCmdString; + } +} + /** delete the data pointed to in the data pointer. Ok to call to matter how * data was allocted. */ void diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 403039d966..176c6f793b 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -46,6 +46,10 @@ struct Packet; typedef Packet* PacketPtr; typedef uint8_t* PacketDataPtr; +//For statistics we need max number of commands, hard code it at +//20 for now. @todo fix later +#define NUM_MEM_CMDS 1 << 9 + /** * A Packet is used to encapsulate a transfer between two objects in * the memory system (e.g., the L1 and L2 cache). (In contrast, a @@ -102,6 +106,9 @@ class Packet public: + /** Used to calculate latencies for each packet.*/ + Tick time; + /** The special destination address indicating that the packet * should be routed based on its address. 
*/ static const short Broadcast = -1; @@ -149,6 +156,8 @@ class Packet IsRequest = 1 << 4, IsResponse = 1 << 5, NeedsResponse = 1 << 6, + IsSWPrefetch = 1 << 7, + IsHWPrefetch = 1 << 8 }; public: @@ -159,13 +168,24 @@ class Packet WriteReq = IsWrite | IsRequest | NeedsResponse, WriteReqNoAck = IsWrite | IsRequest, ReadResp = IsRead | IsResponse, - WriteResp = IsWrite | IsResponse + WriteResp = IsWrite | IsResponse, + Writeback = IsWrite | IsRequest, + SoftPFReq = IsRead | IsRequest | IsSWPrefetch | NeedsResponse, + HardPFReq = IsRead | IsRequest | IsHWPrefetch | NeedsResponse, + SoftPFResp = IsRead | IsRequest | IsSWPrefetch | IsResponse, + HardPFResp = IsRead | IsRequest | IsHWPrefetch | IsResponse }; /** Return the string name of the cmd field (for debugging and * tracing). */ const std::string &cmdString() const; + /** Reutrn the string to a cmd given by idx. */ + const std::string &cmdIdxToString(Command idx); + + /** Return the index of this command. */ + inline int cmdToIndex() const { return (int) cmd; } + /** The command field of the packet. */ Command cmd; diff --git a/src/mem/request.hh b/src/mem/request.hh index af1d6d8a85..469184b130 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -224,6 +224,9 @@ class Request /** Accessor function for pc.*/ Addr getPC() { assert(validPC); return pc; } + /** Accessor Function to Check Cacheability. */ + bool isUncacheable() { return getFlags() & UNCACHEABLE; } + friend class Packet; }; From 0d323c753d897bec72884089bc0dc334a64e9eb3 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Wed, 28 Jun 2006 17:28:33 -0400 Subject: [PATCH 075/152] More Changes, working towards cache.cc compiling. Headers cleaned up. 
src/mem/cache/cache_blk.hh: Remove XC --HG-- extra : convert_revision : aa2c43e4412ebb93165e12f693d5126983cfd0dc --- src/mem/cache/cache.hh | 7 +- src/mem/cache/cache_blk.hh | 3 +- src/mem/cache/cache_impl.hh | 183 +++++++++--------- src/mem/cache/coherence/coherence_protocol.hh | 4 +- src/mem/cache/coherence/simple_coherence.hh | 6 +- src/mem/cache/coherence/uni_coherence.hh | 11 +- src/mem/cache/miss/blocking_buffer.hh | 2 +- src/mem/cache/miss/miss_queue.hh | 2 +- src/mem/cache/miss/mshr_queue.hh | 2 +- src/mem/packet.hh | 17 +- src/mem/request.hh | 1 + 11 files changed, 124 insertions(+), 114 deletions(-) diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 78e87479bb..587faaf511 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -138,7 +138,7 @@ class Cache : public BaseCache }; /** Instantiates a basic cache object. */ - Cache(const std::string &_name, HierParams *hier_params, Params ¶ms); + Cache(const std::string &_name, Params ¶ms); void regStats(); @@ -147,7 +147,7 @@ class Cache : public BaseCache * @param req The request to perform. * @return The result of the access. */ - MemAccessResult access(Packet * &pkt); + bool access(Packet * &pkt); /** * Selects a request to send on the bus. 
@@ -233,7 +233,8 @@ class Cache : public BaseCache */ void respond(Packet * &pkt, Tick time) { - si->respond(pkt,time); + //si->respond(pkt,time); + cpuSidePort->sendAtomic(pkt); } /** diff --git a/src/mem/cache/cache_blk.hh b/src/mem/cache/cache_blk.hh index 02fdd7a514..67e65d25bb 100644 --- a/src/mem/cache/cache_blk.hh +++ b/src/mem/cache/cache_blk.hh @@ -97,7 +97,7 @@ class CacheBlk int refCount; CacheBlk() - : asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0), xc(0), + : asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0), set(-1), refCount(0) {} @@ -114,7 +114,6 @@ class CacheBlk size = rhs.size; status = rhs.status; whenReady = rhs.whenReady; - xc = rhs.xc; set = rhs.set; refCount = rhs.refCount; return *this; diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 3dc95af68f..699d874deb 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -51,14 +51,6 @@ #include "mem/cache/miss/mshr.hh" #include "mem/cache/prefetch/prefetcher.hh" -#include "mem/bus/bus.hh" - -#include "mem/bus/slave_interface.hh" -#include "mem/memory_interface.hh" -#include "mem/bus/master_interface.hh" - -#include "mem/mem_debug.hh" - #include "sim/sim_events.hh" // for SimExitEvent using namespace std; @@ -66,7 +58,7 @@ using namespace std; template bool Cache:: -doTimingAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide) +doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) { if (isCpuSide) { @@ -74,17 +66,18 @@ doTimingAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide) } else { - if (pkt->isRespnse()) + if (pkt->isResponse()) handleResponse(pkt); else snoop(pkt); } + return true; //Deal with blocking.... 
} template Tick Cache:: -doAtomicAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide) +doAtomicAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) { if (isCpuSide) { @@ -92,7 +85,7 @@ doAtomicAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide) } else { - if (pkt->isRespnse()) + if (pkt->isResponse()) handleResponse(pkt); else snoopProbe(pkt, true); @@ -102,7 +95,7 @@ doAtomicAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide) template void Cache:: -doFunctionalAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide) +doFunctionalAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) { if (isCpuSide) { @@ -110,7 +103,7 @@ doFunctionalAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide) } else { - if (pkt->isRespnse()) + if (pkt->isResponse()) handleResponse(pkt); else snoopProbe(pkt, false); @@ -128,9 +121,9 @@ recvStatusChange(Port::Status status, bool isCpuSide) template Cache:: -Cache(const std::string &_name, HierParams *hier_params, +Cache(const std::string &_name, Cache::Params ¶ms) - : BaseCache(_name, hier_params, params.baseParams), + : BaseCache(_name, params.baseParams), prefetchAccess(params.prefetchAccess), tags(params.tags), missQueue(params.missQueue), coherence(params.coherence), prefetcher(params.prefetcher), @@ -148,7 +141,7 @@ Cache(const std::string &_name, HierParams *hier_params, prefetcher->setTags(tags); prefetcher->setBuffer(missQueue); invalidatePkt = new Packet; - invalidatePkt->cmd = Invalidate; + invalidatePkt->cmd = Packet::InvalidateReq; } template @@ -163,12 +156,13 @@ Cache::regStats() } template -MemAccessResult -Cache::access(Packet &pkt) +bool +Cache::access(PacketPtr &pkt) { - MemDebug::cacheAccess(pkt); +//@todo Add back in MemDebug Calls +// MemDebug::cacheAccess(pkt); BlkType *blk = NULL; - PacketList* writebacks; + PacketList writebacks; int size = blkSize; int lat = hitLatency; if (prefetchAccess) { @@ -176,18 +170,19 @@ Cache::access(Packet &pkt) prefetcher->handleMiss(pkt, 
curTick); } if (!pkt->req->isUncacheable()) { - if (pkt->cmd.isInvalidate() && !pkt->cmd.isRead() - && !pkt->cmd.isWrite()) { + if (pkt->isInvalidate() && !pkt->isRead() + && !pkt->isWrite()) { //Upgrade or Invalidate //Look into what happens if two slave caches on bus - DPRINTF(Cache, "%s %d %x ? blk_addr: %x\n", pkt->cmd.toString(), - pkt->req->asid, pkt->paddr & (((ULL(1))<<48)-1), - pkt->paddr & ~((Addr)blkSize - 1)); + DPRINTF(Cache, "%s %d %x ? blk_addr: %x\n", pkt->cmdString(), + pkt->req->asid, pkt->addr & (((ULL(1))<<48)-1), + pkt->addr & ~((Addr)blkSize - 1)); //@todo Should this return latency have the hit latency in it? // respond(pkt,curTick+lat); - pkt->flags |= SATISFIED; - return MA_HIT; + (int)pkt->coherence |= SATISFIED; +// return MA_HIT; //@todo, return values + return true; } blk = tags->handleAccess(pkt, lat, writebacks); } else { @@ -198,10 +193,10 @@ Cache::access(Packet &pkt) /** @todo make the fast write alloc (wh64) work with coherence. */ /** @todo Do we want to do fast writes for writebacks as well? */ if (!blk && pkt->size >= blkSize && coherence->allowFastWrites() && - (pkt->cmd == Write || pkt->cmd == WriteInvalidate) ) { + (pkt->cmd == Packet::WriteReq || pkt->cmd == Packet::WriteInvalidateReq) ) { // not outstanding misses, can do this - MSHR* outstanding_miss = missQueue->findMSHR(pkt->paddr, pkt->req->asid); - if (pkt->cmd ==WriteInvalidate || !outstanding_miss) { + MSHR* outstanding_miss = missQueue->findMSHR(pkt->addr, pkt->req->asid); + if (pkt->cmd == Packet::WriteInvalidateReq || !outstanding_miss) { if (outstanding_miss) { warn("WriteInv doing a fastallocate" "with an outstanding miss to the same address\n"); @@ -215,16 +210,17 @@ Cache::access(Packet &pkt) missQueue->doWriteback(writebacks.front()); writebacks.pop_front(); } - DPRINTF(Cache, "%s %d %x %s blk_addr: %x pc %x\n", pkt->cmd.toString(), - pkt->req->asid, pkt->paddr & (((ULL(1))<<48)-1), (blk) ? 
"hit" : "miss", - pkt->paddr & ~((Addr)blkSize - 1), pkt->pc); + DPRINTF(Cache, "%s %d %x %s blk_addr: %x pc %x\n", pkt->cmdString(), + pkt->req->asid, pkt->addr & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss", + pkt->addr & ~((Addr)blkSize - 1), pkt->req->pc); if (blk) { // Hit hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; // clear dirty bit if write through - if (!pkt->cmd.isNoResponse()) + if (pkt->needsResponse()) respond(pkt, curTick+lat); - return MA_HIT; +// return MA_HIT; + return true; } // Miss @@ -234,11 +230,12 @@ Cache::access(Packet &pkt) if (missCount) { --missCount; if (missCount == 0) - new SimExitEvent("A cache reached the maximum miss count"); + new SimLoopExitEvent("A cache reached the maximum miss count"); } } missQueue->handleMiss(pkt, size, curTick + hitLatency); - return MA_CACHE_MISS; +// return MA_CACHE_MISS; + return true; } @@ -249,7 +246,7 @@ Cache::getPacket() Packet * pkt = missQueue->getPacket(); if (pkt) { if (!pkt->req->isUncacheable()) { - if (pkt->cmd == Hard_Prefetch) misses[Hard_Prefetch][pkt->req->getThreadNum()]++; + if (pkt->cmd == Packet::HardPFReq) misses[Packet::HardPFReq][pkt->req->getThreadNum()]++; BlkType *blk = tags->findBlock(pkt); Packet::Command cmd = coherence->getBusCmd(pkt->cmd, (blk)? 
blk->status : 0); @@ -257,19 +254,19 @@ Cache::getPacket() } } - assert(!doMasterPktuest() || missQueue->havePending()); + assert(!doMasterRequest() || missQueue->havePending()); assert(!pkt || pkt->time <= curTick); return pkt; } template void -Cache::sendResult(MemPktPtr &pkt, bool success) +Cache::sendResult(PacketPtr &pkt, bool success) { if (success) { missQueue->markInService(pkt); //Temp Hack for UPGRADES - if (pkt->cmd == Upgrade) { + if (pkt->cmd == Packet::UpgradeReq) { handleResponse(pkt); } } else if (pkt && !pkt->req->isUncacheable()) { @@ -283,14 +280,14 @@ Cache::handleResponse(Packet * &pkt) { BlkType *blk = NULL; if (pkt->senderState) { - MemDebug::cacheResponse(pkt); - DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->paddr, - pkt->paddr & (((ULL(1))<<48)-1)); +// MemDebug::cacheResponse(pkt); + DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->addr, + pkt->addr & (((ULL(1))<<48)-1)); if (pkt->isCacheFill() && !pkt->isNoAllocate()) { blk = tags->findBlock(pkt); CacheBlk::State old_state = (blk) ? blk->status : 0; - MemPktList writebacks; + PacketList writebacks; blk = tags->handleFill(blk, pkt->senderState, coherence->getNewState(pkt,old_state), writebacks); @@ -309,11 +306,11 @@ Cache::pseudoFill(Addr addr, int asid) // Need to temporarily move this blk into MSHRs MSHR *mshr = missQueue->allocateTargetList(addr, asid); int lat; - PacketList* dummy; + PacketList dummy; // Read the data into the mshr BlkType *blk = tags->handleAccess(mshr->pkt, lat, dummy, false); assert(dummy.empty()); - assert(mshr->pkt->isSatisfied()); + assert((int)mshr->pkt->coherence & SATISFIED); // can overload order since it isn't used on non pending blocks mshr->order = blk->status; // temporarily remove the block from the cache. 
@@ -325,17 +322,17 @@ void Cache::pseudoFill(MSHR *mshr) { // Need to temporarily move this blk into MSHRs - assert(mshr->pkt->cmd == Read); + assert(mshr->pkt->cmd == Packet::ReadReq); int lat; - PacketList* dummy; + PacketList dummy; // Read the data into the mshr BlkType *blk = tags->handleAccess(mshr->pkt, lat, dummy, false); assert(dummy.empty()); - assert(mshr->pkt->isSatisfied()); + assert((int)mshr->pkt->coherence & SATISFIED); // can overload order since it isn't used on non pending blocks mshr->order = blk->status; // temporarily remove the block from the cache. - tags->invalidateBlk(mshr->pkt->paddr, mshr->pkt->req->asid); + tags->invalidateBlk(mshr->pkt->addr, mshr->pkt->req->asid); } @@ -351,19 +348,19 @@ template void Cache::snoop(Packet * &pkt) { - Addr blk_addr = pkt->paddr & ~(Addr(blkSize-1)); + Addr blk_addr = pkt->addr & ~(Addr(blkSize-1)); BlkType *blk = tags->findBlock(pkt); MSHR *mshr = missQueue->findMSHR(blk_addr, pkt->req->asid); if (isTopLevel() && coherence->hasProtocol()) { //@todo Move this into handle bus req //If we find an mshr, and it is in service, we need to NACK or invalidate if (mshr) { if (mshr->inService) { - if ((mshr->pkt->cmd.isInvalidate() || !mshr->pkt->isCacheFill()) - && (pkt->cmd != Invalidate && pkt->cmd != WriteInvalidate)) { + if ((mshr->pkt->isInvalidate() || !mshr->pkt->isCacheFill()) + && (pkt->cmd != Packet::InvalidateReq && pkt->cmd != Packet::WriteInvalidateReq)) { //If the outstanding request was an invalidate (upgrade,readex,..) //Then we need to ACK the request until we get the data //Also NACK if the outstanding request is not a cachefill (writeback) - pkt->flags |= NACKED_LINE; + (int)pkt->coherence |= NACKED_LINE; return; } else { @@ -376,11 +373,11 @@ Cache::snoop(Packet * &pkt) //@todo Make it so that a read to a pending read can't be exclusive now. 
//Set the address so find match works - invalidatePkt->paddr = pkt->paddr; + invalidatePkt->addr = pkt->addr; //Append the invalidate on missQueue->addTarget(mshr,invalidatePkt); - DPRINTF(Cache, "Appending Invalidate to blk_addr: %x\n", pkt->paddr & (((ULL(1))<<48)-1)); + DPRINTF(Cache, "Appending Invalidate to blk_addr: %x\n", pkt->addr & (((ULL(1))<<48)-1)); return; } } @@ -388,34 +385,32 @@ Cache::snoop(Packet * &pkt) //We also need to check the writeback buffers and handle those std::vector writebacks; if (missQueue->findWrites(blk_addr, pkt->req->asid, writebacks)) { - DPRINTF(Cache, "Snoop hit in writeback to blk_addr: %x\n", pkt->paddr & (((ULL(1))<<48)-1)); + DPRINTF(Cache, "Snoop hit in writeback to blk_addr: %x\n", pkt->addr & (((ULL(1))<<48)-1)); //Look through writebacks for any non-uncachable writes, use that for (int i=0; ipkt->req->isUncacheable()) { - if (pkt->cmd.isRead()) { + if (pkt->isRead()) { //Only Upgrades don't get here //Supply the data - pkt->flags |= SATISFIED; + (int)pkt->coherence |= SATISFIED; //If we are in an exclusive protocol, make it ask again //to get write permissions (upgrade), signal shared - pkt->flags |= SHARED_LINE; + (int)pkt->coherence |= SHARED_LINE; - if (doData()) { - assert(pkt->cmd.isRead()); + assert(pkt->isRead()); + assert(pkt->offset < blkSize); + assert(pkt->size <= blkSize); + assert(pkt->offset + pkt->size <=blkSize); + memcpy(pkt->data, mshr->pkt->data + pkt->offset, pkt->size); - assert(pkt->offset < blkSize); - assert(pkt->size <= blkSize); - assert(pkt->offset + pkt->size <=blkSize); - memcpy(pkt->data, mshr->pkt->data + pkt->offset, pkt->size); - } respondToSnoop(pkt); } - if (pkt->cmd.isInvalidate()) { + if (pkt->isInvalidate()) { //This must be an upgrade or other cache will take ownership missQueue->markInService(mshr->pkt); } @@ -439,7 +434,7 @@ void Cache::snoopResponse(Packet * &pkt) { //Need to handle the response, if NACKED - if (pkt->isNacked()) { + if ((int)pkt->coherence & NACKED_LINE) { 
//Need to mark it as not in service, and retry for bus assert(0); //Yeah, we saw a NACK come through @@ -467,16 +462,16 @@ template Tick Cache::probe(Packet * &pkt, bool update) { - MemDebug::cacheProbe(pkt); +// MemDebug::cacheProbe(pkt); if (!pkt->req->isUncacheable()) { - if (pkt->cmd.isInvalidate() && !pkt->cmd.isRead() - && !pkt->cmd.isWrite()) { + if (pkt->isInvalidate() && !pkt->isRead() + && !pkt->isWrite()) { //Upgrade or Invalidate, satisfy it, don't forward - DPRINTF(Cache, "%s %d %x ? blk_addr: %x\n", pkt->cmd.toString(), - pkt->req->asid, pkt->paddr & (((ULL(1))<<48)-1), - pkt->paddr & ~((Addr)blkSize - 1)); - pkt->flags |= SATISFIED; + DPRINTF(Cache, "%s %d %x ? blk_addr: %x\n", pkt->cmdString(), + pkt->req->asid, pkt->addr & (((ULL(1))<<48)-1), + pkt->addr & ~((Addr)blkSize - 1)); + (int)pkt->coherence |= SATISFIED; return 0; } } @@ -486,13 +481,13 @@ Cache::probe(Packet * &pkt, bool update) return mi->sendProbe(pkt,update); } - PacketList* writebacks; + PacketList writebacks; int lat; BlkType *blk = tags->handleAccess(pkt, lat, writebacks, update); if (!blk) { // Need to check for outstanding misses and writes - Addr blk_addr = pkt->paddr & ~(blkSize - 1); + Addr blk_addr = pkt->addr & ~(blkSize - 1); // There can only be one matching outstanding miss. 
MSHR* mshr = missQueue->findMSHR(blk_addr, pkt->req->asid); @@ -513,12 +508,12 @@ Cache::probe(Packet * &pkt, bool update) Packet * target = *i; // If the target contains data, and it overlaps the // probed request, need to update data - if (target->cmd.isWrite() && target->overlaps(pkt)) { + if (target->isWrite() && target->overlaps(pkt)) { uint8_t* pkt_data; uint8_t* write_data; int data_size; - if (target->paddr < pkt->paddr) { - int offset = pkt->paddr - target->paddr; + if (target->addr < pkt->addr) { + int offset = pkt->addr - target->paddr; pkt_data = pkt->data; write_data = target->data + offset; data_size = target->size - offset; @@ -526,7 +521,7 @@ Cache::probe(Packet * &pkt, bool update) if (data_size > pkt->size) data_size = pkt->size; } else { - int offset = target->paddr - pkt->paddr; + int offset = target->addr - pkt->addr; pkt_data = pkt->data + offset; write_data = target->data; data_size = pkt->size - offset; @@ -535,7 +530,7 @@ Cache::probe(Packet * &pkt, bool update) data_size = target->size; } - if (pkt->cmd.isWrite()) { + if (pkt->isWrite()) { memcpy(pkt_data, write_data, data_size); } else { memcpy(write_data, pkt_data, data_size); @@ -550,8 +545,8 @@ Cache::probe(Packet * &pkt, bool update) uint8_t* pkt_data; uint8_t* write_data; int data_size; - if (write->paddr < pkt->paddr) { - int offset = pkt->paddr - write->paddr; + if (write->addr < pkt->addr) { + int offset = pkt->addr - write->addr; pkt_data = pkt->data; write_data = write->data + offset; data_size = write->size - offset; @@ -559,7 +554,7 @@ Cache::probe(Packet * &pkt, bool update) if (data_size > pkt->size) data_size = pkt->size; } else { - int offset = write->paddr - pkt->paddr; + int offset = write->addr - pkt->addr; pkt_data = pkt->data + offset; write_data = write->data; data_size = pkt->size - offset; @@ -568,7 +563,7 @@ Cache::probe(Packet * &pkt, bool update) data_size = write->size; } - if (pkt->cmd.isWrite()) { + if (pkt->isWrite()) { memcpy(pkt_data, write_data, 
data_size); } else { memcpy(write_data, pkt_data, data_size); @@ -585,8 +580,8 @@ Cache::probe(Packet * &pkt, bool update) } if (!pkt->req->isUncacheable()) { // Fetch the cache block to fill - Packet * busPkt = new MemPkt(); - busPkt->paddr = blk_addr; + Packet * busPkt = new Packet(); + busPkt->addr = blk_addr; busPkt->size = blkSize; busPkt->data = new uint8_t[blkSize]; @@ -632,7 +627,7 @@ Cache::probe(Packet * &pkt, bool update) if (update) { hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; - } else if (pkt->cmd.isWrite()) { + } else if (pkt->isWrite()) { // Still need to change data in all locations. return mi->sendProbe(pkt, update); } @@ -644,9 +639,9 @@ Cache::probe(Packet * &pkt, bool update) template Tick -Cache::snoopProbe(MemPktPtr &pkt, bool update) +Cache::snoopProbe(PacketPtr &pkt, bool update) { - Addr blk_addr = pkt->paddr & ~(Addr(blkSize-1)); + Addr blk_addr = pkt->addr & ~(Addr(blkSize-1)); BlkType *blk = tags->findBlock(pkt); MSHR *mshr = missQueue->findMSHR(blk_addr, pkt->req->asid); CacheBlk::State new_state = 0; diff --git a/src/mem/cache/coherence/coherence_protocol.hh b/src/mem/cache/coherence/coherence_protocol.hh index 4f65205525..21351ace46 100644 --- a/src/mem/cache/coherence/coherence_protocol.hh +++ b/src/mem/cache/coherence/coherence_protocol.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Erik Hallnor + * Ron Dreslinski * Steve Reinhardt */ @@ -40,7 +41,6 @@ #include "sim/sim_object.hh" #include "mem/packet.hh" -#include "mem/mem_cmd.hh" #include "mem/cache/cache_blk.hh" #include "base/statistics.hh" @@ -89,7 +89,7 @@ class CoherenceProtocol : public SimObject * @param oldState The current block state. * @return The new state. 
*/ - CacheBlk::State getNewState(const Packet * &pkt, + CacheBlk::State getNewState(Packet * &pkt, CacheBlk::State oldState); /** diff --git a/src/mem/cache/coherence/simple_coherence.hh b/src/mem/cache/coherence/simple_coherence.hh index 1956745902..ca9d18bebd 100644 --- a/src/mem/cache/coherence/simple_coherence.hh +++ b/src/mem/cache/coherence/simple_coherence.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Erik Hallnor + * Ron Dreslinski */ /** @@ -39,7 +40,6 @@ #include #include "mem/packet.hh" -#include "mem/mem_cmd.hh" #include "mem/cache/cache_blk.hh" #include "mem/cache/miss/mshr_queue.hh" #include "mem/cache/coherence/coherence_protocol.hh" @@ -119,7 +119,7 @@ class SimpleCoherence //Got rid of, there could be an MSHR, but it can't be in service if (blk != NULL) { - if (pkt->cmd != Writeback) { + if (pkt->cmd != Packet::Writeback) { return protocol->handleBusRequest(cache, pkt, blk, mshr, new_state); } @@ -138,7 +138,7 @@ class SimpleCoherence */ Packet::Command getBusCmd(Packet::Command &cmd, CacheBlk::State state) { - if (cmd == Writeback) return Writeback; + if (cmd == Packet::Writeback) return Packet::Writeback; return protocol->getBusCmd(cmd, state); } diff --git a/src/mem/cache/coherence/uni_coherence.hh b/src/mem/cache/coherence/uni_coherence.hh index b64f6c931a..4e895997f6 100644 --- a/src/mem/cache/coherence/uni_coherence.hh +++ b/src/mem/cache/coherence/uni_coherence.hh @@ -34,7 +34,6 @@ #include "base/trace.hh" #include "mem/cache/cache_blk.hh" #include "mem/cache/miss/mshr_queue.hh" -#include "mem/mem_cmd.hh" #include "mem/packet.hh" class BaseCache; @@ -79,11 +78,11 @@ class UniCoherence */ Packet::Command getBusCmd(Packet::Command &cmd, CacheBlk::State state) { - if (cmd == Hard_Prefetch && state) + if (cmd == Packet::HardPFReq && state) warn("Trying to issue a prefetch to a block we already have\n"); - if (cmd == Writeback) - return Writeback; - return Read; + if (cmd == 
Packet::Writeback) + return Packet::Writeback; + return Packet::ReadReq; } /** @@ -96,7 +95,7 @@ class UniCoherence { if (pkt->senderState) //Blocking Buffers don't get mshrs { - if (pkt->senderState->originalCmd == Hard_Prefetch) { + if (((MSHR *)(pkt->senderState))->originalCmd == Packet::HardPFReq) { DPRINTF(HWPrefetch, "Marking a hardware prefetch as such in the state\n"); return BlkHWPrefetched | BlkValid | BlkWritable; } diff --git a/src/mem/cache/miss/blocking_buffer.hh b/src/mem/cache/miss/blocking_buffer.hh index 08814b43e6..39a06a3779 100644 --- a/src/mem/cache/miss/blocking_buffer.hh +++ b/src/mem/cache/miss/blocking_buffer.hh @@ -166,7 +166,7 @@ public: * has been sent to the bus, this function removes all of its targets. * @param req->getThreadNum()ber The thread number of the requests to squash. */ - void squash(int req->getThreadNum()ber); + void squash(int threadNum); /** * Return the current number of outstanding misses. diff --git a/src/mem/cache/miss/miss_queue.hh b/src/mem/cache/miss/miss_queue.hh index d459821086..b88b7038c6 100644 --- a/src/mem/cache/miss/miss_queue.hh +++ b/src/mem/cache/miss/miss_queue.hh @@ -270,7 +270,7 @@ class MissQueue * has been sent to the bus, this function removes all of its targets. * @param req->getThreadNum()ber The thread number of the requests to squash. */ - void squash(int req->getThreadNum()ber); + void squash(int threadNum); /** * Return the current number of outstanding misses. diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh index 563368d292..a67f1b9a6f 100644 --- a/src/mem/cache/miss/mshr_queue.hh +++ b/src/mem/cache/miss/mshr_queue.hh @@ -192,7 +192,7 @@ class MSHRQueue { * is in service, just squashes the targets. * @param req->getThreadNum()ber The thread to squash. */ - void squash(int req->getThreadNum()ber); + void squash(int threadNum); /** * Returns true if the pending list is not empty. 
diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 176c6f793b..3d37686768 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -41,10 +41,17 @@ #include "mem/request.hh" #include "arch/isa_traits.hh" #include "sim/root.hh" +#include struct Packet; typedef Packet* PacketPtr; typedef uint8_t* PacketDataPtr; +typedef std::list PacketList; + +//Coherence Flags +#define NACKED_LINE 1 << 0 +#define SATISFIED 1 << 1 +#define SHARED_LINE 1 << 2 //For statistics we need max number of commands, hard code it at //20 for now. @todo fix later @@ -173,7 +180,10 @@ class Packet SoftPFReq = IsRead | IsRequest | IsSWPrefetch | NeedsResponse, HardPFReq = IsRead | IsRequest | IsHWPrefetch | NeedsResponse, SoftPFResp = IsRead | IsRequest | IsSWPrefetch | IsResponse, - HardPFResp = IsRead | IsRequest | IsHWPrefetch | IsResponse + HardPFResp = IsRead | IsRequest | IsHWPrefetch | IsResponse, + InvalidateReq = IsInvalidate | IsRequest, + WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest, + UpgradeReq = IsInvalidate | NeedsResponse }; /** Return the string name of the cmd field (for debugging and @@ -190,9 +200,14 @@ class Packet Command cmd; bool isRead() { return (cmd & IsRead) != 0; } + bool isWrite() { return (cmd & IsWrite) != 0; } bool isRequest() { return (cmd & IsRequest) != 0; } bool isResponse() { return (cmd & IsResponse) != 0; } bool needsResponse() { return (cmd & NeedsResponse) != 0; } + bool isInvalidate() { return (cmd * IsInvalidate) != 0; } + + bool isCacheFill() { assert("Unimplemented yet\n" && 0); } + bool isNoAllocate() { assert("Unimplemented yet\n" && 0); } /** Possible results of a packet's request. */ enum Result diff --git a/src/mem/request.hh b/src/mem/request.hh index 469184b130..46d9b6fd70 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -44,6 +44,7 @@ class Request; typedef Request* RequestPtr; + /** The request is a Load locked/store conditional. 
*/ const unsigned LOCKED = 0x001; /** The virtual address is also the physical address. */ From eafb5c4936f7d3233c223d69b435c6be360bbfb2 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Thu, 29 Jun 2006 16:07:19 -0400 Subject: [PATCH 076/152] Still missing prefetch and tags directories as well as cache builder. Some implementation details were left blank still, need to fill them in. src/SConscript: Reorder build to compile all files first src/mem/cache/cache.hh: src/mem/cache/cache_builder.cc: src/mem/cache/cache_impl.hh: src/mem/cache/coherence/coherence_protocol.cc: src/mem/cache/coherence/uni_coherence.cc: src/mem/cache/coherence/uni_coherence.hh: src/mem/cache/miss/blocking_buffer.cc: src/mem/cache/miss/miss_queue.cc: src/mem/cache/miss/mshr.cc: src/mem/cache/miss/mshr.hh: src/mem/cache/miss/mshr_queue.cc: More changesets pulled, now compiles everything in /miss directory and in the root directory src/mem/packet.hh: Add some more support, need to clean some of it out once everything is working --HG-- extra : convert_revision : ba73676165810edf2c2effaf5fbad8397d6bd800 --- src/SConscript | 3 +- src/mem/cache/cache.hh | 35 ++- src/mem/cache/cache_builder.cc | 38 ++- src/mem/cache/cache_impl.hh | 185 ++++++------ src/mem/cache/coherence/coherence_protocol.cc | 283 +++++++++--------- src/mem/cache/coherence/uni_coherence.cc | 13 +- src/mem/cache/coherence/uni_coherence.hh | 1 + src/mem/cache/miss/blocking_buffer.cc | 91 +++--- src/mem/cache/miss/miss_queue.cc | 163 +++++----- src/mem/cache/miss/mshr.cc | 35 ++- src/mem/cache/miss/mshr.hh | 2 +- src/mem/cache/miss/mshr_queue.cc | 30 +- src/mem/packet.hh | 44 ++- 13 files changed, 491 insertions(+), 432 deletions(-) diff --git a/src/SConscript b/src/SConscript index ff41e59316..04da17ee63 100644 --- a/src/SConscript +++ b/src/SConscript @@ -103,7 +103,6 @@ base_sources = Split(''' mem/cache/base_cache.cc mem/cache/cache.cc - mem/cache/cache_builder.cc mem/cache/coherence/coherence_protocol.cc 
mem/cache/coherence/uni_coherence.cc mem/cache/miss/blocking_buffer.cc @@ -126,6 +125,8 @@ base_sources = Split(''' mem/cache/tags/split_lifo.cc mem/cache/tags/split_lru.cc + mem/cache/cache_builder.cc + sim/builder.cc sim/debug.cc sim/eventq.cc diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 587faaf511..d2af1d8bf4 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -44,8 +44,9 @@ #include "mem/cache/base_cache.hh" #include "mem/cache/prefetch/prefetcher.hh" -// forward declarations -class Bus; +//Forward decleration +class MSHR; + /** * A template-policy based cache. The behavior of the cache can be altered by @@ -92,6 +93,11 @@ class Cache : public BaseCache */ int busWidth; + /** + * The latency of a hit in this device. + */ + int hitLatency; + /** * A permanent mem req to always be used to cause invalidations. * Used to append to target list, to cause an invalidation. @@ -121,18 +127,18 @@ class Cache : public BaseCache bool doCopy; bool blockOnCopy; BaseCache::Params baseParams; - Bus *in; - Bus *out; Prefetcher *prefetcher; bool prefetchAccess; + int hitLatency; Params(TagStore *_tags, Buffering *mq, Coherence *coh, - bool do_copy, BaseCache::Params params, Bus * in_bus, - Bus * out_bus, Prefetcher *_prefetcher, - bool prefetch_access) + bool do_copy, BaseCache::Params params, + Prefetcher *_prefetcher, + bool prefetch_access, int hit_latency) : tags(_tags), missQueue(mq), coherence(coh), doCopy(do_copy), - blockOnCopy(false), baseParams(params), in(in_bus), out(out_bus), - prefetcher(_prefetcher), prefetchAccess(prefetch_access) + blockOnCopy(false), baseParams(params), + prefetcher(_prefetcher), prefetchAccess(prefetch_access), + hitLatency(hit_latency) { } }; @@ -140,6 +146,17 @@ class Cache : public BaseCache /** Instantiates a basic cache object. 
*/ Cache(const std::string &_name, Params ¶ms); + bool doTimingAccess(Packet *pkt, CachePort *cachePort, + bool isCpuSide); + + Tick doAtomicAccess(Packet *pkt, CachePort *cachePort, + bool isCpuSide); + + void doFunctionalAccess(Packet *pkt, CachePort *cachePort, + bool isCpuSide); + + void recvStatusChange(Port::Status status, bool isCpuSide); + void regStats(); /** diff --git a/src/mem/cache/cache_builder.cc b/src/mem/cache/cache_builder.cc index e3efb9bc36..8758dc57a4 100644 --- a/src/mem/cache/cache_builder.cc +++ b/src/mem/cache/cache_builder.cc @@ -42,7 +42,7 @@ #include "mem/cache/base_cache.hh" #include "mem/cache/cache.hh" -#include "mem/bus/bus.hh" +#include "mem/bus.hh" #include "mem/cache/coherence/coherence_protocol.hh" #include "sim/builder.hh" @@ -84,13 +84,6 @@ #include "mem/cache/coherence/uni_coherence.hh" #include "mem/cache/coherence/simple_coherence.hh" -// Bus Interfaces -#include "mem/bus/slave_interface.hh" -#include "mem/bus/master_interface.hh" -#include "mem/memory_interface.hh" - -#include "mem/trace/mem_trace_writer.hh" - //Prefetcher Headers #if defined(USE_GHB) #include "mem/cache/prefetch/ghb_prefetcher.hh" @@ -118,8 +111,8 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseCache) Param tgts_per_mshr; Param write_buffers; Param prioritizeRequests; - SimObjectParam in_bus; - SimObjectParam out_bus; +// SimObjectParam in_bus; +// SimObjectParam out_bus; Param do_copy; SimObjectParam protocol; Param trace_addr; @@ -133,9 +126,9 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseCache) Param compression_latency; Param subblock_size; Param max_miss_count; - SimObjectParam hier; +// SimObjectParam hier; VectorParam > addr_range; - SimObjectParam mem_trace; +// SimObjectParam mem_trace; Param split; Param split_size; Param lifo; @@ -151,6 +144,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseCache) Param prefetch_cache_check_push; Param prefetch_use_cpu_id; Param prefetch_data_accesses_only; + Param hit_latency; END_DECLARE_SIM_OBJECT_PARAMS(BaseCache) @@ -166,8 +160,9 
@@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseCache) INIT_PARAM_DFLT(write_buffers, "number of write buffers", 8), INIT_PARAM_DFLT(prioritizeRequests, "always service demand misses first", false), - INIT_PARAM_DFLT(in_bus, "incoming bus object", NULL), +/* INIT_PARAM_DFLT(in_bus, "incoming bus object", NULL), INIT_PARAM(out_bus, "outgoing bus object"), +*/ INIT_PARAM_DFLT(do_copy, "perform fast copies in the cache", false), INIT_PARAM_DFLT(protocol, "coherence protocol to use in the cache", NULL), INIT_PARAM_DFLT(trace_addr, "address to trace", 0), @@ -192,12 +187,13 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseCache) INIT_PARAM_DFLT(max_miss_count, "The number of misses to handle before calling exit", 0), - INIT_PARAM_DFLT(hier, +/* INIT_PARAM_DFLT(hier, "Hierarchy global variables", &defaultHierParams), +*/ INIT_PARAM_DFLT(addr_range, "The address range in bytes", vector >(1,RangeIn((Addr)0, MaxAddr))), - INIT_PARAM_DFLT(mem_trace, "Memory trace to write accesses to", NULL), +// INIT_PARAM_DFLT(mem_trace, "Memory trace to write accesses to", NULL), INIT_PARAM_DFLT(split, "Whether this is a partitioned cache", false), INIT_PARAM_DFLT(split_size, "the number of \"ways\" belonging to the LRU partition", 0), INIT_PARAM_DFLT(lifo, "whether you are using a LIFO repl. 
policy", false), @@ -212,7 +208,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseCache) INIT_PARAM_DFLT(prefetch_policy, "Type of prefetcher to use", "none"), INIT_PARAM_DFLT(prefetch_cache_check_push, "Check if in cash on push or pop of prefetch queue", true), INIT_PARAM_DFLT(prefetch_use_cpu_id, "Use the CPU ID to seperate calculations of prefetches", true), - INIT_PARAM_DFLT(prefetch_data_accesses_only, "Only prefetch on data not on instruction accesses", false) + INIT_PARAM_DFLT(prefetch_data_accesses_only, "Only prefetch on data not on instruction accesses", false), + INIT_PARAM_DFLT(hit_latency, "Hit Latecny for a succesful access", 1) END_INIT_SIM_OBJECT_PARAMS(BaseCache) @@ -232,12 +229,12 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache) } \ Cache, b, c>::Params params(tagStore, mq, coh, \ do_copy, base_params, \ - in_bus, out_bus, pf, \ + /*in_bus, out_bus,*/ pf, \ prefetch_access); \ Cache, b, c> *retval = \ - new Cache, b, c>(getInstanceName(), hier, \ + new Cache, b, c>(getInstanceName(), /*hier,*/ \ params); \ - if (in_bus == NULL) { \ +/* if (in_bus == NULL) { \ retval->setSlaveInterface(new MemoryInterface, b, c> >(getInstanceName(), hier, retval, mem_trace)); \ } else { \ retval->setSlaveInterface(new SlaveInterface, b, c>, Bus>(getInstanceName(), hier, retval, in_bus, mem_trace)); \ @@ -245,6 +242,7 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache) retval->setMasterInterface(new MasterInterface, b, c>, Bus>(getInstanceName(), hier, retval, out_bus)); \ out_bus->rangeChange(); \ return retval; \ +*/return true; \ } while (0) #define BUILD_CACHE_PANIC(x) do { \ @@ -465,7 +463,7 @@ CREATE_SIM_OBJECT(BaseCache) const void *repl = NULL; #endif - if (mshrs == 1 || out_bus->doEvents() == false) { + if (mshrs == 1 /*|| out_bus->doEvents() == false*/) { BlockingBuffer *mq = new BlockingBuffer(true); BUILD_COHERENCE(BlockingBuffer); } else { diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 699d874deb..dbf2e49f14 100644 --- a/src/mem/cache/cache_impl.hh +++ 
b/src/mem/cache/cache_impl.hh @@ -90,6 +90,8 @@ doAtomicAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) else snoopProbe(pkt, true); } + //Fix this timing info + return hitLatency; } template @@ -129,10 +131,12 @@ Cache(const std::string &_name, coherence(params.coherence), prefetcher(params.prefetcher), doCopy(params.doCopy), blockOnCopy(params.blockOnCopy) { - if (params.in == NULL) { +//FIX BUS POINTERS +// if (params.in == NULL) { topLevelCache = true; - } - tags->setCache(this, params.out->width, params.out->clockRate); +// } +//PLEASE FIX THIS, BUS SIZES NOT BEING USED + tags->setCache(this, blkSize, 1/*params.out->width, params.out->clockRate*/); tags->setPrefetcher(prefetcher); missQueue->setCache(this); missQueue->setPrefetcher(prefetcher); @@ -140,8 +144,10 @@ Cache(const std::string &_name, prefetcher->setCache(this); prefetcher->setTags(tags); prefetcher->setBuffer(missQueue); +#if 0 invalidatePkt = new Packet; invalidatePkt->cmd = Packet::InvalidateReq; +#endif } template @@ -175,27 +181,27 @@ Cache::access(PacketPtr &pkt) //Upgrade or Invalidate //Look into what happens if two slave caches on bus DPRINTF(Cache, "%s %d %x ? blk_addr: %x\n", pkt->cmdString(), - pkt->req->asid, pkt->addr & (((ULL(1))<<48)-1), - pkt->addr & ~((Addr)blkSize - 1)); + pkt->req->getAsid(), pkt->getAddr() & (((ULL(1))<<48)-1), + pkt->getAddr() & ~((Addr)blkSize - 1)); //@todo Should this return latency have the hit latency in it? // respond(pkt,curTick+lat); - (int)pkt->coherence |= SATISFIED; + pkt->flags |= SATISFIED; // return MA_HIT; //@todo, return values return true; } blk = tags->handleAccess(pkt, lat, writebacks); } else { - size = pkt->size; + size = pkt->getSize(); } // If this is a block size write/hint (WH64) allocate the block here // if the coherence protocol allows it. /** @todo make the fast write alloc (wh64) work with coherence. */ /** @todo Do we want to do fast writes for writebacks as well? 
*/ - if (!blk && pkt->size >= blkSize && coherence->allowFastWrites() && + if (!blk && pkt->getSize() >= blkSize && coherence->allowFastWrites() && (pkt->cmd == Packet::WriteReq || pkt->cmd == Packet::WriteInvalidateReq) ) { // not outstanding misses, can do this - MSHR* outstanding_miss = missQueue->findMSHR(pkt->addr, pkt->req->asid); + MSHR* outstanding_miss = missQueue->findMSHR(pkt->getAddr(), pkt->req->getAsid()); if (pkt->cmd == Packet::WriteInvalidateReq || !outstanding_miss) { if (outstanding_miss) { warn("WriteInv doing a fastallocate" @@ -211,8 +217,8 @@ Cache::access(PacketPtr &pkt) writebacks.pop_front(); } DPRINTF(Cache, "%s %d %x %s blk_addr: %x pc %x\n", pkt->cmdString(), - pkt->req->asid, pkt->addr & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss", - pkt->addr & ~((Addr)blkSize - 1), pkt->req->pc); + pkt->req->getAsid(), pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss", + pkt->getAddr() & ~((Addr)blkSize - 1), pkt->req->getPC()); if (blk) { // Hit hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; @@ -230,7 +236,7 @@ Cache::access(PacketPtr &pkt) if (missCount) { --missCount; if (missCount == 0) - new SimLoopExitEvent("A cache reached the maximum miss count"); + new SimLoopExitEvent(curTick, "A cache reached the maximum miss count"); } } missQueue->handleMiss(pkt, size, curTick + hitLatency); @@ -281,14 +287,14 @@ Cache::handleResponse(Packet * &pkt) BlkType *blk = NULL; if (pkt->senderState) { // MemDebug::cacheResponse(pkt); - DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->addr, - pkt->addr & (((ULL(1))<<48)-1)); + DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->getAddr(), + pkt->getAddr() & (((ULL(1))<<48)-1)); if (pkt->isCacheFill() && !pkt->isNoAllocate()) { blk = tags->findBlock(pkt); CacheBlk::State old_state = (blk) ? 
blk->status : 0; PacketList writebacks; - blk = tags->handleFill(blk, pkt->senderState, + blk = tags->handleFill(blk, (MSHR*)pkt->senderState, coherence->getNewState(pkt,old_state), writebacks); while (!writebacks.empty()) { @@ -310,7 +316,7 @@ Cache::pseudoFill(Addr addr, int asid) // Read the data into the mshr BlkType *blk = tags->handleAccess(mshr->pkt, lat, dummy, false); assert(dummy.empty()); - assert((int)mshr->pkt->coherence & SATISFIED); + assert(mshr->pkt->flags & SATISFIED); // can overload order since it isn't used on non pending blocks mshr->order = blk->status; // temporarily remove the block from the cache. @@ -328,11 +334,11 @@ Cache::pseudoFill(MSHR *mshr) // Read the data into the mshr BlkType *blk = tags->handleAccess(mshr->pkt, lat, dummy, false); assert(dummy.empty()); - assert((int)mshr->pkt->coherence & SATISFIED); + assert(mshr->pkt->flags & SATISFIED); // can overload order since it isn't used on non pending blocks mshr->order = blk->status; // temporarily remove the block from the cache. - tags->invalidateBlk(mshr->pkt->addr, mshr->pkt->req->asid); + tags->invalidateBlk(mshr->pkt->getAddr(), mshr->pkt->req->getAsid()); } @@ -348,9 +354,10 @@ template void Cache::snoop(Packet * &pkt) { - Addr blk_addr = pkt->addr & ~(Addr(blkSize-1)); + + Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); BlkType *blk = tags->findBlock(pkt); - MSHR *mshr = missQueue->findMSHR(blk_addr, pkt->req->asid); + MSHR *mshr = missQueue->findMSHR(blk_addr, pkt->req->getAsid()); if (isTopLevel() && coherence->hasProtocol()) { //@todo Move this into handle bus req //If we find an mshr, and it is in service, we need to NACK or invalidate if (mshr) { @@ -360,7 +367,7 @@ Cache::snoop(Packet * &pkt) //If the outstanding request was an invalidate (upgrade,readex,..) 
//Then we need to ACK the request until we get the data //Also NACK if the outstanding request is not a cachefill (writeback) - (int)pkt->coherence |= NACKED_LINE; + pkt->flags |= NACKED_LINE; return; } else { @@ -373,19 +380,19 @@ Cache::snoop(Packet * &pkt) //@todo Make it so that a read to a pending read can't be exclusive now. //Set the address so find match works - invalidatePkt->addr = pkt->addr; + invalidatePkt->addrOverride(pkt->getAddr()); //Append the invalidate on missQueue->addTarget(mshr,invalidatePkt); - DPRINTF(Cache, "Appending Invalidate to blk_addr: %x\n", pkt->addr & (((ULL(1))<<48)-1)); + DPRINTF(Cache, "Appending Invalidate to blk_addr: %x\n", pkt->getAddr() & (((ULL(1))<<48)-1)); return; } } } //We also need to check the writeback buffers and handle those std::vector writebacks; - if (missQueue->findWrites(blk_addr, pkt->req->asid, writebacks)) { - DPRINTF(Cache, "Snoop hit in writeback to blk_addr: %x\n", pkt->addr & (((ULL(1))<<48)-1)); + if (missQueue->findWrites(blk_addr, pkt->req->getAsid(), writebacks)) { + DPRINTF(Cache, "Snoop hit in writeback to blk_addr: %x\n", pkt->getAddr() & (((ULL(1))<<48)-1)); //Look through writebacks for any non-uncachable writes, use that for (int i=0; i::snoop(Packet * &pkt) if (pkt->isRead()) { //Only Upgrades don't get here //Supply the data - (int)pkt->coherence |= SATISFIED; + pkt->flags |= SATISFIED; //If we are in an exclusive protocol, make it ask again //to get write permissions (upgrade), signal shared - (int)pkt->coherence |= SHARED_LINE; + pkt->flags |= SHARED_LINE; assert(pkt->isRead()); - assert(pkt->offset < blkSize); - assert(pkt->size <= blkSize); - assert(pkt->offset + pkt->size <=blkSize); - memcpy(pkt->data, mshr->pkt->data + pkt->offset, pkt->size); + Addr offset = pkt->getAddr() & ~(blkSize - 1); + assert(offset < blkSize); + assert(pkt->getSize() <= blkSize); + assert(offset + pkt->getSize() <=blkSize); + memcpy(pkt->getPtr(), mshr->pkt->getPtr() + offset, pkt->getSize()); 
respondToSnoop(pkt); } @@ -434,7 +442,7 @@ void Cache::snoopResponse(Packet * &pkt) { //Need to handle the response, if NACKED - if ((int)pkt->coherence & NACKED_LINE) { + if (pkt->flags & NACKED_LINE) { //Need to mark it as not in service, and retry for bus assert(0); //Yeah, we saw a NACK come through @@ -463,41 +471,35 @@ Tick Cache::probe(Packet * &pkt, bool update) { // MemDebug::cacheProbe(pkt); - if (!pkt->req->isUncacheable()) { if (pkt->isInvalidate() && !pkt->isRead() && !pkt->isWrite()) { //Upgrade or Invalidate, satisfy it, don't forward DPRINTF(Cache, "%s %d %x ? blk_addr: %x\n", pkt->cmdString(), - pkt->req->asid, pkt->addr & (((ULL(1))<<48)-1), - pkt->addr & ~((Addr)blkSize - 1)); - (int)pkt->coherence |= SATISFIED; + pkt->req->getAsid(), pkt->getAddr() & (((ULL(1))<<48)-1), + pkt->getAddr() & ~((Addr)blkSize - 1)); + pkt->flags |= SATISFIED; return 0; } } - if (!update && !doData()) { - // Nothing to do here - return mi->sendProbe(pkt,update); - } - PacketList writebacks; int lat; BlkType *blk = tags->handleAccess(pkt, lat, writebacks, update); if (!blk) { // Need to check for outstanding misses and writes - Addr blk_addr = pkt->addr & ~(blkSize - 1); + Addr blk_addr = pkt->getAddr() & ~(blkSize - 1); // There can only be one matching outstanding miss. - MSHR* mshr = missQueue->findMSHR(blk_addr, pkt->req->asid); + MSHR* mshr = missQueue->findMSHR(blk_addr, pkt->req->getAsid()); // There can be many matching outstanding writes. vector writes; - missQueue->findWrites(blk_addr, pkt->req->asid, writes); + missQueue->findWrites(blk_addr, pkt->req->getAsid(), writes); if (!update) { - mi->sendProbe(pkt, update); + memSidePort->sendFunctional(pkt); // Check for data in MSHR and writebuffer. 
if (mshr) { warn("Found outstanding miss on an non-update probe"); @@ -508,26 +510,26 @@ Cache::probe(Packet * &pkt, bool update) Packet * target = *i; // If the target contains data, and it overlaps the // probed request, need to update data - if (target->isWrite() && target->overlaps(pkt)) { + if (target->isWrite() && target->intersect(pkt)) { uint8_t* pkt_data; uint8_t* write_data; int data_size; - if (target->addr < pkt->addr) { - int offset = pkt->addr - target->paddr; - pkt_data = pkt->data; - write_data = target->data + offset; - data_size = target->size - offset; + if (target->getAddr() < pkt->getAddr()) { + int offset = pkt->getAddr() - target->getAddr(); + pkt_data = pkt->getPtr(); + write_data = target->getPtr() + offset; + data_size = target->getSize() - offset; assert(data_size > 0); - if (data_size > pkt->size) - data_size = pkt->size; + if (data_size > pkt->getSize()) + data_size = pkt->getSize(); } else { - int offset = target->addr - pkt->addr; - pkt_data = pkt->data + offset; - write_data = target->data; - data_size = pkt->size - offset; - assert(data_size > pkt->size); - if (data_size > target->size) - data_size = target->size; + int offset = target->getAddr() - pkt->getAddr(); + pkt_data = pkt->getPtr() + offset; + write_data = target->getPtr(); + data_size = pkt->getSize() - offset; + assert(data_size > pkt->getSize()); + if (data_size > target->getSize()) + data_size = target->getSize(); } if (pkt->isWrite()) { @@ -540,27 +542,27 @@ Cache::probe(Packet * &pkt, bool update) } for (int i = 0; i < writes.size(); ++i) { Packet * write = writes[i]->pkt; - if (write->overlaps(pkt)) { + if (write->intersect(pkt)) { warn("Found outstanding write on an non-update probe"); uint8_t* pkt_data; uint8_t* write_data; int data_size; - if (write->addr < pkt->addr) { - int offset = pkt->addr - write->addr; - pkt_data = pkt->data; - write_data = write->data + offset; - data_size = write->size - offset; + if (write->getAddr() < pkt->getAddr()) { + int offset = 
pkt->getAddr() - write->getAddr(); + pkt_data = pkt->getPtr(); + write_data = write->getPtr() + offset; + data_size = write->getSize() - offset; assert(data_size > 0); - if (data_size > pkt->size) - data_size = pkt->size; + if (data_size > pkt->getSize()) + data_size = pkt->getSize(); } else { - int offset = write->addr - pkt->addr; - pkt_data = pkt->data + offset; - write_data = write->data; - data_size = pkt->size - offset; - assert(data_size > pkt->size); - if (data_size > write->size) - data_size = write->size; + int offset = write->getAddr() - pkt->getAddr(); + pkt_data = pkt->getPtr() + offset; + write_data = write->getPtr(); + data_size = pkt->getSize() - offset; + assert(data_size > pkt->getSize()); + if (data_size > write->getSize()) + data_size = write->getSize(); } if (pkt->isWrite()) { @@ -580,23 +582,20 @@ Cache::probe(Packet * &pkt, bool update) } if (!pkt->req->isUncacheable()) { // Fetch the cache block to fill - Packet * busPkt = new Packet(); - busPkt->addr = blk_addr; - busPkt->size = blkSize; - busPkt->data = new uint8_t[blkSize]; - BlkType *blk = tags->findBlock(pkt); - busPkt->cmd = coherence->getBusCmd(pkt->cmd, + Packet::Command temp_cmd = coherence->getBusCmd(pkt->cmd, (blk)? 
blk->status : 0); - busPkt->req->asid = pkt->req->asid; - busPkt->xc = pkt->xc; - busPkt->req->setThreadNum() = pkt->req->getThreadNum(); + Packet * busPkt = new Packet(pkt->req,temp_cmd, -1, blkSize); + + uint8_t* temp_data = new uint8_t[blkSize]; + busPkt->dataDynamicArray(temp_data); + busPkt->time = curTick; - lat = mi->sendProbe(busPkt, update); + lat = memSidePort->sendAtomic(busPkt); - if (!busPkt->isSatisfied()) { + if (!(busPkt->flags & SATISFIED)) { // blocked at a higher level, just return return 0; } @@ -609,19 +608,19 @@ Cache::probe(Packet * &pkt, bool update) writebacks, pkt); // Handle writebacks if needed while (!writebacks.empty()){ - mi->sendProbe(writebacks.front(), update); + memSidePort->sendAtomic(writebacks.front()); writebacks.pop_front(); } return lat + hitLatency; } else { - return mi->sendProbe(pkt,update); + return memSidePort->sendAtomic(pkt); } } } else { // There was a cache hit. // Handle writebacks if needed while (!writebacks.empty()){ - mi->sendProbe(writebacks.front(), update); + memSidePort->sendAtomic(writebacks.front()); writebacks.pop_front(); } @@ -629,7 +628,7 @@ Cache::probe(Packet * &pkt, bool update) hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; } else if (pkt->isWrite()) { // Still need to change data in all locations. 
- return mi->sendProbe(pkt, update); + return memSidePort->sendAtomic(pkt); } return curTick + lat; } @@ -641,11 +640,11 @@ template Tick Cache::snoopProbe(PacketPtr &pkt, bool update) { - Addr blk_addr = pkt->addr & ~(Addr(blkSize-1)); + Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); BlkType *blk = tags->findBlock(pkt); - MSHR *mshr = missQueue->findMSHR(blk_addr, pkt->req->asid); + MSHR *mshr = missQueue->findMSHR(blk_addr, pkt->req->getAsid()); CacheBlk::State new_state = 0; - bool satisfy = coherence->handleBusPktuest(pkt,blk,mshr, new_state); + bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state); if (satisfy) { tags->handleSnoop(blk, new_state, pkt); return hitLatency; diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc index 9d5b8ef546..bcf3ce9c50 100644 --- a/src/mem/cache/coherence/coherence_protocol.cc +++ b/src/mem/cache/coherence/coherence_protocol.cc @@ -47,7 +47,7 @@ using namespace std; CoherenceProtocol::StateTransition::StateTransition() - : busCmd(InvalidCmd), newState(-1), snoopFunc(invalidTransition) + : busCmd(Packet::InvalidCmd), newState(-1), snoopFunc(invalidTransition) { } @@ -59,132 +59,132 @@ CoherenceProtocol::regStats() // requestCount and snoopCount arrays, most of these are invalid, // so we just select the interesting ones to print here. 
- requestCount[Invalid][Read] + requestCount[Invalid][Packet::ReadReq] .name(name() + ".read_invalid") .desc("read misses to invalid blocks") ; - requestCount[Invalid][Write] + requestCount[Invalid][Packet::WriteReq] .name(name() +".write_invalid") .desc("write misses to invalid blocks") ; - requestCount[Invalid][Soft_Prefetch] + requestCount[Invalid][Packet::SoftPFReq] .name(name() +".swpf_invalid") .desc("soft prefetch misses to invalid blocks") ; - requestCount[Invalid][Hard_Prefetch] + requestCount[Invalid][Packet::HardPFReq] .name(name() +".hwpf_invalid") .desc("hard prefetch misses to invalid blocks") ; - requestCount[Shared][Write] + requestCount[Shared][Packet::WriteReq] .name(name() + ".write_shared") .desc("write misses to shared blocks") ; - requestCount[Owned][Write] + requestCount[Owned][Packet::WriteReq] .name(name() + ".write_owned") .desc("write misses to owned blocks") ; - snoopCount[Shared][Read] + snoopCount[Shared][Packet::ReadReq] .name(name() + ".snoop_read_shared") .desc("read snoops on shared blocks") ; - snoopCount[Shared][ReadEx] + snoopCount[Shared][Packet::ReadExReq] .name(name() + ".snoop_readex_shared") .desc("readEx snoops on shared blocks") ; - snoopCount[Shared][Upgrade] + snoopCount[Shared][Packet::UpgradeReq] .name(name() + ".snoop_upgrade_shared") .desc("upgradee snoops on shared blocks") ; - snoopCount[Modified][Read] + snoopCount[Modified][Packet::ReadReq] .name(name() + ".snoop_read_modified") .desc("read snoops on modified blocks") ; - snoopCount[Modified][ReadEx] + snoopCount[Modified][Packet::ReadExReq] .name(name() + ".snoop_readex_modified") .desc("readEx snoops on modified blocks") ; - snoopCount[Owned][Read] + snoopCount[Owned][Packet::ReadReq] .name(name() + ".snoop_read_owned") .desc("read snoops on owned blocks") ; - snoopCount[Owned][ReadEx] + snoopCount[Owned][Packet::ReadExReq] .name(name() + ".snoop_readex_owned") .desc("readEx snoops on owned blocks") ; - snoopCount[Owned][Upgrade] + 
snoopCount[Owned][Packet::UpgradeReq] .name(name() + ".snoop_upgrade_owned") .desc("upgrade snoops on owned blocks") ; - snoopCount[Exclusive][Read] + snoopCount[Exclusive][Packet::ReadReq] .name(name() + ".snoop_read_exclusive") .desc("read snoops on exclusive blocks") ; - snoopCount[Exclusive][ReadEx] + snoopCount[Exclusive][Packet::ReadExReq] .name(name() + ".snoop_readex_exclusive") .desc("readEx snoops on exclusive blocks") ; - snoopCount[Shared][Invalidate] + snoopCount[Shared][Packet::InvalidateReq] .name(name() + ".snoop_inv_shared") .desc("Invalidate snoops on shared blocks") ; - snoopCount[Owned][Invalidate] + snoopCount[Owned][Packet::InvalidateReq] .name(name() + ".snoop_inv_owned") .desc("Invalidate snoops on owned blocks") ; - snoopCount[Exclusive][Invalidate] + snoopCount[Exclusive][Packet::InvalidateReq] .name(name() + ".snoop_inv_exclusive") .desc("Invalidate snoops on exclusive blocks") ; - snoopCount[Modified][Invalidate] + snoopCount[Modified][Packet::InvalidateReq] .name(name() + ".snoop_inv_modified") .desc("Invalidate snoops on modified blocks") ; - snoopCount[Invalid][Invalidate] + snoopCount[Invalid][Packet::InvalidateReq] .name(name() + ".snoop_inv_invalid") .desc("Invalidate snoops on invalid blocks") ; - snoopCount[Shared][WriteInvalidate] + snoopCount[Shared][Packet::WriteInvalidateReq] .name(name() + ".snoop_writeinv_shared") .desc("WriteInvalidate snoops on shared blocks") ; - snoopCount[Owned][WriteInvalidate] + snoopCount[Owned][Packet::WriteInvalidateReq] .name(name() + ".snoop_writeinv_owned") .desc("WriteInvalidate snoops on owned blocks") ; - snoopCount[Exclusive][WriteInvalidate] + snoopCount[Exclusive][Packet::WriteInvalidateReq] .name(name() + ".snoop_writeinv_exclusive") .desc("WriteInvalidate snoops on exclusive blocks") ; - snoopCount[Modified][WriteInvalidate] + snoopCount[Modified][Packet::WriteInvalidateReq] .name(name() + ".snoop_writeinv_modified") .desc("WriteInvalidate snoops on modified blocks") ; - 
snoopCount[Invalid][WriteInvalidate] + snoopCount[Invalid][Packet::WriteInvalidateReq] .name(name() + ".snoop_writeinv_invalid") .desc("WriteInvalidate snoops on invalid blocks") ; @@ -270,167 +270,168 @@ CoherenceProtocol::CoherenceProtocol(const string &name, fatal(""); } - Packet::CommandEnum writeToSharedCmd = doUpgrades ? Upgrade : ReadEx; + Packet::Command writeToSharedCmd = doUpgrades ? Packet::UpgradeReq : Packet::ReadExReq; + Packet::Command writeToSharedResp = doUpgrades ? Packet::UpgradeResp : Packet::ReadExResp; //@todo add in hardware prefetch to this list if (protocol == "msi") { // incoming requests: specify outgoing bus request - transitionTable[Invalid][Read].onRequest(Read); - transitionTable[Invalid][Write].onRequest(ReadEx); - transitionTable[Shared][Write].onRequest(writeToSharedCmd); + transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq); + transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd); //Prefetching causes a read - transitionTable[Invalid][Soft_Prefetch].onRequest(Read); - transitionTable[Invalid][Hard_Prefetch].onRequest(Read); + transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq); // on response to given request: specify new state - transitionTable[Invalid][Read].onResponse(Shared); - transitionTable[Invalid][ReadEx].onResponse(Modified); - transitionTable[Shared][writeToSharedCmd].onResponse(Modified); + transitionTable[Invalid][Packet::ReadResp].onResponse(Shared); + transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified); + transitionTable[Shared][writeToSharedResp].onResponse(Modified); // bus snoop transition functions - transitionTable[Invalid][Read].onSnoop(nullTransition); - transitionTable[Invalid][ReadEx].onSnoop(nullTransition); - transitionTable[Shared][Read].onSnoop(nullTransition); - 
transitionTable[Shared][ReadEx].onSnoop(invalidateTrans); - transitionTable[Modified][ReadEx].onSnoop(supplyAndInvalidateTrans); - transitionTable[Modified][Read].onSnoop(supplyAndGotoSharedTrans); + transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition); + transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::ReadReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); + transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoSharedTrans); //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) - transitionTable[Invalid][Invalidate].onSnoop(invalidateTrans); - transitionTable[Shared][Invalidate].onSnoop(invalidateTrans); - transitionTable[Modified][Invalidate].onSnoop(invalidateTrans); - transitionTable[Invalid][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Shared][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Modified][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); if (doUpgrades) { - transitionTable[Invalid][Upgrade].onSnoop(nullTransition); - transitionTable[Shared][Upgrade].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans); } } else if(protocol == "mesi") { // incoming requests: specify outgoing bus request - transitionTable[Invalid][Read].onRequest(Read); - 
transitionTable[Invalid][Write].onRequest(ReadEx); - transitionTable[Shared][Write].onRequest(writeToSharedCmd); + transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq); + transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd); //Prefetching causes a read - transitionTable[Invalid][Soft_Prefetch].onRequest(Read); - transitionTable[Invalid][Hard_Prefetch].onRequest(Read); + transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq); // on response to given request: specify new state - transitionTable[Invalid][Read].onResponse(Exclusive); + transitionTable[Invalid][Packet::ReadResp].onResponse(Exclusive); //It will move into shared if the shared line is asserted in the //getNewState function - transitionTable[Invalid][ReadEx].onResponse(Modified); - transitionTable[Shared][writeToSharedCmd].onResponse(Modified); + transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified); + transitionTable[Shared][writeToSharedResp].onResponse(Modified); // bus snoop transition functions - transitionTable[Invalid][Read].onSnoop(nullTransition); - transitionTable[Invalid][ReadEx].onSnoop(nullTransition); - transitionTable[Shared][Read].onSnoop(assertShared); - transitionTable[Shared][ReadEx].onSnoop(invalidateTrans); - transitionTable[Exclusive][Read].onSnoop(assertShared); - transitionTable[Exclusive][ReadEx].onSnoop(invalidateTrans); - transitionTable[Modified][ReadEx].onSnoop(supplyAndInvalidateTrans); - transitionTable[Modified][Read].onSnoop(supplyAndGotoSharedTrans); + transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition); + transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared); + transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans); + 
transitionTable[Exclusive][Packet::ReadReq].onSnoop(assertShared); + transitionTable[Exclusive][Packet::ReadExReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); + transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoSharedTrans); //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) - transitionTable[Invalid][Invalidate].onSnoop(invalidateTrans); - transitionTable[Shared][Invalidate].onSnoop(invalidateTrans); - transitionTable[Modified][Invalidate].onSnoop(invalidateTrans); - transitionTable[Exclusive][Invalidate].onSnoop(invalidateTrans); - transitionTable[Invalid][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Shared][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Modified][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Exclusive][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Exclusive][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Exclusive][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); if (doUpgrades) { - transitionTable[Invalid][Upgrade].onSnoop(nullTransition); - transitionTable[Shared][Upgrade].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans); } } else if(protocol == "mosi") { // incoming requests: specify outgoing bus request - transitionTable[Invalid][Read].onRequest(Read); - 
transitionTable[Invalid][Write].onRequest(ReadEx); - transitionTable[Shared][Write].onRequest(writeToSharedCmd); - transitionTable[Owned][Write].onRequest(writeToSharedCmd); + transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq); + transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd); + transitionTable[Owned][Packet::WriteReq].onRequest(writeToSharedCmd); //Prefetching causes a read - transitionTable[Invalid][Soft_Prefetch].onRequest(Read); - transitionTable[Invalid][Hard_Prefetch].onRequest(Read); + transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq); // on response to given request: specify new state - transitionTable[Invalid][Read].onResponse(Shared); - transitionTable[Invalid][ReadEx].onResponse(Modified); - transitionTable[Shared][writeToSharedCmd].onResponse(Modified); - transitionTable[Owned][writeToSharedCmd].onResponse(Modified); + transitionTable[Invalid][Packet::ReadResp].onResponse(Shared); + transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified); + transitionTable[Shared][writeToSharedResp].onResponse(Modified); + transitionTable[Owned][writeToSharedResp].onResponse(Modified); // bus snoop transition functions - transitionTable[Invalid][Read].onSnoop(nullTransition); - transitionTable[Invalid][ReadEx].onSnoop(nullTransition); - transitionTable[Invalid][Upgrade].onSnoop(nullTransition); - transitionTable[Shared][Read].onSnoop(assertShared); - transitionTable[Shared][ReadEx].onSnoop(invalidateTrans); - transitionTable[Shared][Upgrade].onSnoop(invalidateTrans); - transitionTable[Modified][ReadEx].onSnoop(supplyAndInvalidateTrans); - transitionTable[Modified][Read].onSnoop(supplyAndGotoOwnedTrans); - transitionTable[Owned][Read].onSnoop(supplyAndGotoOwnedTrans); - transitionTable[Owned][ReadEx].onSnoop(supplyAndInvalidateTrans); - 
transitionTable[Owned][Upgrade].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition); + transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition); + transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared); + transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); + transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans); + transitionTable[Owned][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans); + transitionTable[Owned][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); + transitionTable[Owned][Packet::UpgradeReq].onSnoop(invalidateTrans); //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) - transitionTable[Invalid][Invalidate].onSnoop(invalidateTrans); - transitionTable[Shared][Invalidate].onSnoop(invalidateTrans); - transitionTable[Modified][Invalidate].onSnoop(invalidateTrans); - transitionTable[Owned][Invalidate].onSnoop(invalidateTrans); - transitionTable[Invalid][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Shared][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Modified][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Owned][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Owned][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + 
transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Owned][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); } else if(protocol == "moesi") { // incoming requests: specify outgoing bus request - transitionTable[Invalid][Read].onRequest(Read); - transitionTable[Invalid][Write].onRequest(ReadEx); - transitionTable[Shared][Write].onRequest(writeToSharedCmd); - transitionTable[Owned][Write].onRequest(writeToSharedCmd); + transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq); + transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd); + transitionTable[Owned][Packet::WriteReq].onRequest(writeToSharedCmd); //Prefetching causes a read - transitionTable[Invalid][Soft_Prefetch].onRequest(Read); - transitionTable[Invalid][Hard_Prefetch].onRequest(Read); + transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq); // on response to given request: specify new state - transitionTable[Invalid][Read].onResponse(Exclusive); + transitionTable[Invalid][Packet::ReadResp].onResponse(Exclusive); //It will move into shared if the shared line is asserted in the //getNewState function - transitionTable[Invalid][ReadEx].onResponse(Modified); - transitionTable[Shared][writeToSharedCmd].onResponse(Modified); - transitionTable[Owned][writeToSharedCmd].onResponse(Modified); + transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified); + transitionTable[Shared][writeToSharedResp].onResponse(Modified); + transitionTable[Owned][writeToSharedResp].onResponse(Modified); // bus snoop transition functions - transitionTable[Invalid][Read].onSnoop(nullTransition); - transitionTable[Invalid][ReadEx].onSnoop(nullTransition); - transitionTable[Invalid][Upgrade].onSnoop(nullTransition); - transitionTable[Shared][Read].onSnoop(assertShared); - 
transitionTable[Shared][ReadEx].onSnoop(invalidateTrans); - transitionTable[Shared][Upgrade].onSnoop(invalidateTrans); - transitionTable[Exclusive][Read].onSnoop(assertShared); - transitionTable[Exclusive][ReadEx].onSnoop(invalidateTrans); - transitionTable[Modified][Read].onSnoop(supplyAndGotoOwnedTrans); - transitionTable[Modified][ReadEx].onSnoop(supplyAndInvalidateTrans); - transitionTable[Owned][Read].onSnoop(supplyAndGotoOwnedTrans); - transitionTable[Owned][ReadEx].onSnoop(supplyAndInvalidateTrans); - transitionTable[Owned][Upgrade].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition); + transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition); + transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared); + transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans); + transitionTable[Exclusive][Packet::ReadReq].onSnoop(assertShared); + transitionTable[Exclusive][Packet::ReadExReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans); + transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); + transitionTable[Owned][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans); + transitionTable[Owned][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); + transitionTable[Owned][Packet::UpgradeReq].onSnoop(invalidateTrans); //Transitions on seeing a DMA (writeInv(samelevel) or DMAInv) - transitionTable[Invalid][Invalidate].onSnoop(invalidateTrans); - transitionTable[Shared][Invalidate].onSnoop(invalidateTrans); - transitionTable[Exclusive][Invalidate].onSnoop(invalidateTrans); - transitionTable[Modified][Invalidate].onSnoop(invalidateTrans); - transitionTable[Owned][Invalidate].onSnoop(invalidateTrans); - transitionTable[Invalid][WriteInvalidate].onSnoop(invalidateTrans); - 
transitionTable[Shared][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Exclusive][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Modified][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Owned][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Exclusive][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Owned][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Exclusive][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Owned][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); } else { @@ -446,14 +447,14 @@ CoherenceProtocol::getBusCmd(Packet::Command cmdIn, CacheBlk::State state, MSHR *mshr) { state &= stateMask; - int cmd_idx = cmdIn.toIndex(); + int cmd_idx = (int) cmdIn; assert(0 <= state && state <= stateMax); assert(0 <= cmd_idx && cmd_idx < NUM_MEM_CMDS); Packet::Command cmdOut = transitionTable[state][cmd_idx].busCmd; - assert(cmdOut != InvalidCmd); + assert(cmdOut != Packet::InvalidCmd); ++requestCount[state][cmd_idx]; @@ -462,7 +463,7 @@ CoherenceProtocol::getBusCmd(Packet::Command cmdIn, CacheBlk::State state, CacheBlk::State -CoherenceProtocol::getNewState(const Packet * &pkt, CacheBlk::State oldState) +CoherenceProtocol::getNewState(Packet * &pkt, CacheBlk::State oldState) { CacheBlk::State state = oldState & stateMask; int cmd_idx = pkt->cmdToIndex(); diff --git a/src/mem/cache/coherence/uni_coherence.cc b/src/mem/cache/coherence/uni_coherence.cc index 68a78e3951..5ab7062692 100644 
--- a/src/mem/cache/coherence/uni_coherence.cc +++ b/src/mem/cache/coherence/uni_coherence.cc @@ -44,8 +44,8 @@ Packet * UniCoherence::getPacket() { bool unblock = cshrs.isFull(); - Packet * pkt = cshrs.getPkt(); - cshrs.markInService(pkt->senderState); + Packet* pkt = cshrs.getReq(); + cshrs.markInService((MSHR*)pkt->senderState); if (!cshrs.havePending()) { cache->clearSlaveRequest(Request_Coherence); } @@ -65,15 +65,12 @@ UniCoherence::handleBusRequest(Packet * &pkt, CacheBlk *blk, MSHR *mshr, CacheBlk::State &new_state) { new_state = 0; - if (pkt->cmd.isInvalidate()) { + if (pkt->isInvalidate()) { DPRINTF(Cache, "snoop inval on blk %x (blk ptr %x)\n", - pkt->paddr, blk); + pkt->getAddr(), blk); if (!cache->isTopLevel()) { // Forward to other caches - Packet * tmp = new MemPkt(); - tmp->cmd = Invalidate; - tmp->paddr = pkt->paddr; - tmp->size = pkt->size; + Packet * tmp = new Packet(pkt->req, Packet::InvalidateReq, -1); cshrs.allocate(tmp); cache->setSlaveRequest(Request_Coherence, curTick); if (cshrs.isFull()) { diff --git a/src/mem/cache/coherence/uni_coherence.hh b/src/mem/cache/coherence/uni_coherence.hh index 4e895997f6..764bf62761 100644 --- a/src/mem/cache/coherence/uni_coherence.hh +++ b/src/mem/cache/coherence/uni_coherence.hh @@ -32,6 +32,7 @@ #define __UNI_COHERENCE_HH__ #include "base/trace.hh" +#include "base/misc.hh" #include "mem/cache/cache_blk.hh" #include "mem/cache/miss/mshr_queue.hh" #include "mem/packet.hh" diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc index 912a0f5bd0..d745cb8c65 100644 --- a/src/mem/cache/miss/blocking_buffer.cc +++ b/src/mem/cache/miss/blocking_buffer.cc @@ -33,12 +33,12 @@ * Definitions of a simple buffer for a blocking cache. 
*/ -#include "cpu/exec_context.hh" #include "cpu/smt.hh" //for maxThreadsPerCPU #include "mem/cache/base_cache.hh" #include "mem/cache/miss/blocking_buffer.hh" #include "mem/cache/prefetch/base_prefetcher.hh" #include "sim/eventq.hh" // for Event declaration. +#include "mem/request.hh" using namespace TheISA; @@ -72,26 +72,26 @@ BlockingBuffer::setPrefetcher(BasePrefetcher *_prefetcher) void BlockingBuffer::handleMiss(Packet * &pkt, int blk_size, Tick time) { - Addr blk_addr = pkt->paddr & ~(Addr)(blk_size - 1); - if (pkt->cmd.isWrite() && (pkt->req->isUncacheable() || !writeAllocate || - pkt->cmd.isNoResponse())) { - if (pkt->cmd.isNoResponse()) { + Addr blk_addr = pkt->getAddr() & ~(Addr)(blk_size - 1); + if (pkt->isWrite() && (pkt->req->isUncacheable() || !writeAllocate || + !pkt->needsResponse())) { + if (!pkt->needsResponse()) { wb.allocateAsBuffer(pkt); } else { - wb.allocate(pkt->cmd, blk_addr, pkt->req->asid, blk_size, pkt); - } - if (cache->doData()) { - memcpy(wb.pkt->data, pkt->data, blk_size); + wb.allocate(pkt->cmd, blk_addr, pkt->req->getAsid(), blk_size, pkt); } + + memcpy(wb.pkt->getPtr(), pkt->getPtr(), blk_size); + cache->setBlocked(Blocked_NoWBBuffers); cache->setMasterRequest(Request_WB, time); return; } - if (pkt->cmd.isNoResponse()) { + if (!pkt->needsResponse()) { miss.allocateAsBuffer(pkt); } else { - miss.allocate(pkt->cmd, blk_addr, pkt->req->asid, blk_size, pkt); + miss.allocate(pkt->cmd, blk_addr, pkt->req->getAsid(), blk_size, pkt); } if (!pkt->req->isUncacheable()) { miss.pkt->flags |= CACHE_LINE_FILL; @@ -112,27 +112,27 @@ BlockingBuffer::getPacket() void BlockingBuffer::setBusCmd(Packet * &pkt, Packet::Command cmd) { - MSHR *mshr = pkt->senderState; + MSHR *mshr = (MSHR*) pkt->senderState; mshr->originalCmd = pkt->cmd; if (pkt->isCacheFill()) - pkt->cmd = cmd; + pkt->cmdOverride(cmd); } void BlockingBuffer::restoreOrigCmd(Packet * &pkt) { - pkt->cmd = pkt->senderState->originalCmd; + 
pkt->cmdOverride(((MSHR*)(pkt->senderState))->originalCmd); } void BlockingBuffer::markInService(Packet * &pkt) { - if (!pkt->isCacheFill() && pkt->cmd.isWrite()) { + if (!pkt->isCacheFill() && pkt->isWrite()) { // Forwarding a write/ writeback, don't need to change // the command - assert(pkt->senderState == &wb); + assert((MSHR*)pkt->senderState == &wb); cache->clearMasterRequest(Request_WB); - if (pkt->cmd.isNoResponse()) { + if (!pkt->needsResponse()) { assert(wb.getNumTargets() == 0); wb.deallocate(); cache->clearBlocked(Blocked_NoWBBuffers); @@ -140,9 +140,9 @@ BlockingBuffer::markInService(Packet * &pkt) wb.inService = true; } } else { - assert(pkt->senderState == &miss); + assert((MSHR*)pkt->senderState == &miss); cache->clearMasterRequest(Request_MSHR); - if (pkt->cmd.isNoResponse()) { + if (!pkt->needsResponse()) { assert(miss.getNumTargets() == 0); miss.deallocate(); cache->clearBlocked(Blocked_NoMSHRs); @@ -158,24 +158,24 @@ BlockingBuffer::handleResponse(Packet * &pkt, Tick time) { if (pkt->isCacheFill()) { // targets were handled in the cache tags - assert(pkt->senderState == &miss); + assert((MSHR*)pkt->senderState == &miss); miss.deallocate(); cache->clearBlocked(Blocked_NoMSHRs); } else { - if (pkt->senderState->hasTargets()) { + if (((MSHR*)(pkt->senderState))->hasTargets()) { // Should only have 1 target if we had any - assert(pkt->senderState->getNumTargets() == 1); - Packet * target = pkt->senderState->getTarget(); - pkt->senderState->popTarget(); - if (cache->doData() && pkt->cmd.isRead()) { - memcpy(target->data, pkt->data, target->size); + assert(((MSHR*)(pkt->senderState))->getNumTargets() == 1); + Packet * target = ((MSHR*)(pkt->senderState))->getTarget(); + ((MSHR*)(pkt->senderState))->popTarget(); + if (pkt->isRead()) { + memcpy(target->getPtr(), pkt->getPtr(), target->getSize()); } cache->respond(target, time); - assert(!pkt->senderState->hasTargets()); + assert(!((MSHR*)(pkt->senderState))->hasTargets()); } - if (pkt->cmd.isWrite()) { 
- assert(pkt->senderState == &wb); + if (pkt->isWrite()) { + assert(((MSHR*)(pkt->senderState)) == &wb); wb.deallocate(); cache->clearBlocked(Blocked_NoWBBuffers); } else { @@ -186,15 +186,12 @@ BlockingBuffer::handleResponse(Packet * &pkt, Tick time) } void -BlockingBuffer::squash(int req->getThreadNum()ber) +BlockingBuffer::squash(int threadNum) { - if (miss.setThreadNum() == req->getThreadNum()ber) { + if (miss.threadNum == threadNum) { Packet * target = miss.getTarget(); miss.popTarget(); - assert(target->req->setThreadNum() == req->getThreadNum()ber); - if (target->completionEvent != NULL) { - delete target->completionEvent; - } + assert(target->req->getThreadNum() == threadNum); target = NULL; assert(!miss.hasTargets()); miss.ntargets=0; @@ -210,27 +207,20 @@ void BlockingBuffer::doWriteback(Addr addr, int asid, int size, uint8_t *data, bool compressed) { - // Generate request - Packet * pkt = new Packet(); - pkt->paddr = addr; - pkt->req->asid = asid; - pkt->size = size; - pkt->data = new uint8_t[size]; + Request * req = new Request(addr, size, 0); + Packet * pkt = new Packet(req, Packet::Writeback, -1); + uint8_t *new_data = new uint8_t[size]; + pkt->dataDynamicArray(new_data); if (data) { - memcpy(pkt->data, data, size); + memcpy(pkt->getPtr(), data, size); } - /** - * @todo Need to find a way to charge the writeback to the "correct" - * thread. - */ - pkt->req->setThreadNum() = 0; - pkt->cmd = Writeback; if (compressed) { pkt->flags |= COMPRESSED; } + ///All writebacks charged to same thread @todo figure this out writebacks[pkt->req->getThreadNum()]++; wb.allocateAsBuffer(pkt); @@ -249,9 +239,8 @@ BlockingBuffer::doWriteback(Packet * &pkt) // Since allocate as buffer copies the request, // need to copy data here. 
- if (cache->doData()) { - memcpy(wb.pkt->data, pkt->data, pkt->size); - } + memcpy(wb.pkt->getPtr(), pkt->getPtr(), pkt->getSize()); + cache->setBlocked(Blocked_NoWBBuffers); cache->setMasterRequest(Request_WB, curTick); } diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc index d02f27d52a..34290351de 100644 --- a/src/mem/cache/miss/miss_queue.cc +++ b/src/mem/cache/miss/miss_queue.cc @@ -34,7 +34,6 @@ * Miss and writeback queue definitions. */ -#include "cpu/exec_context.hh" #include "cpu/smt.hh" //for maxThreadsPerCPU #include "mem/cache/base_cache.hh" #include "mem/cache/miss/miss_queue.hh" @@ -59,6 +58,10 @@ MissQueue::MissQueue(int numMSHRs, int numTargets, int write_buffers, void MissQueue::regStats(const string &name) { + Request temp_req; + Packet::Command temp_cmd = Packet::ReadReq; + Packet temp_pkt(&temp_req, temp_cmd, 0); //@todo FIx command strings so this isn't neccessary + using namespace Stats; writebacks @@ -71,7 +74,7 @@ MissQueue::regStats(const string &name) // MSHR hit statistics for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { Packet::Command cmd = (Packet::Command)access_idx; - const string &cstr = cmd.toString(); + const string &cstr = temp_pkt.cmdIdxToString(cmd); mshr_hits[access_idx] .init(maxThreadsPerCPU) @@ -86,20 +89,20 @@ MissQueue::regStats(const string &name) .desc("number of demand (read+write) MSHR hits") .flags(total) ; - demandMshrHits = mshr_hits[Read] + mshr_hits[Write]; + demandMshrHits = mshr_hits[Packet::ReadReq] + mshr_hits[Packet::WriteReq]; overallMshrHits .name(name + ".overall_mshr_hits") .desc("number of overall MSHR hits") .flags(total) ; - overallMshrHits = demandMshrHits + mshr_hits[Soft_Prefetch] + - mshr_hits[Hard_Prefetch]; + overallMshrHits = demandMshrHits + mshr_hits[Packet::SoftPFReq] + + mshr_hits[Packet::HardPFReq]; // MSHR miss statistics for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = 
(Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); mshr_misses[access_idx] .init(maxThreadsPerCPU) @@ -114,20 +117,20 @@ MissQueue::regStats(const string &name) .desc("number of demand (read+write) MSHR misses") .flags(total) ; - demandMshrMisses = mshr_misses[Read] + mshr_misses[Write]; + demandMshrMisses = mshr_misses[Packet::ReadReq] + mshr_misses[Packet::WriteReq]; overallMshrMisses .name(name + ".overall_mshr_misses") .desc("number of overall MSHR misses") .flags(total) ; - overallMshrMisses = demandMshrMisses + mshr_misses[Soft_Prefetch] + - mshr_misses[Hard_Prefetch]; + overallMshrMisses = demandMshrMisses + mshr_misses[Packet::SoftPFReq] + + mshr_misses[Packet::HardPFReq]; // MSHR miss latency statistics for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); mshr_miss_latency[access_idx] .init(maxThreadsPerCPU) @@ -142,7 +145,8 @@ MissQueue::regStats(const string &name) .desc("number of demand (read+write) MSHR miss cycles") .flags(total) ; - demandMshrMissLatency = mshr_miss_latency[Read] + mshr_miss_latency[Write]; + demandMshrMissLatency = mshr_miss_latency[Packet::ReadReq] + + mshr_miss_latency[Packet::WriteReq]; overallMshrMissLatency .name(name + ".overall_mshr_miss_latency") @@ -150,12 +154,12 @@ MissQueue::regStats(const string &name) .flags(total) ; overallMshrMissLatency = demandMshrMissLatency + - mshr_miss_latency[Soft_Prefetch] + mshr_miss_latency[Hard_Prefetch]; + mshr_miss_latency[Packet::SoftPFReq] + mshr_miss_latency[Packet::HardPFReq]; // MSHR uncacheable statistics for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string 
&cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); mshr_uncacheable[access_idx] .init(maxThreadsPerCPU) @@ -170,13 +174,14 @@ MissQueue::regStats(const string &name) .desc("number of overall MSHR uncacheable misses") .flags(total) ; - overallMshrUncacheable = mshr_uncacheable[Read] + mshr_uncacheable[Write] - + mshr_uncacheable[Soft_Prefetch] + mshr_uncacheable[Hard_Prefetch]; + overallMshrUncacheable = mshr_uncacheable[Packet::ReadReq] + + mshr_uncacheable[Packet::WriteReq] + mshr_uncacheable[Packet::SoftPFReq] + + mshr_uncacheable[Packet::HardPFReq]; // MSHR miss latency statistics for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); mshr_uncacheable_lat[access_idx] .init(maxThreadsPerCPU) @@ -191,15 +196,16 @@ MissQueue::regStats(const string &name) .desc("number of overall MSHR uncacheable cycles") .flags(total) ; - overallMshrUncacheableLatency = mshr_uncacheable_lat[Read] - + mshr_uncacheable_lat[Write] + mshr_uncacheable_lat[Soft_Prefetch] - + mshr_uncacheable_lat[Hard_Prefetch]; + overallMshrUncacheableLatency = mshr_uncacheable_lat[Packet::ReadReq] + + mshr_uncacheable_lat[Packet::WriteReq] + + mshr_uncacheable_lat[Packet::SoftPFReq] + + mshr_uncacheable_lat[Packet::HardPFReq]; #if 0 // MSHR access formulas for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); mshrAccesses[access_idx] .name(name + "." 
+ cstr + "_mshr_accesses") @@ -229,8 +235,8 @@ MissQueue::regStats(const string &name) // MSHR miss rate formulas for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); mshrMissRate[access_idx] .name(name + "." + cstr + "_mshr_miss_rate") @@ -258,8 +264,8 @@ MissQueue::regStats(const string &name) // mshrMiss latency formulas for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); avgMshrMissLatency[access_idx] .name(name + "." + cstr + "_avg_mshr_miss_latency") @@ -287,8 +293,8 @@ MissQueue::regStats(const string &name) // mshrUncacheable latency formulas for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); avgMshrUncacheableLatency[access_idx] .name(name + "." 
+ cstr + "_avg_mshr_uncacheable_latency") @@ -354,7 +360,7 @@ MissQueue::allocateMiss(Packet * &pkt, int size, Tick time) if (mq.isFull()) { cache->setBlocked(Blocked_NoMSHRs); } - if (pkt->cmd != Hard_Prefetch) { + if (pkt->cmd != Packet::HardPFReq) { //If we need to request the bus (not on HW prefetch), do so cache->setMasterRequest(Request_MSHR, time); } @@ -365,18 +371,21 @@ MissQueue::allocateMiss(Packet * &pkt, int size, Tick time) MSHR* MissQueue::allocateWrite(Packet * &pkt, int size, Tick time) { - MSHR* mshr = wb.allocate(pkt,pkt->size); + MSHR* mshr = wb.allocate(pkt,pkt->getSize()); mshr->order = order++; - if (cache->doData()){ - if (pkt->isCompressed()) { - delete [] mshr->pkt->data; - mshr->pkt->actualSize = pkt->actualSize; - mshr->pkt->data = new uint8_t[pkt->actualSize]; - memcpy(mshr->pkt->data, pkt->data, pkt->actualSize); - } else { - memcpy(mshr->pkt->data, pkt->data, pkt->size); - } - } + +//REMOVING COMPRESSION FOR NOW +#if 0 + if (pkt->isCompressed()) { + mshr->pkt->deleteData(); + mshr->pkt->actualSize = pkt->actualSize; + mshr->pkt->data = new uint8_t[pkt->actualSize]; + memcpy(mshr->pkt->data, pkt->data, pkt->actualSize); + } else { +#endif + memcpy(mshr->pkt->getPtr(), pkt->getPtr(), pkt->getSize()); + //{ + if (wb.isFull()) { cache->setBlocked(Blocked_NoWBBuffers); } @@ -397,15 +406,15 @@ MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time) if (prefetchMiss) prefetcher->handleMiss(pkt, time); int size = blkSize; - Addr blkAddr = pkt->paddr & ~(Addr)(blkSize-1); + Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1); MSHR* mshr = NULL; if (!pkt->req->isUncacheable()) { - mshr = mq.findMatch(blkAddr, pkt->req->asid); + mshr = mq.findMatch(blkAddr, pkt->req->getAsid()); if (mshr) { //@todo remove hw_pf here mshr_hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; - if (mshr->getThreadNum() != pkt->req->getThreadNum()) { - mshr->setThreadNum() = -1; + if (mshr->threadNum != pkt->req->getThreadNum()) { + mshr->threadNum = -1; } 
mq.allocateTarget(mshr, pkt); if (mshr->pkt->isNoAllocate() && !pkt->isNoAllocate()) { @@ -429,14 +438,14 @@ MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time) } else { //Count uncacheable accesses mshr_uncacheable[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; - size = pkt->size; + size = pkt->getSize(); } - if (pkt->cmd.isWrite() && (pkt->req->isUncacheable() || !writeAllocate || - pkt->cmd.isNoResponse())) { + if (pkt->isWrite() && (pkt->req->isUncacheable() || !writeAllocate || + !pkt->needsResponse())) { /** * @todo Add write merging here. */ - mshr = allocateWrite(pkt, pkt->size, time); + mshr = allocateWrite(pkt, pkt->getSize(), time); return; } @@ -468,7 +477,7 @@ MissQueue::getPacket() pkt = wb.getReq(); // Need to search for earlier miss. MSHR *mshr = mq.findPending(pkt); - if (mshr && mshr->order < pkt->senderState->order) { + if (mshr && mshr->order < ((MSHR*)(pkt->senderState))->order) { // Service misses in order until conflict is cleared. return mq.getReq(); } @@ -491,7 +500,7 @@ MissQueue::getPacket() //Update statistic on number of prefetches issued (hwpf_mshr_misses) mshr_misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; //It will request the bus for the future, but should clear that immedieatley - allocateMiss(pkt, pkt->size, curTick); + allocateMiss(pkt, pkt->getSize(), curTick); pkt = mq.getReq(); assert(pkt); //We should get back a req b/c we just put one in } @@ -503,7 +512,7 @@ void MissQueue::setBusCmd(Packet * &pkt, Packet::Command cmd) { assert(pkt->senderState != 0); - MSHR * mshr = pkt->senderState; + MSHR * mshr = (MSHR*)pkt->senderState; mshr->originalCmd = pkt->cmd; if (pkt->isCacheFill() || pkt->isNoAllocate()) pkt->cmd = cmd; @@ -512,7 +521,7 @@ MissQueue::setBusCmd(Packet * &pkt, Packet::Command cmd) void MissQueue::restoreOrigCmd(Packet * &pkt) { - pkt->cmd = pkt->senderState->originalCmd; + pkt->cmd = ((MSHR*)(pkt->senderState))->originalCmd; } void @@ -526,11 +535,11 @@ MissQueue::markInService(Packet * &pkt) * 
@todo Should include MSHRQueue pointer in MSHR to select the correct * one. */ - if ((!pkt->isCacheFill() && pkt->cmd.isWrite()) || pkt->cmd == Copy) { + if ((!pkt->isCacheFill() && pkt->isWrite())) { // Forwarding a write/ writeback, don't need to change // the command unblock = wb.isFull(); - wb.markInService(pkt->senderState); + wb.markInService((MSHR*)pkt->senderState); if (!wb.havePending()){ cache->clearMasterRequest(Request_WB); } @@ -541,11 +550,11 @@ MissQueue::markInService(Packet * &pkt) } } else { unblock = mq.isFull(); - mq.markInService(pkt->senderState); + mq.markInService((MSHR*)pkt->senderState); if (!mq.havePending()){ cache->clearMasterRequest(Request_MSHR); } - if (pkt->senderState->originalCmd == Hard_Prefetch) { + if (((MSHR*)(pkt->senderState))->originalCmd == Packet::HardPFReq) { DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n", cache->name()); //Also clear pending if need be @@ -568,8 +577,8 @@ MissQueue::markInService(Packet * &pkt) void MissQueue::handleResponse(Packet * &pkt, Tick time) { - MSHR* mshr = pkt->senderState; - if (pkt->senderState->originalCmd == Hard_Prefetch) { + MSHR* mshr = (MSHR*)pkt->senderState; + if (((MSHR*)(pkt->senderState))->originalCmd == Packet::HardPFReq) { DPRINTF(HWPrefetch, "%s:Handling the response to a HW_PF\n", cache->name()); } @@ -617,8 +626,9 @@ MissQueue::handleResponse(Packet * &pkt, Tick time) assert(num_targets == 1); Packet * target = mshr->getTarget(); mshr->popTarget(); - if (cache->doData() && pkt->cmd.isRead()) { - memcpy(target->data, pkt->data, target->size); + if (pkt->isRead()) { + memcpy(target->getPtr(), pkt->getPtr(), + target->getSize()); } cache->respond(target, time); assert(!mshr->hasTargets()); @@ -629,14 +639,15 @@ MissQueue::handleResponse(Packet * &pkt, Tick time) while (mshr->hasTargets()) { Packet * target = mshr->getTarget(); mshr->popTarget(); - if (cache->doData() && pkt->cmd.isRead()) { - memcpy(target->data, pkt->data, target->size); + if (pkt->isRead()) { + 
memcpy(target->getPtr(), pkt->getPtr(), + target->getSize()); } cache->respond(target, time); } } - if (pkt->cmd.isWrite()) { + if (pkt->isWrite()) { // If the wrtie buffer is full, we might unblock now unblock = wb.isFull(); wb.deallocate(mshr); @@ -660,12 +671,12 @@ MissQueue::handleResponse(Packet * &pkt, Tick time) } void -MissQueue::squash(int req->getThreadNum()ber) +MissQueue::squash(int threadNum) { bool unblock = false; BlockedCause cause = NUM_BLOCKED_CAUSES; - if (noTargetMSHR && noTargetMSHR->setThreadNum() == req->getThreadNum()ber) { + if (noTargetMSHR && noTargetMSHR->threadNum == threadNum) { noTargetMSHR = NULL; unblock = true; cause = Blocked_NoTargets; @@ -674,7 +685,7 @@ MissQueue::squash(int req->getThreadNum()ber) unblock = true; cause = Blocked_NoMSHRs; } - mq.squash(req->getThreadNum()ber); + mq.squash(threadNum); if (!mq.havePending()) { cache->clearMasterRequest(Request_MSHR); } @@ -701,9 +712,19 @@ MissQueue::doWriteback(Addr addr, int asid, int size, uint8_t *data, bool compressed) { // Generate request - Packet * pkt = buildWritebackReq(addr, asid, size, data, - compressed); + Request * req = new Request(addr, size, 0); + Packet * pkt = new Packet(req, Packet::Writeback, -1); + uint8_t *new_data = new uint8_t[size]; + pkt->dataDynamicArray(new_data); + if (data) { + memcpy(pkt->getPtr(), data, size); + } + if (compressed) { + pkt->flags |= COMPRESSED; + } + + ///All writebacks charged to same thread @todo figure this out writebacks[pkt->req->getThreadNum()]++; allocateWrite(pkt, 0, curTick); diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 5c3c9fd1d0..fe8cbeea45 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -50,13 +50,15 @@ MSHR::MSHR() { inService = false; ntargets = 0; - setThreadNum() = -1; + threadNum = -1; } void MSHR::allocate(Packet::Command cmd, Addr _addr, int _asid, int size, Packet * &target) { + assert("NEED TO FIX YET\n" && 0); +#if 0 assert(targets.empty()); addr = 
_addr; asid = _asid; @@ -74,6 +76,7 @@ MSHR::allocate(Packet::Command cmd, Addr _addr, int _asid, int size, pkt->req = target->req; allocateTarget(target); } +#endif } // Since we aren't sure if data is being used, don't copy here. @@ -83,17 +86,13 @@ MSHR::allocate(Packet::Command cmd, Addr _addr, int _asid, int size, void MSHR::allocateAsBuffer(Packet * &target) { - addr = target->paddr; - asid = target->req->asid; - setThreadNum() = target->req->getThreadNum(); - pkt = new Packet(); - pkt->addr = target->addr; - pkt->dest = target->dest; - pkt->cmd = target->cmd; - pkt->size = target->size; - pkt->req = target->req; - pkt->data = new uint8_t[target->size]; - pkt->senderState = this; + addr = target->getAddr(); + asid = target->req->getAsid(); + threadNum = target->req->getThreadNum(); + pkt = new Packet(target->req, target->cmd, -1); + uint8_t *new_data = new uint8_t[target->getSize()]; + pkt->dataDynamicArray(new_data); + pkt->senderState = (Packet::SenderState*)this; pkt->time = curTick; } @@ -117,11 +116,11 @@ MSHR::allocateTarget(Packet * &target) //If we append an invalidate and we issued a read to the bus, //but now have some pending writes, we need to move //the invalidate to before the first non-read - if (inService && pkt->cmd.isRead() && target->cmd.isInvalidate()) { + if (inService && pkt->isRead() && target->isInvalidate()) { std::list temp; while (!targets.empty()) { - if (!targets.front()->cmd.isRead()) break; + if (!targets.front()->isRead()) break; //Place on top of temp stack temp.push_front(targets.front()); //Remove from targets @@ -148,8 +147,8 @@ MSHR::allocateTarget(Packet * &target) * @todo really prioritize the target commands. 
*/ - if (!inService && target->cmd.isWrite()) { - pkt->cmd = WriteReq; + if (!inService && target->isWrite()) { + pkt->cmd = Packet::WriteReq; } } @@ -162,14 +161,14 @@ MSHR::dump() "inService: %d thread: %d\n" "Addr: %x asid: %d ntargets %d\n" "Targets:\n", - inService, getThreadNum(), addr, asid, ntargets); + inService, threadNum, addr, asid, ntargets); TargetListIterator tar_it = targets.begin(); for (int i = 0; i < ntargets; i++) { assert(tar_it != targets.end()); ccprintf(cerr, "\t%d: Addr: %x cmd: %d\n", - i, (*tar_it)->paddr, (*tar_it)->cmdToIndex()); + i, (*tar_it)->getAddr(), (*tar_it)->cmdToIndex()); tar_it++; } diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index 3bd6d36d15..167aa26cd1 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -66,7 +66,7 @@ class MSHR { /** True if the request has been sent to the bus. */ bool inService; /** Thread number of the miss. */ - int getThreadNum(); + int threadNum; /** The request that is forwarded to the next level of the hierarchy. */ Packet * pkt; /** The number of currently allocated targets. */ diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc index ced43d30af..6516a99f85 100644 --- a/src/mem/cache/miss/mshr_queue.cc +++ b/src/mem/cache/miss/mshr_queue.cc @@ -94,17 +94,19 @@ MSHRQueue::findPending(Packet * &pkt) const MSHR::ConstIterator end = pendingList.end(); for (; i != end; ++i) { MSHR *mshr = *i; - if (mshr->addr < pkt->addr) { - if (mshr->addr + mshr->pkt->size > pkt->addr) { + if (mshr->addr < pkt->getAddr()) { + if (mshr->addr + mshr->pkt->getSize() > pkt->getAddr()) { return mshr; } } else { - if (pkt->addr + pkt->size > mshr->addr) { + if (pkt->getAddr() + pkt->getSize() > mshr->addr) { return mshr; } } //need to check destination address for copies. 
+ //TEMP NOT DOING COPIES +#if 0 if (mshr->pkt->cmd == Copy) { Addr dest = mshr->pkt->dest; if (dest < pkt->addr) { @@ -117,6 +119,7 @@ MSHRQueue::findPending(Packet * &pkt) const } } } +#endif } return NULL; } @@ -124,16 +127,16 @@ MSHRQueue::findPending(Packet * &pkt) const MSHR* MSHRQueue::allocate(Packet * &pkt, int size) { - Addr aligned_addr = pkt->addr & ~((Addr)size - 1); + Addr aligned_addr = pkt->getAddr() & ~((Addr)size - 1); MSHR *mshr = freeList.front(); assert(mshr->getNumTargets() == 0); freeList.pop_front(); - if (pkt->cmd.isNoResponse()) { + if (!pkt->needsResponse()) { mshr->allocateAsBuffer(pkt); } else { assert(size !=0); - mshr->allocate(pkt->cmd, aligned_addr, pkt->req->req->asid, size, pkt); + mshr->allocate(pkt->cmd, aligned_addr, pkt->req->getAsid(), size, pkt); allocatedTargets += 1; } mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr); @@ -149,7 +152,7 @@ MSHRQueue::allocateFetch(Addr addr, int asid, int size, Packet * &target) MSHR *mshr = freeList.front(); assert(mshr->getNumTargets() == 0); freeList.pop_front(); - mshr->allocate(Read, addr, asid, size, target); + mshr->allocate(Packet::ReadReq, addr, asid, size, target); mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr); mshr->readyIter = pendingList.insert(pendingList.end(), mshr); @@ -164,7 +167,7 @@ MSHRQueue::allocateTargetList(Addr addr, int asid, int size) assert(mshr->getNumTargets() == 0); freeList.pop_front(); Packet * dummy; - mshr->allocate(Read, addr, asid, size, dummy); + mshr->allocate(Packet::ReadReq, addr, asid, size, dummy); mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr); mshr->inService = true; ++inServiceMSHRs; @@ -209,7 +212,7 @@ void MSHRQueue::markInService(MSHR* mshr) { //assert(mshr == pendingList.front()); - if (mshr->pkt->cmd.isNoResponse()) { + if (!mshr->pkt->needsResponse()) { assert(mshr->getNumTargets() == 0); deallocate(mshr); return; @@ -237,21 +240,18 @@ MSHRQueue::markPending(MSHR* mshr, 
Packet::Command cmd) } void -MSHRQueue::squash(int req->getThreadNum()ber) +MSHRQueue::squash(int threadNum) { MSHR::Iterator i = allocatedList.begin(); MSHR::Iterator end = allocatedList.end(); for (; i != end;) { MSHR *mshr = *i; - if (mshr->setThreadNum() == req->getThreadNum()ber) { + if (mshr->threadNum == threadNum) { while (mshr->hasTargets()) { Packet * target = mshr->getTarget(); mshr->popTarget(); - assert(target->req->setThreadNum() == req->getThreadNum()ber); - if (target->completionEvent != NULL) { - delete target->completionEvent; - } + assert(target->req->getThreadNum() == threadNum); target = NULL; } assert(!mshr->hasTargets()); diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 3d37686768..0369745c9b 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -52,6 +52,9 @@ typedef std::list PacketList; #define NACKED_LINE 1 << 0 #define SATISFIED 1 << 1 #define SHARED_LINE 1 << 2 +#define CACHE_LINE_FILL 1 << 3 +#define COMPRESSED 1 << 4 +#define NO_ALLOCATE 1 << 5 //For statistics we need max number of commands, hard code it at //20 for now. @todo fix later @@ -66,6 +69,10 @@ typedef std::list PacketList; */ class Packet { + public: + /** Temporary FLAGS field until cache gets working, this should be in coherence/sender state. */ + uint64_t flags; + private: /** A pointer to the data being transfered. It can be differnt * sizes at each level of the heirarchy so it belongs in the @@ -93,6 +100,9 @@ class Packet /** The size of the request or transfer. */ int size; + /** The offset within the block that represents the data. */ + int offset; + /** Device address (e.g., bus ID) of the source of the * transaction. The source is not responsible for setting this * field; it is set implicitly by the interconnect when the @@ -110,6 +120,9 @@ class Packet bool addrSizeValid; /** Is the 'src' field valid? */ bool srcValid; + /** Is the offset valid. 
*/ + bool offsetValid; + public: @@ -171,6 +184,7 @@ class Packet /** List of all commands associated with a packet. */ enum Command { + InvalidCmd = 0, ReadReq = IsRead | IsRequest | NeedsResponse, WriteReq = IsWrite | IsRequest | NeedsResponse, WriteReqNoAck = IsWrite | IsRequest, @@ -183,7 +197,10 @@ class Packet HardPFResp = IsRead | IsRequest | IsHWPrefetch | IsResponse, InvalidateReq = IsInvalidate | IsRequest, WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest, - UpgradeReq = IsInvalidate | NeedsResponse + UpgradeReq = IsInvalidate | NeedsResponse, + UpgradeResp = IsInvalidate | IsResponse, + ReadExReq = IsRead | IsInvalidate | NeedsResponse, + ReadExResp = IsRead | IsInvalidate | IsResponse }; /** Return the string name of the cmd field (for debugging and @@ -206,8 +223,8 @@ class Packet bool needsResponse() { return (cmd & NeedsResponse) != 0; } bool isInvalidate() { return (cmd * IsInvalidate) != 0; } - bool isCacheFill() { assert("Unimplemented yet\n" && 0); } - bool isNoAllocate() { assert("Unimplemented yet\n" && 0); } + bool isCacheFill() { return (flags & CACHE_LINE_FILL) != 0; } + bool isNoAllocate() { return (flags & NO_ALLOCATE) != 0; } /** Possible results of a packet's request. */ enum Result @@ -232,6 +249,10 @@ class Packet Addr getAddr() const { assert(addrSizeValid); return addr; } int getSize() const { assert(addrSizeValid); return size; } + int getOffset() const { assert(offsetValid); return offset; } + + void addrOverride(Addr newAddr) { assert(addrSizeValid); addr = newAddr; } + void cmdOverride(Command newCmd) { cmd = newCmd; } /** Constructor. 
Note that a Request object must be constructed * first, but the Requests's physical address and size fields @@ -241,10 +262,25 @@ class Packet : data(NULL), staticData(false), dynamicData(false), arrayData(false), addr(_req->paddr), size(_req->size), dest(_dest), addrSizeValid(_req->validPaddr), - srcValid(false), + srcValid(false), offsetValid(false), req(_req), coherence(NULL), senderState(NULL), cmd(_cmd), result(Unknown) { + flags = 0; + } + + /** Alternate constructor if you are trying to create a packet with + * a request that is for a whole block, not the address from the req. + * this allows for overriding the size/addr of the req.*/ + Packet(Request *_req, Command _cmd, short _dest, int _blkSize) + : data(NULL), staticData(false), dynamicData(false), arrayData(false), + addr(_req->paddr & ~(_blkSize - 1)), size(_blkSize), + offset(_req->paddr & (_blkSize - 1)), dest(_dest), + addrSizeValid(_req->validPaddr), srcValid(false), offsetValid(true), + req(_req), coherence(NULL), senderState(NULL), cmd(_cmd), + result(Unknown) + { + flags = 0; } /** Destructor. */ From de90be348239a0a58ebb659dfc6a2f2fe5909292 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Thu, 29 Jun 2006 16:52:47 -0400 Subject: [PATCH 077/152] Update the readme to point people to m5.eecs.umich.edu start a new release section in RELEASE_NOTES add AUTHORS file that still needs work README: Update the readme to point people to m5.eecs.umich.edu RELEASE_NOTES: start a new release section --HG-- extra : convert_revision : 4c51e4255aecb67b10f18337428e5af114759d2e --- AUTHORS | 42 ++++++++++++++++++++++++ README | 88 +++++++++++---------------------------------------- RELEASE_NOTES | 8 +++++ 3 files changed, 68 insertions(+), 70 deletions(-) create mode 100644 AUTHORS diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000000..8904070d89 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,42 @@ +Steven K. Reinhardt +----------------------- + +Nathan L. Binkert +----------------------- + +Erik G. 
Hallnor +----------------------- + +Steve E. Raasch +----------------------- + +Lisa R. Hsu +----------------------- + +Ali G. Saidi +----------------------- + +Andrew L. Schultz +----------------------- + +Kevin T. Lim +----------------------- + +Ronald G. Dreslinski Jr +----------------------- + +Gabriel Black +----------------------- + +Korey Sewell +----------------------- + +David Green +----------------------- + +Benjamin S. Nash +----------------------- + +Miguel J. Serrano +----------------------- + diff --git a/README b/README index 62214d8c5c..7bcbe95239 100644 --- a/README +++ b/README @@ -1,21 +1,27 @@ -This is release m5_1.1 of the M5 simulator. +This is release 2.0 of the M5 simulator. -This file contains brief "getting started" instructions. For more -information, see http://m5.eecs.umich.edu. If you have questions, -please send mail to m5sim-users@lists.sourceforge.net. +For information about building the simulator and getting started please refer +to: http://m5.eecs.umich.edu/ + +Specific Pages of Interest are: +http://m5.eecs.umich.edu/wiki/index.php/Compiling_M5 +http://m5.eecs.umich.edu/wiki/index.php/Running_M5 + +If you have questions, please send mail to m5sim-users@lists.sourceforge.net. WHAT'S INCLUDED (AND NOT) ------------------------- The basic source release includes these subdirectories: - - m5: the simulator itself - - m5-test: regression tests - - ext: less-common external packages needed to build m5 - - alpha-system: source for Alpha console and PALcode + - m5: + - src: source code of the m5 simulator + - test: regression tests + - ext: less-common external packages needed to build m5 + - system/alpha: source for Alpha console and PALcode To run full-system simulations, you will need compiled console, PALcode, and kernel binaries and one or more disk images. These files -are collected in a separate archive, m5_system_1.1.tar.bz2. This file +are collected in a separate archive, m5_system_2.0.tar.bz2. 
This file is included on the CD release, or you can download it separately from Sourceforge. @@ -31,66 +37,8 @@ set of Linux source patches (linux_m5-2.6.8.1.diff), and the scons program needed to build M5. If you do not have the CD, the same HTML documentation is available online at http://m5.eecs.umich.edu/docs, the Linux source patches are available at -http://m5.eecs.umich.edu/dist/linux_m5-2.6.8.1.diff, and the scons -program is available from http://www.scons.org. +http://m5.eecs.umich.edu/dist/linux_m5-2.6.8.1.diff, the scons +program is available from http://www.scons.org, and swig is available from +http://www.swig.org. -WHAT'S NEEDED -------------- -- GCC version 3.3 or newer -- Python 2.3 or newer -- SCons 0.96.1 or newer (see http://www.scons.org) - -WHAT'S RECOMMENDED ------------------- -- MySQL (for statistics complex statistics storage/retrieval) -- Python-MysqlDB (for statistics analysis) - -GETTING STARTED ---------------- - -There are two different build targets and three optimizations levels: - -Target: -------- -ALPHA_SE - Syscall emulation simulation -ALPHA_FS - Full system simulation - -Optimization: -------------- -m5.debug - debug version of the code with tracing and without optimization -m5.opt - optimized version of code with tracing -m5.fast - optimized version of the code without tracing and asserts - -Different targets are built in different subdirectories of m5/build. -Binaries with the same target but different optimization levels share -the same directory. Note that you can build m5 in any directory you -choose;p just configure the target directory using the 'mkbuilddir' -script in m5/build. - -The following steps will build and test the simulator. The variable -"$top" refers to the top directory where you've unpacked the files, -i.e., the one containing the m5, m5-test, and ext directories. If you -have a multiprocessor system, you should give scons a "-j N" argument (like -make) to run N jobs in parallel. 
- -To build and test the syscall-emulation simulator: - - cd $top/m5/build - scons ALPHA_SE/test/opt/quick - -This process takes under 10 minutes on a dual 3GHz Xeon system (using -the '-j 4' option). - -To build and test the full-system simulator: - -1. Unpack the full-system binaries from m5_system_1.1.tar.bz2. (See - above for directions on obtaining this file if you don't have it.) - This package includes disk images and kernel, palcode, and console - binaries for Linux and FreeBSD. -2. Edit the SYSTEMDIR search path in $top/m5-test/SysPaths.py to - include the path to your local copy of the binaries. -3. In $top/m5/build, run "scons ALPHA_FS/test/opt/quick". - -This process also takes under 10 minutes on a dual 3GHz Xeon system -(again using the '-j 4' option). diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 983c9b2e91..6eb9b1844f 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -1,3 +1,11 @@ +XXX. X, 2006: m5_2.0 +-------------------- +Major update to M5 including: +- New CPU model +- Sew memory system +- More extensive python integration +- Preliminary syscall emulation support for MIPS and SPARC + Oct. 8, 2005: m5_1.1 -------------------- Update release for IOSCA workshop mini-tutorial. New features include: From f64c175f9ae81be3c002a82ea14a2844a7ee100e Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 29 Jun 2006 19:40:12 -0400 Subject: [PATCH 078/152] Add in support for quiescing the system, taking checkpoints, restoring from checkpoints, changing memory modes, and switching CPUs. Key new functions that can be called on the m5 object at the python interpreter: doQuiesce(root) - A helper function that quiesces the object passed in and all of its children. resume(root) - Another helper function that tells the object and all of its children that the quiesce is over. checkpoint(root) - Takes a checkpoint of the system. Checkpoint directory must be set beforehand. setCheckpointDir(name) - Sets the checkpoint directory.
restoreCheckpoint(root) - Restores the values from the checkpoint located in the checkpoint directory. changeToAtomic(system) - Changes the system and all of its children to atomic memory mode. changeToTiming(system) - Changes the system and all of its children to timing memory mode. switchCpus(list) - Takes in a list of tuples, where each tuple is a pair of (old CPU, new CPU). Quiesces the old CPUs, and then switches over to the new CPUs. src/SConscript: Remove serializer, replaced by python code. src/python/m5/__init__.py: Updates to support quiescing, checkpointing, changing memory modes, and switching CPUs. src/python/m5/config.py: Several functions defined on the SimObject for quiescing, changing timing modes, and switching CPUs src/sim/main.cc: Add some extra functions that are exported to python through SWIG. src/sim/serialize.cc: Change serialization around a bit. Now it is controlled through Python, so there's no need for SerializeEvents or SerializeParams. Also add in a new unserializeAll() function that loads a checkpoint and handles unserializing all objects. src/sim/serialize.hh: Add unserializeAll function and a setCheckpointName function. src/sim/sim_events.cc: Add process() function for CountedQuiesceEvent, which calls exitSimLoop() once its counter reaches 0. src/sim/sim_events.hh: Add in a CountedQuiesceEvent, which is used when the system is preparing to quiesce. Any objects that can't be quiesced immediately are given a pointer to a CountedQuiesceEvent. The event has its counter set via Python, and as objects finish quiescing they call process() on the event. Eventually the event causes the simulation to stop once all objects have quiesced. src/sim/sim_object.cc: Add a few functions for quiescing, checkpointing, and changing memory modes. src/sim/sim_object.hh: Add a state variable to all SimObjects that tracks both the timing mode of the object and the quiesce state of the object. 
Currently this isn't serialized, and I'm not sure it needs to be so long as the timing mode starts up the same after a checkpoint. --HG-- extra : convert_revision : a8c738d3911c68d5a7caf7de24d732dcc62cfb61 --- src/SConscript | 1 - src/python/m5/__init__.py | 84 ++++++++++++++++++++++++-- src/python/m5/config.py | 27 +++++++++ src/sim/main.cc | 42 +++++++++++++ src/sim/serialize.cc | 123 +++++++------------------------------- src/sim/serialize.hh | 4 ++ src/sim/sim_events.cc | 8 +++ src/sim/sim_events.hh | 21 +++++++ src/sim/sim_object.cc | 66 +++++++++++++++++++- src/sim/sim_object.hh | 29 ++++++--- 10 files changed, 288 insertions(+), 117 deletions(-) diff --git a/src/SConscript b/src/SConscript index 124f88708a..5846435353 100644 --- a/src/SConscript +++ b/src/SConscript @@ -62,7 +62,6 @@ base_sources = Split(''' base/range.cc base/random.cc base/sat_counter.cc - base/serializer.cc base/socket.cc base/statistics.cc base/str.cc diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py index a7e653fc2d..828165d155 100644 --- a/src/python/m5/__init__.py +++ b/src/python/m5/__init__.py @@ -34,7 +34,7 @@ import cc_main # import a few SWIG-wrapped items (those that are likely to be used # directly by user scripts) completely into this module for # convenience -from cc_main import simulate, SimLoopExitEvent +from cc_main import simulate, SimLoopExitEvent, setCheckpointDir # import the m5 compile options import defines @@ -117,10 +117,6 @@ def debugBreak(option, opt_str, value, parser): def statsTextFile(option, opt_str, value, parser): objects.Statistics.text_file = value -# Extra list to help for options that are true or false -TrueOrFalse = ['True', 'False'] -TorF = "True | False" - # Standard optparse options. Need to be explicitly included by the # user script when it calls optparse.OptionParser(). standardOptions = [ @@ -216,3 +212,81 @@ atexit.register(cc_main.doExitCleanup) # just doing an 'import m5' (without an 'import m5.objects'). 
May not # matter since most scripts will probably 'from m5.objects import *'. import objects + +def doQuiesce(root): + quiesce = cc_main.createCountedQuiesce() + unready_objects = root.startQuiesce(quiesce, True) + # If we've got some objects that can't quiesce immediately, then simulate + if unready_objects > 0: + quiesce.setCount(unready_objects) + simulate() + cc_main.cleanupCountedQuiesce(quiesce) + +def resume(root): + root.resume() + +def checkpoint(root): + if not isinstance(root, objects.Root): + raise TypeError, "Object is not a root object. Checkpoint must be called on a root object." + doQuiesce(root) + print "Writing checkpoint" + cc_main.serializeAll() + resume(root) + +def restoreCheckpoint(root): + print "Restoring from checkpoint" + cc_main.unserializeAll() + +def changeToAtomic(system): + if not isinstance(system, objects.Root) and not isinstance(system, System): + raise TypeError, "Object is not a root or system object. Checkpoint must be " + "called on a root object." + doQuiesce(system) + print "Changing memory mode to atomic" + system.changeTiming(cc_main.SimObject.Atomic) + resume(system) + +def changeToTiming(system): + if not isinstance(system, objects.Root) and not isinstance(system, System): + raise TypeError, "Object is not a root or system object. Checkpoint must be " + "called on a root object." 
+ doQuiesce(system) + print "Changing memory mode to timing" + system.changeTiming(cc_main.SimObject.Timing) + resume(system) + +def switchCpus(cpuList): + if not isinstance(cpuList, list): + raise RuntimeError, "Must pass a list to this function" + for i in cpuList: + if not isinstance(i, tuple): + raise RuntimeError, "List must have tuples of (oldCPU,newCPU)" + + [old_cpus, new_cpus] = zip(*cpuList) + + for cpu in old_cpus: + if not isinstance(cpu, objects.BaseCPU): + raise TypeError, "%s is not of type BaseCPU", cpu + for cpu in new_cpus: + if not isinstance(cpu, objects.BaseCPU): + raise TypeError, "%s is not of type BaseCPU", cpu + + # Quiesce all of the individual CPUs + quiesce = cc_main.createCountedQuiesce() + unready_cpus = 0 + for old_cpu in old_cpus: + unready_cpus += old_cpu.startQuiesce(quiesce, False) + # If we've got some objects that can't quiesce immediately, then simulate + if unready_cpus > 0: + quiesce.setCount(unready_cpus) + simulate() + cc_main.cleanupCountedQuiesce(quiesce) + # Now all of the CPUs are ready to be switched out + for old_cpu in old_cpus: + old_cpu._ccObject.switchOut() + index = 0 + print "Switching CPUs" + for new_cpu in new_cpus: + new_cpu.takeOverFrom(old_cpus[index]) + new_cpu._ccObject.resume() + index += 1 diff --git a/src/python/m5/config.py b/src/python/m5/config.py index c29477465b..adabe07439 100644 --- a/src/python/m5/config.py +++ b/src/python/m5/config.py @@ -543,6 +543,33 @@ class SimObject(object): for child in self._children.itervalues(): child.connectPorts() + def startQuiesce(self, quiesce_event, recursive): + count = 0 + # ParamContexts don't serialize + if isinstance(self, SimObject) and not isinstance(self, ParamContext): + if self._ccObject.quiesce(quiesce_event): + count = 1 + if recursive: + for child in self._children.itervalues(): + count += child.startQuiesce(quiesce_event, True) + return count + + def resume(self): + if isinstance(self, SimObject) and not isinstance(self, ParamContext): + 
self._ccObject.resume() + for child in self._children.itervalues(): + child.resume() + + def changeTiming(self, mode): + if isinstance(self, SimObject) and not isinstance(self, ParamContext): + self._ccObject.setMemoryMode(mode) + for child in self._children.itervalues(): + child.changeTiming(mode) + + def takeOverFrom(self, old_cpu): + cpu_ptr = cc_main.convertToBaseCPUPtr(old_cpu._ccObject) + self._ccObject.takeOverFrom(cpu_ptr) + # generate output file for 'dot' to display as a pretty graph. # this code is currently broken. def outputDot(self, dot): diff --git a/src/sim/main.cc b/src/sim/main.cc index bf844da7f3..3eb7fa95d4 100644 --- a/src/sim/main.cc +++ b/src/sim/main.cc @@ -62,6 +62,7 @@ #include "sim/async.hh" #include "sim/builder.hh" #include "sim/host.hh" +#include "sim/serialize.hh" #include "sim/sim_events.hh" #include "sim/sim_exit.hh" #include "sim/sim_object.hh" @@ -521,6 +522,37 @@ simulate(Tick num_cycles = -1) // not reached... only exit is return on SimLoopExitEvent } +Event * +createCountedQuiesce() +{ + return new CountedQuiesceEvent(); +} + +void +cleanupCountedQuiesce(Event *counted_quiesce) +{ + CountedQuiesceEvent *event = + dynamic_cast(counted_quiesce); + if (event == NULL) { + fatal("Called cleanupCountedQuiesce() on an event that was not " + "a CountedQuiesceEvent."); + } + assert(event->getCount() == 0); + delete event; +} + +void +serializeAll() +{ + Serializable::serializeAll(); +} + +void +unserializeAll() +{ + Serializable::unserializeAll(); +} + /** * Queue of C++ callbacks to invoke on simulator exit. */ @@ -535,6 +567,16 @@ registerExitCallback(Callback *callback) exitCallbacks.add(callback); } +BaseCPU * +convertToBaseCPUPtr(SimObject *obj) +{ + BaseCPU *ptr = dynamic_cast(obj); + + if (ptr == NULL) + warn("Casting to BaseCPU pointer failed"); + return ptr; +} + /** * Do C++ simulator exit processing. Exported to SWIG to be invoked * when simulator terminates via Python's atexit mechanism. 
diff --git a/src/sim/serialize.cc b/src/sim/serialize.cc index 0e31391166..7450d7b7e0 100644 --- a/src/sim/serialize.cc +++ b/src/sim/serialize.cc @@ -244,14 +244,22 @@ Serializable::serializeAll() globals.serialize(outstream); SimObject::serializeAll(outstream); - - assert(Serializable::ckptPrevCount + 1 == Serializable::ckptCount); - Serializable::ckptPrevCount++; - if (ckptMaxCount && ++ckptCount >= ckptMaxCount) - exitSimLoop(curTick + 1, "Maximum number of checkpoints dropped"); - } +void +Serializable::unserializeAll() +{ + string dir = Checkpoint::dir(); + string cpt_file = dir + Checkpoint::baseFilename; + string section = ""; + + DPRINTFR(Config, "Loading checkpoint dir '%s'\n", + dir); + Checkpoint *cp = new Checkpoint(dir, section); + unserializeGlobals(cp); + + SimObject::unserializeAll(cp); +} void Serializable::unserializeGlobals(Checkpoint *cp) @@ -259,41 +267,18 @@ Serializable::unserializeGlobals(Checkpoint *cp) globals.unserialize(cp); } - -class SerializeEvent : public Event -{ - protected: - Tick repeat; - - public: - SerializeEvent(Tick _when, Tick _repeat); - virtual void process(); - virtual void serialize(std::ostream &os) - { - panic("Cannot serialize the SerializeEvent"); - } - -}; - -SerializeEvent::SerializeEvent(Tick _when, Tick _repeat) - : Event(&mainEventQueue, Serialize_Pri), repeat(_repeat) -{ - setFlags(AutoDelete); - schedule(_when); -} - -void -SerializeEvent::process() -{ - Serializable::serializeAll(); - if (repeat) - schedule(curTick + repeat); -} - const char *Checkpoint::baseFilename = "m5.cpt"; static string checkpointDirBase; +void +setCheckpointDir(const std::string &name) +{ + checkpointDirBase = name; + if (checkpointDirBase[checkpointDirBase.size() - 1] != '/') + checkpointDirBase += "/"; +} + string Checkpoint::dir() { @@ -303,76 +288,12 @@ Checkpoint::dir() csprintf(checkpointDirBase, curTick) : checkpointDirBase; } -void -Checkpoint::setup(Tick when, Tick period) -{ - new SerializeEvent(when, period); -} - -class 
SerializeParamContext : public ParamContext -{ - private: - SerializeEvent *event; - - public: - SerializeParamContext(const string §ion); - ~SerializeParamContext(); - void checkParams(); -}; - -SerializeParamContext serialParams("serialize"); - -Param serialize_dir(&serialParams, "dir", - "dir to stick checkpoint in " - "(sprintf format with cycle #)"); - -Param serialize_cycle(&serialParams, - "cycle", - "cycle to serialize", - 0); - -Param serialize_period(&serialParams, - "period", - "period to repeat serializations", - 0); - -Param serialize_count(&serialParams, "count", - "maximum number of checkpoints to drop"); - -SerializeParamContext::SerializeParamContext(const string §ion) - : ParamContext(section), event(NULL) -{ } - -SerializeParamContext::~SerializeParamContext() -{ -} - -void -SerializeParamContext::checkParams() -{ - checkpointDirBase = simout.resolve(serialize_dir); - - // guarantee that directory ends with a '/' - if (checkpointDirBase[checkpointDirBase.size() - 1] != '/') - checkpointDirBase += "/"; - - if (serialize_cycle > 0) - Checkpoint::setup(serialize_cycle, serialize_period); - - Serializable::ckptMaxCount = serialize_count; -} - void debug_serialize() { Serializable::serializeAll(); } -void -debug_serialize(Tick when) -{ - new SerializeEvent(when, 0); -} //////////////////////////////////////////////////////////////////////// // diff --git a/src/sim/serialize.hh b/src/sim/serialize.hh index 64ed6142fd..5a820b27ea 100644 --- a/src/sim/serialize.hh +++ b/src/sim/serialize.hh @@ -127,6 +127,7 @@ class Serializable static int ckptMaxCount; static int ckptPrevCount; static void serializeAll(); + static void unserializeAll(); static void unserializeGlobals(Checkpoint *cp); }; @@ -204,6 +205,9 @@ class SerializableClass SerializableClass the##OBJ_CLASS##Class(CLASS_NAME, \ OBJ_CLASS::createForUnserialize); +void +setCheckpointName(const std::string &name); + class Checkpoint { private: diff --git a/src/sim/sim_events.cc b/src/sim/sim_events.cc 
index b7901832de..97f7ae03cb 100644 --- a/src/sim/sim_events.cc +++ b/src/sim/sim_events.cc @@ -78,6 +78,14 @@ exitSimLoop(const std::string &message, int exit_code) exitSimLoop(curTick, message, exit_code); } +void +CountedQuiesceEvent::process() +{ + if (--count == 0) { + exitSimLoop("Finished quiesce"); + } +} + // // constructor: automatically schedules at specified time // diff --git a/src/sim/sim_events.hh b/src/sim/sim_events.hh index 4f305ad382..50368f258f 100644 --- a/src/sim/sim_events.hh +++ b/src/sim/sim_events.hh @@ -44,6 +44,11 @@ class SimLoopExitEvent : public Event int code; public: + // Default constructor. Only really used for derived classes. + SimLoopExitEvent() + : Event(&mainEventQueue, Sim_Exit_Pri) + { } + SimLoopExitEvent(Tick _when, const std::string &_cause, int c = 0) : Event(&mainEventQueue, Sim_Exit_Pri), cause(_cause), code(c) @@ -62,6 +67,22 @@ class SimLoopExitEvent : public Event virtual const char *description(); }; +class CountedQuiesceEvent : public SimLoopExitEvent +{ + private: + // Count down to quiescing + int count; + public: + CountedQuiesceEvent() + : count(0) + { } + void process(); + + void setCount(int _count) { count = _count; } + + int getCount() { return count; } +}; + // // Event class to terminate simulation after 'n' related events have // occurred using a shared counter: used to terminate when *all* diff --git a/src/sim/sim_object.cc b/src/sim/sim_object.cc index 97e6de439a..5752e4ef15 100644 --- a/src/sim/sim_object.cc +++ b/src/sim/sim_object.cc @@ -73,6 +73,7 @@ SimObject::SimObject(Params *p) doRecordEvent = !Stats::event_ignore.match(name()); simObjectList.push_back(this); + state = Atomic; } // @@ -88,6 +89,7 @@ SimObject::SimObject(const string &_name) doRecordEvent = !Stats::event_ignore.match(name()); simObjectList.push_back(this); + state = Atomic; } void @@ -219,6 +221,24 @@ SimObject::serializeAll(ostream &os) } } +void +SimObject::unserializeAll(Checkpoint *cp) +{ + SimObjectList::reverse_iterator 
ri = simObjectList.rbegin(); + SimObjectList::reverse_iterator rend = simObjectList.rend(); + + for (; ri != rend; ++ri) { + SimObject *obj = *ri; + DPRINTFR(Config, "Unserializing '%s'\n", + obj->name()); + if(cp->sectionExists(obj->name())) + obj->unserialize(cp, obj->name()); + else + warn("Not unserializing '%s': no section found in checkpoint.\n", + obj->name()); + } +} + #ifdef DEBUG // // static function: flag which objects should have the debugger break @@ -251,10 +271,50 @@ SimObject::recordEvent(const std::string &stat) Stats::recordEvent(stat); } -void -SimObject::drain(Serializer *serializer) +bool +SimObject::quiesce(Event *quiesce_event) { - serializer->signalDrained(); + if (state != QuiescedAtomic && state != Atomic) { + panic("Must implement your own quiesce function if it is to be used " + "in timing mode!"); + } + state = QuiescedAtomic; + return false; +} + +void +SimObject::resume() +{ + if (state == QuiescedAtomic) { + state = Atomic; + } else if (state == QuiescedTiming) { + state = Timing; + } +} + +void +SimObject::setMemoryMode(State new_mode) +{ + assert(new_mode == Timing || new_mode == Atomic); + if (state == QuiescedAtomic && new_mode == Timing) { + state = QuiescedTiming; + } else if (state == QuiescedTiming && new_mode == Atomic) { + state = QuiescedAtomic; + } else { + state = new_mode; + } +} + +void +SimObject::switchOut() +{ + panic("Unimplemented!"); +} + +void +SimObject::takeOverFrom(BaseCPU *cpu) +{ + panic("Unimplemented!"); } DEFINE_SIM_OBJECT_CLASS_NAME("SimObject", SimObject) diff --git a/src/sim/sim_object.hh b/src/sim/sim_object.hh index 84e9376a0f..e0b21782fb 100644 --- a/src/sim/sim_object.hh +++ b/src/sim/sim_object.hh @@ -44,7 +44,8 @@ #include "sim/serialize.hh" #include "sim/startup.hh" -class Serializer; +class BaseCPU; +class Event; /* * Abstract superclass for simulation objects. 
Represents things that @@ -58,15 +59,26 @@ class SimObject : public Serializable, protected StartupCallback std::string name; }; + enum State { + Atomic, + Timing, + Quiescing, + QuiescedAtomic, + QuiescedTiming + }; + protected: Params *_params; + State state; + + void changeState(State new_state) { state = new_state; } public: const Params *params() const { return _params; } - private: - friend class Serializer; + State getState() { return state; } + private: typedef std::vector SimObjectList; // list of all instantiated simulation objects @@ -100,13 +112,16 @@ class SimObject : public Serializable, protected StartupCallback // static: call nameOut() & serialize() on all SimObjects static void serializeAll(std::ostream &); + static void unserializeAll(Checkpoint *cp); // Methods to drain objects in order to take checkpoints // Or switch from timing -> atomic memory model - virtual void drain(Serializer *serializer); - virtual void resume() { return;} ; - virtual void serializationComplete() - { assert(0 && "Unimplemented"); }; + // Quiesce returns true if the SimObject cannot quiesce immediately. + virtual bool quiesce(Event *quiesce_event); + virtual void resume(); + virtual void setMemoryMode(State new_mode); + virtual void switchOut(); + virtual void takeOverFrom(BaseCPU *cpu); #ifdef DEBUG public: From 2416ba606aed95098ce8e43f813fa57dd9f91974 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 29 Jun 2006 19:45:24 -0400 Subject: [PATCH 079/152] Various fixes for the CPU models to support the features that have been moved to python. src/cpu/base.cc: src/cpu/base.hh: src/cpu/simple/atomic.hh: Switching out no longer takes a sampler. src/cpu/simple/atomic.cc: Fix up switching out. Also fix up serialization; the nameOut() was messing up the ordering. src/cpu/simple/timing.cc: Add in quiesce, fix up serialization. src/cpu/simple/timing.hh: Add in quiesce, fix up serialization.
--HG-- extra : convert_revision : 9d59d53bdf269d4d82fb119e5ae7c8a5d475880b --- src/cpu/base.cc | 2 +- src/cpu/base.hh | 2 +- src/cpu/simple/atomic.cc | 15 ++++----- src/cpu/simple/atomic.hh | 2 +- src/cpu/simple/timing.cc | 67 +++++++++++++++++++++++++++++++++++----- src/cpu/simple/timing.hh | 10 +++++- 6 files changed, 78 insertions(+), 20 deletions(-) diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 55c04c4989..40cec416be 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -237,7 +237,7 @@ BaseCPU::registerThreadContexts() void -BaseCPU::switchOut(Sampler *sampler) +BaseCPU::switchOut() { panic("This CPU doesn't support sampling!"); } diff --git a/src/cpu/base.hh b/src/cpu/base.hh index 43122f2387..51f3bb905d 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -148,7 +148,7 @@ class BaseCPU : public SimObject /// Prepare for another CPU to take over execution. When it is /// is ready (drained pipe) it signals the sampler. - virtual void switchOut(Sampler *); + virtual void switchOut(); /// Take over execution from the given CPU. Used for warm-up and /// sampling. 
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 071193f02e..7be74e97e7 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -145,8 +145,8 @@ AtomicSimpleCPU::~AtomicSimpleCPU() void AtomicSimpleCPU::serialize(ostream &os) { - BaseSimpleCPU::serialize(os); SERIALIZE_ENUM(_status); + BaseSimpleCPU::serialize(os); nameOut(os, csprintf("%s.tickEvent", name())); tickEvent.serialize(os); } @@ -154,21 +154,18 @@ AtomicSimpleCPU::serialize(ostream &os) void AtomicSimpleCPU::unserialize(Checkpoint *cp, const string §ion) { - BaseSimpleCPU::unserialize(cp, section); UNSERIALIZE_ENUM(_status); + BaseSimpleCPU::unserialize(cp, section); tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); } void -AtomicSimpleCPU::switchOut(Sampler *s) +AtomicSimpleCPU::switchOut() { - sampler = s; - if (status() == Running) { - _status = SwitchedOut; + assert(status() == Running || status() == Idle); + _status = SwitchedOut; - tickEvent.squash(); - } - sampler->signalSwitched(); + tickEvent.squash(); } diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh index 7f4956da9c..951a8da063 100644 --- a/src/cpu/simple/atomic.hh +++ b/src/cpu/simple/atomic.hh @@ -125,7 +125,7 @@ class AtomicSimpleCPU : public BaseSimpleCPU virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); - void switchOut(Sampler *s); + void switchOut(); void takeOverFrom(BaseCPU *oldCPU); virtual void activateContext(int thread_num, int delay); diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index c99db8fbfa..0729f94898 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -88,6 +88,8 @@ TimingSimpleCPU::TimingSimpleCPU(Params *p) { _status = Idle; ifetch_pkt = dcache_pkt = NULL; + quiesceEvent = NULL; + state = SimObject::Timing; } @@ -98,25 +100,54 @@ TimingSimpleCPU::~TimingSimpleCPU() void TimingSimpleCPU::serialize(ostream &os) { - BaseSimpleCPU::serialize(os); 
SERIALIZE_ENUM(_status); + BaseSimpleCPU::serialize(os); } void TimingSimpleCPU::unserialize(Checkpoint *cp, const string §ion) { - BaseSimpleCPU::unserialize(cp, section); UNSERIALIZE_ENUM(_status); + BaseSimpleCPU::unserialize(cp, section); +} + +bool +TimingSimpleCPU::quiesce(Event *quiesce_event) +{ + // TimingSimpleCPU is ready to quiesce if it's not waiting for + // an access to complete. + if (status() == Idle || status() == Running || status() == SwitchedOut) { + DPRINTF(Config, "Ready to quiesce\n"); + return false; + } else { + DPRINTF(Config, "Waiting to quiesce\n"); + changeState(SimObject::Quiescing); + quiesceEvent = quiesce_event; + return true; + } } void -TimingSimpleCPU::switchOut(Sampler *s) +TimingSimpleCPU::resume() { - sampler = s; - if (status() == Running) { - _status = SwitchedOut; + if (_status != SwitchedOut && _status != Idle) { + Event *e = + new EventWrapper(this, true); + e->schedule(curTick); } - sampler->signalSwitched(); +} + +void +TimingSimpleCPU::setMemoryMode(State new_mode) +{ + assert(new_mode == SimObject::Timing); +} + +void +TimingSimpleCPU::switchOut() +{ + assert(status() == Running || status() == Idle); + _status = SwitchedOut; } @@ -383,11 +414,17 @@ TimingSimpleCPU::completeIfetch(Packet *pkt) // instruction assert(pkt->result == Packet::Success); assert(_status == IcacheWaitResponse); + _status = Running; delete pkt->req; delete pkt; + if (getState() == SimObject::Quiescing) { + completeQuiesce(); + return; + } + preExecute(); if (curStaticInst->isMemRef() && !curStaticInst->isDataPrefetch()) { // load or store: just send to dcache @@ -440,6 +477,15 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt) assert(_status == DcacheWaitResponse); _status = Running; + if (getState() == SimObject::Quiescing) { + completeQuiesce(); + + delete pkt->req; + delete pkt; + + return; + } + Fault fault = curStaticInst->completeAcc(pkt, this, traceData); delete pkt->req; @@ -450,6 +496,13 @@ TimingSimpleCPU::completeDataAccess(Packet 
*pkt) } +void +TimingSimpleCPU::completeQuiesce() +{ + DPRINTF(Config, "Done quiescing\n"); + changeState(SimObject::QuiescedTiming); + quiesceEvent->process(); +} bool TimingSimpleCPU::DcachePort::recvTiming(Packet *pkt) diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index ab0b2d2ca2..d91144e4a6 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -64,6 +64,8 @@ class TimingSimpleCPU : public BaseSimpleCPU Status status() const { return _status; } + Event *quiesceEvent; + private: class CpuPort : public Port @@ -131,7 +133,11 @@ class TimingSimpleCPU : public BaseSimpleCPU virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); - void switchOut(Sampler *s); + virtual bool quiesce(Event *quiesce_event); + virtual void resume(); + virtual void setMemoryMode(State new_mode); + + void switchOut(); void takeOverFrom(BaseCPU *oldCPU); virtual void activateContext(int thread_num, int delay); @@ -147,6 +153,8 @@ class TimingSimpleCPU : public BaseSimpleCPU void completeIfetch(Packet *); void completeDataAccess(Packet *); void advanceInst(Fault fault); + private: + void completeQuiesce(); }; #endif // __CPU_SIMPLE_TIMING_HH__ From ad6788493c09aec456a1136f126abde7000696ab Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 29 Jun 2006 21:34:01 -0400 Subject: [PATCH 080/152] Remove function that no longer can be used. We should figure out if we want to allow the m5checkpoint pseudoinstruction or not. src/sim/pseudo_inst.cc: Remove the setup function from Checkpoint. I'm not sure what we want to do with this pseudoinst. src/sim/serialize.hh: Remove setup function. 
--HG-- extra : convert_revision : 5ff494d816e2d8a7fe65a3d13037608003388d8f --- src/sim/pseudo_inst.cc | 6 ------ src/sim/serialize.hh | 3 --- 2 files changed, 9 deletions(-) diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc index ae52cdd412..b2854e491d 100644 --- a/src/sim/pseudo_inst.cc +++ b/src/sim/pseudo_inst.cc @@ -209,12 +209,6 @@ namespace AlphaPseudo { if (!doCheckpointInsts) return; - - - Tick when = curTick + delay * Clock::Int::ns; - Tick repeat = period * Clock::Int::ns; - - Checkpoint::setup(when, repeat); } uint64_t diff --git a/src/sim/serialize.hh b/src/sim/serialize.hh index 5a820b27ea..a80dc99e4c 100644 --- a/src/sim/serialize.hh +++ b/src/sim/serialize.hh @@ -241,9 +241,6 @@ class Checkpoint // Filename for base checkpoint file within directory. static const char *baseFilename; - - // Set up a checkpoint creation event or series of events. - static void setup(Tick when, Tick period = 0); }; #endif // __SERIALIZE_HH__ From 335fa4bde33f60bf61dceb04eb61aeade5cee76c Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Fri, 30 Jun 2006 10:25:25 -0400 Subject: [PATCH 081/152] All files compile in the mem directory except cache_builder Missing some functionality (like split caches and copy support) src/SConscript: Typo src/mem/cache/prefetch/base_prefetcher.cc: src/mem/cache/prefetch/ghb_prefetcher.hh: src/mem/cache/prefetch/stride_prefetcher.hh: src/mem/cache/prefetch/tagged_prefetcher_impl.hh: src/mem/cache/tags/fa_lru.cc: src/mem/cache/tags/fa_lru.hh: src/mem/cache/tags/iic.cc: src/mem/cache/tags/iic.hh: src/mem/cache/tags/lru.cc: src/mem/cache/tags/lru.hh: src/mem/cache/tags/split.cc: src/mem/cache/tags/split.hh: src/mem/cache/tags/split_lifo.cc: src/mem/cache/tags/split_lifo.hh: src/mem/cache/tags/split_lru.cc: src/mem/cache/tags/split_lru.hh: src/mem/packet.hh: src/mem/request.hh: Fix so it compiles --HG-- extra : convert_revision : 0d87d84f6e9445bab655c0cb0f8541bbf6eab904 --- src/SConscript | 2 +- 
src/mem/cache/prefetch/base_prefetcher.cc | 26 +++++----- src/mem/cache/prefetch/ghb_prefetcher.hh | 4 +- src/mem/cache/prefetch/stride_prefetcher.hh | 2 +- .../cache/prefetch/tagged_prefetcher_impl.hh | 2 +- src/mem/cache/tags/fa_lru.cc | 10 ++-- src/mem/cache/tags/fa_lru.hh | 8 +-- src/mem/cache/tags/iic.cc | 52 +++++++++++-------- src/mem/cache/tags/iic.hh | 14 ++--- src/mem/cache/tags/lru.cc | 45 ++++++++-------- src/mem/cache/tags/lru.hh | 8 +-- src/mem/cache/tags/split.cc | 26 +++++----- src/mem/cache/tags/split.hh | 8 +-- src/mem/cache/tags/split_lifo.cc | 18 ++++--- src/mem/cache/tags/split_lifo.hh | 8 +-- src/mem/cache/tags/split_lru.cc | 18 ++++--- src/mem/cache/tags/split_lru.hh | 8 +-- src/mem/packet.hh | 16 +++--- src/mem/request.hh | 4 ++ 19 files changed, 147 insertions(+), 132 deletions(-) diff --git a/src/SConscript b/src/SConscript index 04da17ee63..e20aca0d73 100644 --- a/src/SConscript +++ b/src/SConscript @@ -117,7 +117,7 @@ base_sources = Split(''' mem/cache/tags/base_tags.cc mem/cache/tags/cache_tags.cc mem/cache/tags/fa_lru.cc - mem/cache/tags/iic/cc + mem/cache/tags/iic.cc mem/cache/tags/lru.cc mem/cache/tags/repl/gen.cc mem/cache/tags/repl/repl.cc diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc index 7b2d57cd53..29da537468 100644 --- a/src/mem/cache/prefetch/base_prefetcher.cc +++ b/src/mem/cache/prefetch/base_prefetcher.cc @@ -36,6 +36,7 @@ #include "base/trace.hh" #include "mem/cache/base_cache.hh" #include "mem/cache/prefetch/base_prefetcher.hh" +#include "mem/request.hh" #include BasePrefetcher::BasePrefetcher(int size, bool pageStop, bool serialSquash, @@ -132,10 +133,10 @@ BasePrefetcher::getPacket() void BasePrefetcher::handleMiss(Packet * &pkt, Tick time) { - if (!pkt->req->isUncacheable() && !(pkt->isInstRead() && only_data)) + if (!pkt->req->isUncacheable() && !(pkt->req->isInstRead() && only_data)) { //Calculate the blk address - Addr blkAddr = pkt->paddr & ~(Addr)(blkSize-1); + 
Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1); //Check if miss is in pfq, if so remove it std::list::iterator iter = inPrefetch(blkAddr); @@ -177,15 +178,14 @@ BasePrefetcher::handleMiss(Packet * &pkt, Tick time) //temp calc this here... pfIdentified++; //create a prefetch memreq + Request * prefetchReq = new Request(*addr, blkSize, 0); Packet * prefetch; - prefetch = new Packet(); - prefetch->paddr = (*addr); - prefetch->size = blkSize; - prefetch->cmd = Hard_Prefetch; - prefetch->xc = pkt->xc; - prefetch->data = new uint8_t[blkSize]; - prefetch->req->asid = pkt->req->asid; - prefetch->req->setThreadNum() = pkt->req->getThreadNum(); + prefetch = new Packet(prefetchReq, Packet::HardPFReq, -1); + uint8_t *new_data = new uint8_t[blkSize]; + prefetch->dataDynamicArray(new_data); + prefetch->req->setThreadContext(pkt->req->getCpuNum(), + pkt->req->getThreadNum()); + prefetch->time = time + (*delay); //@todo ADD LATENCY HERE //... initialize @@ -199,14 +199,14 @@ BasePrefetcher::handleMiss(Packet * &pkt, Tick time) } //Check if it is already in the miss_queue - if (inMissQueue(prefetch->paddr, prefetch->req->asid)) { + if (inMissQueue(prefetch->getAddr(), prefetch->req->getAsid())) { addr++; delay++; continue; } //Check if it is already in the pf buffer - if (inPrefetch(prefetch->paddr) != pf.end()) { + if (inPrefetch(prefetch->getAddr()) != pf.end()) { pfBufferHit++; addr++; delay++; @@ -240,7 +240,7 @@ BasePrefetcher::inPrefetch(Addr address) //Guaranteed to only be one match, we always check before inserting std::list::iterator iter; for (iter=pf.begin(); iter != pf.end(); iter++) { - if (((*iter)->paddr & ~(Addr)(blkSize-1)) == address) { + if (((*iter)->getAddr() & ~(Addr)(blkSize-1)) == address) { return iter; } } diff --git a/src/mem/cache/prefetch/ghb_prefetcher.hh b/src/mem/cache/prefetch/ghb_prefetcher.hh index f25ebe1664..c22b763d1b 100644 --- a/src/mem/cache/prefetch/ghb_prefetcher.hh +++ b/src/mem/cache/prefetch/ghb_prefetcher.hh @@ -78,8 +78,8 @@ 
class GHBPrefetcher : public Prefetcher void calculatePrefetch(Packet * &pkt, std::list &addresses, std::list &delays) { - Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1); - int cpuID = pkt->cpu_num; + Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1); + int cpuID = pkt->req->getCpuNum(); if (!useCPUId) cpuID = 0; diff --git a/src/mem/cache/prefetch/stride_prefetcher.hh b/src/mem/cache/prefetch/stride_prefetcher.hh index f897762151..4a8ee7de48 100644 --- a/src/mem/cache/prefetch/stride_prefetcher.hh +++ b/src/mem/cache/prefetch/stride_prefetcher.hh @@ -96,7 +96,7 @@ class StridePrefetcher : public Prefetcher std::list &delays) { // Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1); - int cpuID = pkt->cpu_num; + int cpuID = pkt->req->getCpuNum(); if (!useCPUId) cpuID = 0; /* Scan Table for IAddr Match */ diff --git a/src/mem/cache/prefetch/tagged_prefetcher_impl.hh b/src/mem/cache/prefetch/tagged_prefetcher_impl.hh index 9e46ba8937..db5c94820c 100644 --- a/src/mem/cache/prefetch/tagged_prefetcher_impl.hh +++ b/src/mem/cache/prefetch/tagged_prefetcher_impl.hh @@ -52,7 +52,7 @@ TaggedPrefetcher:: calculatePrefetch(Packet * &pkt, std::list &addresses, std::list &delays) { - Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1); + Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1); for (int d=1; d <= degree; d++) { Addr newAddr = blkAddr + d*(this->blkSize); diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc index 43ab363095..82d2c410d0 100644 --- a/src/mem/cache/tags/fa_lru.cc +++ b/src/mem/cache/tags/fa_lru.cc @@ -39,6 +39,7 @@ #include "mem/cache/tags/fa_lru.hh" #include "base/intmath.hh" +#include "base/misc.hh" using namespace std; @@ -204,7 +205,7 @@ FALRU::findBlock(Addr addr, int asid, int &lat, int *inCache) FALRUBlk* FALRU::findBlock(Packet * &pkt, int &lat, int *inCache) { - Addr addr = pkt->paddr; + Addr addr = pkt->getAddr(); accesses++; int tmp_in_cache = 0; @@ -255,17 +256,16 @@ FALRU::findBlock(Addr addr, int 
asid) const } FALRUBlk* -FALRU::findReplacement(Packet * &pkt, PacketList* &writebacks, +FALRU::findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks) { FALRUBlk * blk = tail; assert(blk->inCache == 0); moveToHead(blk); tagHash.erase(blk->tag); - tagHash[blkAlign(pkt->paddr)] = blk; + tagHash[blkAlign(pkt->getAddr())] = blk; if (blk->isValid()) { - int req->setThreadNum() = (blk->xc) ? blk->xc->getThreadNum() : 0; - replacements[req->getThreadNum()]++; + replacements[0]++; } else { tagsInUse++; blk->isTouched = true; diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh index 7855f84550..566e36c277 100644 --- a/src/mem/cache/tags/fa_lru.hh +++ b/src/mem/cache/tags/fa_lru.hh @@ -215,7 +215,7 @@ public: * @param compress_blocks List of blocks to compress, for adaptive comp. * @return The block to place the replacement in. */ - FALRUBlk* findReplacement(Packet * &pkt, PacketList* & writebacks, + FALRUBlk* findReplacement(Packet * &pkt, PacketList & writebacks, BlkList &compress_blocks); /** @@ -319,7 +319,7 @@ public: * needed when writing to a compressed block. */ void writeData(FALRUBlk *blk, uint8_t *data, int size, - PacketList* &writebacks) + PacketList &writebacks) { } @@ -330,14 +330,14 @@ public: * @param asid The address space ID. * @param writebacks List for any generated writeback requests. */ - void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) + void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks) { } /** * Unimplemented. 
*/ - void fixCopy(Packet * &pkt, PacketList* &writebacks) + void fixCopy(Packet * &pkt, PacketList &writebacks) { } diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc index f4641401fc..0071ca2837 100644 --- a/src/mem/cache/tags/iic.cc +++ b/src/mem/cache/tags/iic.cc @@ -287,8 +287,8 @@ IIC::findBlock(Addr addr, int asid, int &lat) IICTag* IIC::findBlock(Packet * &pkt, int &lat) { - Addr addr = pkt->paddr; - int asid = pkt->req->asid; + Addr addr = pkt->getAddr(); + int asid = pkt->req->getAsid(); Addr tag = extractTag(addr); unsigned set = hash(addr); @@ -363,11 +363,11 @@ IIC::findBlock(Addr addr, int asid) const IICTag* -IIC::findReplacement(Packet * &pkt, PacketList* &writebacks, +IIC::findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks) { - DPRINTF(IIC, "Finding Replacement for %x\n", pkt->paddr); - unsigned set = hash(pkt->paddr); + DPRINTF(IIC, "Finding Replacement for %x\n", pkt->getAddr()); + unsigned set = hash(pkt->getAddr()); IICTag *tag_ptr; unsigned long *tmp_data = new unsigned long[numSub]; @@ -405,7 +405,7 @@ IIC::findReplacement(Packet * &pkt, PacketList* &writebacks, } void -IIC::freeReplacementBlock(PacketList* & writebacks) +IIC::freeReplacementBlock(PacketList & writebacks) { IICTag *tag_ptr; unsigned long data_ptr; @@ -418,18 +418,23 @@ IIC::freeReplacementBlock(PacketList* & writebacks) tag_ptr->isModified() ? "writeback" : "clean"); /* write back replaced block data */ if (tag_ptr && (tag_ptr->isValid())) { - int req->setThreadNum() = (tag_ptr->xc) ? 
tag_ptr->xc->getThreadNum() : 0; - replacements[req->getThreadNum()]++; + replacements[0]++; totalRefs += tag_ptr->refCount; ++sampledRefs; tag_ptr->refCount = 0; if (tag_ptr->isModified()) { - Packet * writeback = +/* Packet * writeback = buildWritebackReq(regenerateBlkAddr(tag_ptr->tag, 0), tag_ptr->req->asid, tag_ptr->xc, blkSize, - (cache->doData())?tag_ptr->data:0, + tag_ptr->data, tag_ptr->size); +*/ + Request *writebackReq = new Request(regenerateBlkAddr(tag_ptr->tag, 0), + blkSize, 0); + Packet *writeback = new Packet(writebackReq, Packet::Writeback, -1); + writeback->dataDynamic(tag_ptr->data); + writebacks.push_back(writeback); } } @@ -446,7 +451,7 @@ IIC::freeReplacementBlock(PacketList* & writebacks) } unsigned long -IIC::getFreeDataBlock(PacketList* & writebacks) +IIC::getFreeDataBlock(PacketList & writebacks) { struct IICTag *tag_ptr; unsigned long data_ptr; @@ -466,7 +471,7 @@ IIC::getFreeDataBlock(PacketList* & writebacks) IICTag* -IIC::getFreeTag(int set, PacketList* & writebacks) +IIC::getFreeTag(int set, PacketList & writebacks) { unsigned long tag_index; IICTag *tag_ptr; @@ -708,7 +713,7 @@ IIC::invalidateBlk(int asid, Addr addr) void IIC::readData(IICTag *blk, uint8_t *data){ - assert(cache->doData()); +// assert(cache->doData()); assert(blk->size <= trivialSize || blk->numData > 0); int data_size = blk->size; if (data_size > trivialSize) { @@ -725,8 +730,8 @@ IIC::readData(IICTag *blk, uint8_t *data){ void IIC::writeData(IICTag *blk, uint8_t *write_data, int size, - PacketList* & writebacks){ - assert(cache->doData()); + PacketList & writebacks){ +// assert(cache->doData()); assert(size < blkSize || !blk->isCompressed()); DPRINTF(IIC, "Writing %d bytes to %x\n", size, blk->tag<re = (void*) repl->add(dest_tag - tagStore); dest_tag->set = hash(dest); dest_tag->tag = extractTag(dest); - dest_tag->req->asid = asid; + dest_tag->asid = asid; dest_tag->status = BlkValid | BlkWritable; } // Find the source tag here since it might move if we need to 
find a @@ -823,15 +830,17 @@ IIC::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) } else { dest_tag->status &= ~BlkCompressed; } +#endif } void -IIC::fixCopy(Packet * &pkt, PacketList* &writebacks) +IIC::fixCopy(Packet * &pkt, PacketList &writebacks) { +#if 0 // if reference counter is greater than 1, do copy // else do write - Addr blk_addr = blkAlign(pkt->paddr); - IICTag* blk = findBlock(blk_addr, pkt->req->asid); + Addr blk_addr = blkAlign(pkt->getAddr); + IICTag* blk = findBlock(blk_addr, pkt->req->getAsid()); if (blk->numData > 0 && dataReferenceCount[blk->data_ptr[0]] != 1) { // copy the data @@ -843,7 +852,7 @@ IIC::fixCopy(Packet * &pkt, PacketList* &writebacks) /** * @todo Remove this refetch once we change IIC to pointer based */ - blk = findBlock(blk_addr, pkt->req->asid); + blk = findBlock(blk_addr, pkt->req->getAsid()); assert(blk); if (cache->doData()) { memcpy(&(dataBlks[new_data][0]), @@ -855,6 +864,7 @@ IIC::fixCopy(Packet * &pkt, PacketList* &writebacks) blk->data_ptr[i] = new_data; } } +#endif } void diff --git a/src/mem/cache/tags/iic.hh b/src/mem/cache/tags/iic.hh index ef3f03c534..6628f7e7a9 100644 --- a/src/mem/cache/tags/iic.hh +++ b/src/mem/cache/tags/iic.hh @@ -475,7 +475,7 @@ class IIC : public BaseTags * @param compress_blocks List of blocks to compress, for adaptive comp. * @return The block to place the replacement in. */ - IICTag* findReplacement(Packet * &pkt, PacketList* &writebacks, + IICTag* findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks); /** @@ -495,7 +495,7 @@ class IIC : public BaseTags * needed when writing to a compressed block. */ void writeData(IICTag *blk, uint8_t *data, int size, - PacketList* & writebacks); + PacketList & writebacks); /** * Perform a block aligned copy from the source address to the destination. @@ -504,14 +504,14 @@ class IIC : public BaseTags * @param asid The address space DI. * @param writebacks List for any generated writeback requests. 
*/ - void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks); + void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks); /** * If a block is currently marked copy on write, copy it before writing. * @param req The write request. * @param writebacks List for any generated writeback requests. */ - void fixCopy(Packet * &pkt, PacketList* &writebacks); + void fixCopy(Packet * &pkt, PacketList &writebacks); /** * Called at end of simulation to complete average block reference stats. @@ -541,14 +541,14 @@ private: * Free the resources associated with the next replacement block. * @param writebacks A list of any writebacks to perform. */ - void freeReplacementBlock(PacketList* & writebacks); + void freeReplacementBlock(PacketList & writebacks); /** * Return the pointer to a free data block. * @param writebacks A list of any writebacks to perform. * @return A pointer to a free data block. */ - unsigned long getFreeDataBlock(PacketList* & writebacks); + unsigned long getFreeDataBlock(PacketList & writebacks); /** * Get a free tag in the given hash set. @@ -556,7 +556,7 @@ private: * @param writebacks A list of any writebacks to perform. * @return a pointer to a free tag. */ - IICTag* getFreeTag(int set, PacketList* & writebacks); + IICTag* getFreeTag(int set, PacketList & writebacks); /** * Free the resources associated with the given tag. 
diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc index 19a52aade7..81b84e11e8 100644 --- a/src/mem/cache/tags/lru.cc +++ b/src/mem/cache/tags/lru.cc @@ -135,7 +135,7 @@ LRU::LRU(int _numSets, int _blkSize, int _assoc, int _hit_latency) : // table; won't matter because the block is invalid blk->tag = j; blk->whenReady = 0; - blk->req->asid = -1; + blk->asid = -1; blk->isTouched = false; blk->size = blkSize; sets[i].blks[j]=blk; @@ -187,8 +187,8 @@ LRU::findBlock(Addr addr, int asid, int &lat) LRUBlk* LRU::findBlock(Packet * &pkt, int &lat) { - Addr addr = pkt->paddr; - int asid = pkt->req->asid; + Addr addr = pkt->getAddr(); + int asid = pkt->req->getAsid(); Addr tag = extractTag(addr); unsigned set = extractSet(addr); @@ -217,16 +217,15 @@ LRU::findBlock(Addr addr, int asid) const } LRUBlk* -LRU::findReplacement(Packet * &pkt, PacketList* &writebacks, +LRU::findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks) { - unsigned set = extractSet(pkt->paddr); + unsigned set = extractSet(pkt->getAddr()); // grab a replacement candidate LRUBlk *blk = sets[set].blks[assoc-1]; sets[set].moveToHead(blk); if (blk->isValid()) { - int req->setThreadNum() = (blk->xc) ? 
blk->xc->getThreadNum() : 0; - replacements[req->getThreadNum()]++; + replacements[0]++; totalRefs += blk->refCount; ++sampledRefs; blk->refCount = 0; @@ -254,7 +253,7 @@ LRU::invalidateBlk(int asid, Addr addr) } void -LRU::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) +LRU::doCopy(Addr source, Addr dest, int asid, PacketList &writebacks) { assert(source == blkAlign(source)); assert(dest == blkAlign(dest)); @@ -263,29 +262,31 @@ LRU::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) LRUBlk *dest_blk = findBlock(dest, asid); if (dest_blk == NULL) { // Need to do a replacement - Packet * pkt = new Packet(); - pkt->paddr = dest; + Request *search = new Request(dest,1,0); + Packet * pkt = new Packet(search, Packet::ReadReq, -1); BlkList dummy_list; dest_blk = findReplacement(pkt, writebacks, dummy_list); if (dest_blk->isValid() && dest_blk->isModified()) { // Need to writeback data. - pkt = buildWritebackReq(regenerateBlkAddr(dest_blk->tag, +/* pkt = buildWritebackReq(regenerateBlkAddr(dest_blk->tag, dest_blk->set), dest_blk->req->asid, dest_blk->xc, blkSize, - (cache->doData())?dest_blk->data:0, + dest_blk->data, dest_blk->size); - writebacks.push_back(pkt); +*/ + Request *writebackReq = new Request(regenerateBlkAddr(dest_blk->tag, + dest_blk->set), + blkSize, 0); + Packet *writeback = new Packet(writebackReq, Packet::Writeback, -1); + writeback->dataDynamic(dest_blk->data); + writebacks.push_back(writeback); } dest_blk->tag = extractTag(dest); - dest_blk->req->asid = asid; - /** - * @todo Do we need to pass in the execution context, or can we - * assume its the same? - */ - assert(source_blk->xc); - dest_blk->xc = source_blk->xc; + dest_blk->asid = asid; + delete search; + delete pkt; } /** * @todo Can't assume the status once we have coherence on copies. @@ -293,9 +294,7 @@ LRU::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) // Set this block as readable, writeable, and dirty. 
dest_blk->status = 7; - if (cache->doData()) { - memcpy(dest_blk->data, source_blk->data, blkSize); - } + memcpy(dest_blk->data, source_blk->data, blkSize); } void diff --git a/src/mem/cache/tags/lru.hh b/src/mem/cache/tags/lru.hh index 9b4a557772..437244660e 100644 --- a/src/mem/cache/tags/lru.hh +++ b/src/mem/cache/tags/lru.hh @@ -201,7 +201,7 @@ public: * @param compress_blocks List of blocks to compress, for adaptive comp. * @return The block to place the replacement in. */ - LRUBlk* findReplacement(Packet * &pkt, PacketList* &writebacks, + LRUBlk* findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks); /** @@ -296,7 +296,7 @@ public: * needed when writing to a compressed block. */ void writeData(LRUBlk *blk, uint8_t *data, int size, - PacketList* & writebacks) + PacketList & writebacks) { assert(size <= blkSize); blk->size = size; @@ -309,12 +309,12 @@ public: * @param asid The address space DI. * @param writebacks List for any generated writeback requests. */ - void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks); + void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks); /** * No impl. 
*/ - void fixCopy(Packet * &pkt, PacketList* &writebacks) + void fixCopy(Packet * &pkt, PacketList &writebacks) { } diff --git a/src/mem/cache/tags/split.cc b/src/mem/cache/tags/split.cc index 9d9036abb2..bf23fb8cb3 100644 --- a/src/mem/cache/tags/split.cc +++ b/src/mem/cache/tags/split.cc @@ -270,30 +270,30 @@ SplitBlk* Split::findBlock(Packet * &pkt, int &lat) { - Addr aligned = blkAlign(pkt->paddr); + Addr aligned = blkAlign(pkt->getAddr()); if (memHash.count(aligned)) { memHash[aligned]++; - } else if (pkt->nic_pkt) { + } else if (pkt->nic_pkt()) { memHash[aligned] = 1; } - SplitBlk *blk = lru->findBlock(pkt->paddr, pkt->req->asid, lat); + SplitBlk *blk = lru->findBlock(pkt->getAddr(), pkt->req->getAsid(), lat); if (blk) { - if (pkt->nic_pkt) { + if (pkt->nic_pkt()) { NR_CP_hits++; } else { CR_CP_hits++; } } else { if (lifo && lifo_net) { - blk = lifo_net->findBlock(pkt->paddr, pkt->req->asid, lat); + blk = lifo_net->findBlock(pkt->getAddr(), pkt->req->getAsid(), lat); } else if (lru_net) { - blk = lru_net->findBlock(pkt->paddr, pkt->req->asid, lat); + blk = lru_net->findBlock(pkt->getAddr(), pkt->req->getAsid(), lat); } if (blk) { - if (pkt->nic_pkt) { + if (pkt->nic_pkt()) { NR_NP_hits++; } else { CR_NP_hits++; @@ -304,7 +304,7 @@ Split::findBlock(Packet * &pkt, int &lat) if (blk) { Tick latency = curTick - blk->ts; if (blk->isNIC) { - if (!blk->isUsed && !pkt->nic_pkt) { + if (!blk->isUsed && !pkt->nic_pkt()) { useByCPUCycleDist.sample(latency); nicUseByCPUCycleTotal += latency; nicBlksUsedByCPU++; @@ -312,7 +312,7 @@ Split::findBlock(Packet * &pkt, int &lat) } blk->isUsed = true; - if (pkt->nic_pkt) { + if (pkt->nic_pkt()) { DPRINTF(Split, "found block in partition %d\n", blk->part); } } @@ -350,12 +350,12 @@ Split::findBlock(Addr addr, int asid) const } SplitBlk* -Split::findReplacement(Packet * &pkt, PacketList* &writebacks, +Split::findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks) { SplitBlk *blk; - if (pkt->nic_pkt) { + if 
(pkt->nic_pkt()) { DPRINTF(Split, "finding a replacement for nic_req\n"); nic_repl++; if (lifo && lifo_net) @@ -397,7 +397,7 @@ Split::findReplacement(Packet * &pkt, PacketList* &writebacks, // blk attributes for the new blk coming IN blk->ts = curTick; - blk->isNIC = (pkt->nic_pkt) ? true : false; + blk->isNIC = (pkt->nic_pkt()) ? true : false; return blk; } @@ -422,7 +422,7 @@ Split::invalidateBlk(int asid, Addr addr) } void -Split::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) +Split::doCopy(Addr source, Addr dest, int asid, PacketList &writebacks) { if (lru->probe(asid, source)) lru->doCopy(source, dest, asid, writebacks); diff --git a/src/mem/cache/tags/split.hh b/src/mem/cache/tags/split.hh index 6f2441597d..5e03402690 100644 --- a/src/mem/cache/tags/split.hh +++ b/src/mem/cache/tags/split.hh @@ -224,7 +224,7 @@ class Split : public BaseTags * @param compress_blocks List of blocks to compress, for adaptive comp. * @return The block to place the replacement in. */ - SplitBlk* findReplacement(Packet * &pkt, PacketList* &writebacks, + SplitBlk* findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks); @@ -304,7 +304,7 @@ class Split : public BaseTags * needed when writing to a compressed block. */ void writeData(SplitBlk *blk, uint8_t *data, int size, - PacketList* & writebacks) + PacketList & writebacks) { assert(size <= blkSize); blk->size = size; @@ -317,12 +317,12 @@ class Split : public BaseTags * @param asid The address space DI. * @param writebacks List for any generated writeback requests. */ - void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks); + void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks); /** * No impl. 
*/ - void fixCopy(Packet * &pkt, PacketList* &writebacks) + void fixCopy(Packet * &pkt, PacketList &writebacks) { } diff --git a/src/mem/cache/tags/split_lifo.cc b/src/mem/cache/tags/split_lifo.cc index c6bb91eff1..f6493fdd28 100644 --- a/src/mem/cache/tags/split_lifo.cc +++ b/src/mem/cache/tags/split_lifo.cc @@ -257,8 +257,8 @@ SplitLIFO::findBlock(Addr addr, int asid, int &lat) SplitBlk* SplitLIFO::findBlock(Packet * &pkt, int &lat) { - Addr addr = pkt->paddr; - int asid = pkt->req->asid; + Addr addr = pkt->getAddr(); + int asid = pkt->req->getAsid(); Addr tag = extractTag(addr); unsigned set = extractSet(addr); @@ -292,10 +292,10 @@ SplitLIFO::findBlock(Addr addr, int asid) const } SplitBlk* -SplitLIFO::findReplacement(Packet * &pkt, PacketList* &writebacks, +SplitLIFO::findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks) { - unsigned set = extractSet(pkt->paddr); + unsigned set = extractSet(pkt->getAddr()); SplitBlk *firstIn = sets[set].firstIn; SplitBlk *lastIn = sets[set].lastIn; @@ -315,10 +315,9 @@ SplitLIFO::findReplacement(Packet * &pkt, PacketList* &writebacks, } DPRINTF(Split, "just assigned %#x addr into LIFO, replacing %#x status %#x\n", - pkt->paddr, regenerateBlkAddr(blk->tag, set), blk->status); + pkt->getAddr(), regenerateBlkAddr(blk->tag, set), blk->status); if (blk->isValid()) { - int req->setThreadNum() = (blk->xc) ? 
blk->xc->getThreadNum() : 0; - replacements[req->getThreadNum()]++; + replacements[0]++; totalRefs += blk->refCount; ++sampledRefs; blk->refCount = 0; @@ -349,8 +348,10 @@ SplitLIFO::invalidateBlk(int asid, Addr addr) } void -SplitLIFO::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) +SplitLIFO::doCopy(Addr source, Addr dest, int asid, PacketList &writebacks) { +//Copy Unsuported for now +#if 0 assert(source == blkAlign(source)); assert(dest == blkAlign(dest)); SplitBlk *source_blk = findBlock(source, asid); @@ -391,6 +392,7 @@ SplitLIFO::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) if (cache->doData()) { memcpy(dest_blk->data, source_blk->data, blkSize); } +#endif } void diff --git a/src/mem/cache/tags/split_lifo.hh b/src/mem/cache/tags/split_lifo.hh index c50eaa53db..dfcaa0b679 100644 --- a/src/mem/cache/tags/split_lifo.hh +++ b/src/mem/cache/tags/split_lifo.hh @@ -224,7 +224,7 @@ public: * @param compress_blocks List of blocks to compress, for adaptive comp. * @return The block to place the replacement in. */ - SplitBlk* findReplacement(Packet * &pkt, PacketList* &writebacks, + SplitBlk* findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks); /** @@ -319,7 +319,7 @@ public: * needed when writing to a compressed block. */ void writeData(SplitBlk *blk, uint8_t *data, int size, - PacketList* & writebacks) + PacketList & writebacks) { assert(size <= blkSize); blk->size = size; @@ -332,12 +332,12 @@ public: * @param asid The address space DI. * @param writebacks List for any generated writeback requests. */ - void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks); + void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks); /** * No impl. 
*/ - void fixCopy(Packet * &pkt, PacketList* &writebacks) + void fixCopy(Packet * &pkt, PacketList &writebacks) { } diff --git a/src/mem/cache/tags/split_lru.cc b/src/mem/cache/tags/split_lru.cc index 4b7f4c1147..7fc7420016 100644 --- a/src/mem/cache/tags/split_lru.cc +++ b/src/mem/cache/tags/split_lru.cc @@ -135,7 +135,7 @@ SplitLRU::SplitLRU(int _numSets, int _blkSize, int _assoc, int _hit_latency, int // table; won't matter because the block is invalid blk->tag = j; blk->whenReady = 0; - blk->req->asid = -1; + blk->asid = -1; blk->isTouched = false; blk->size = blkSize; sets[i].blks[j]=blk; @@ -206,8 +206,8 @@ SplitLRU::findBlock(Addr addr, int asid, int &lat) SplitBlk* SplitLRU::findBlock(Packet * &pkt, int &lat) { - Addr addr = pkt->paddr; - int asid = pkt->req->asid; + Addr addr = pkt->getAddr(); + int asid = pkt->req->getAsid(); Addr tag = extractTag(addr); unsigned set = extractSet(addr); @@ -236,16 +236,15 @@ SplitLRU::findBlock(Addr addr, int asid) const } SplitBlk* -SplitLRU::findReplacement(Packet * &pkt, PacketList* &writebacks, +SplitLRU::findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks) { - unsigned set = extractSet(pkt->paddr); + unsigned set = extractSet(pkt->getAddr()); // grab a replacement candidate SplitBlk *blk = sets[set].blks[assoc-1]; sets[set].moveToHead(blk); if (blk->isValid()) { - int req->setThreadNum() = (blk->xc) ? 
blk->xc->getThreadNum() : 0; - replacements[req->getThreadNum()]++; + replacements[0]++; totalRefs += blk->refCount; ++sampledRefs; blk->refCount = 0; @@ -275,8 +274,10 @@ SplitLRU::invalidateBlk(int asid, Addr addr) } void -SplitLRU::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) +SplitLRU::doCopy(Addr source, Addr dest, int asid, PacketList &writebacks) { +//Copy not supported for now +#if 0 assert(source == blkAlign(source)); assert(dest == blkAlign(dest)); SplitBlk *source_blk = findBlock(source, asid); @@ -317,6 +318,7 @@ SplitLRU::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) if (cache->doData()) { memcpy(dest_blk->data, source_blk->data, blkSize); } +#endif } void diff --git a/src/mem/cache/tags/split_lru.hh b/src/mem/cache/tags/split_lru.hh index 1c0fc8600a..03886b1d8d 100644 --- a/src/mem/cache/tags/split_lru.hh +++ b/src/mem/cache/tags/split_lru.hh @@ -207,7 +207,7 @@ public: * @param compress_blocks List of blocks to compress, for adaptive comp. * @return The block to place the replacement in. */ - SplitBlk* findReplacement(Packet * &pkt, PacketList* &writebacks, + SplitBlk* findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks); /** @@ -302,7 +302,7 @@ public: * needed when writing to a compressed block. */ void writeData(SplitBlk *blk, uint8_t *data, int size, - PacketList* & writebacks) + PacketList & writebacks) { assert(size <= blkSize); blk->size = size; @@ -315,12 +315,12 @@ public: * @param asid The address space DI. * @param writebacks List for any generated writeback requests. */ - void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks); + void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks); /** * No impl. 
*/ - void fixCopy(Packet * &pkt, PacketList* &writebacks) + void fixCopy(Packet * &pkt, PacketList &writebacks) { } diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 0369745c9b..2b97ab0c1d 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -100,9 +100,6 @@ class Packet /** The size of the request or transfer. */ int size; - /** The offset within the block that represents the data. */ - int offset; - /** Device address (e.g., bus ID) of the source of the * transaction. The source is not responsible for setting this * field; it is set implicitly by the interconnect when the @@ -120,8 +117,6 @@ class Packet bool addrSizeValid; /** Is the 'src' field valid? */ bool srcValid; - /** Is the offset valid. */ - bool offsetValid; public: @@ -225,6 +220,9 @@ class Packet bool isCacheFill() { return (flags & CACHE_LINE_FILL) != 0; } bool isNoAllocate() { return (flags & NO_ALLOCATE) != 0; } + bool isCompressed() { return (flags & COMPRESSED) != 0; } + + bool nic_pkt() { assert("Unimplemented\n" && 0); } /** Possible results of a packet's request. 
*/ enum Result @@ -249,7 +247,7 @@ class Packet Addr getAddr() const { assert(addrSizeValid); return addr; } int getSize() const { assert(addrSizeValid); return size; } - int getOffset() const { assert(offsetValid); return offset; } + Addr getOffset(int blkSize) const { return req->getPaddr() & (Addr)(blkSize - 1); } void addrOverride(Addr newAddr) { assert(addrSizeValid); addr = newAddr; } void cmdOverride(Command newCmd) { cmd = newCmd; } @@ -262,7 +260,7 @@ class Packet : data(NULL), staticData(false), dynamicData(false), arrayData(false), addr(_req->paddr), size(_req->size), dest(_dest), addrSizeValid(_req->validPaddr), - srcValid(false), offsetValid(false), + srcValid(false), req(_req), coherence(NULL), senderState(NULL), cmd(_cmd), result(Unknown) { @@ -275,8 +273,8 @@ class Packet Packet(Request *_req, Command _cmd, short _dest, int _blkSize) : data(NULL), staticData(false), dynamicData(false), arrayData(false), addr(_req->paddr & ~(_blkSize - 1)), size(_blkSize), - offset(_req->paddr & (_blkSize - 1)), dest(_dest), - addrSizeValid(_req->validPaddr), srcValid(false), offsetValid(true), + dest(_dest), + addrSizeValid(_req->validPaddr), srcValid(false), req(_req), coherence(NULL), senderState(NULL), cmd(_cmd), result(Unknown) { diff --git a/src/mem/request.hh b/src/mem/request.hh index 46d9b6fd70..a1524f8073 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -63,6 +63,8 @@ const unsigned PF_EXCLUSIVE = 0x100; const unsigned EVICT_NEXT = 0x200; /** The request should ignore unaligned access faults */ const unsigned NO_ALIGN_FAULT = 0x400; +/** The request was an instruction read. */ +const unsigned INST_READ = 0x800; class Request { @@ -228,6 +230,8 @@ class Request /** Accessor Function to Check Cacheability. 
*/ bool isUncacheable() { return getFlags() & UNCACHEABLE; } + bool isInstRead() { return getFlags() & INST_READ; } + friend class Packet; }; From dea1a19b2de2fe031f714904c5247cf27b363237 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Fri, 30 Jun 2006 11:34:27 -0400 Subject: [PATCH 082/152] Fix the packet data allocation methods. Small fixes from changesets after my initial work. This now compiles. src/mem/cache/base_cache.cc: Fix getPort function that changed src/mem/cache/base_cache.hh: Fix get port function, provide default implementations of virtual functions in the base class src/mem/cache/cache.hh: Fix virtual function declerations src/mem/cache/cache_builder.cc: Fix params src/mem/cache/cache_impl.hh: src/mem/cache/miss/blocking_buffer.cc: src/mem/cache/miss/miss_queue.cc: src/mem/cache/miss/mshr.cc: src/mem/cache/prefetch/base_prefetcher.cc: src/mem/cache/tags/iic.cc: src/mem/cache/tags/lru.cc: Properly allocate data in packet --HG-- extra : convert_revision : dedf8b0f76ab90b06b60f8fe079c0ae361f91a48 --- src/mem/cache/base_cache.cc | 2 +- src/mem/cache/base_cache.hh | 26 ++++++++++++++++++----- src/mem/cache/cache.hh | 8 +++---- src/mem/cache/cache_builder.cc | 4 ++-- src/mem/cache/cache_impl.hh | 3 +-- src/mem/cache/miss/blocking_buffer.cc | 3 +-- src/mem/cache/miss/miss_queue.cc | 3 +-- src/mem/cache/miss/mshr.cc | 3 +-- src/mem/cache/prefetch/base_prefetcher.cc | 3 +-- src/mem/cache/tags/iic.cc | 7 +++--- src/mem/cache/tags/lru.cc | 3 ++- 11 files changed, 39 insertions(+), 26 deletions(-) diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index 89e23ce318..c1ed6d3d43 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -99,7 +99,7 @@ BaseCache::CachePort::clearBlocked() } Port* -BaseCache::getPort(const std::string &if_name) +BaseCache::getPort(const std::string &if_name, int idx) { if(if_name == "cpu_side") { diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index 
977e0ae297..2754fab5a6 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -41,6 +41,7 @@ #include #include +#include "base/misc.hh" #include "base/statistics.hh" #include "base/trace.hh" #include "mem/mem_object.hh" @@ -122,14 +123,29 @@ class BaseCache : public MemObject CachePort *memSidePort; public: - virtual Port *getPort(const std::string &if_name); + virtual Port *getPort(const std::string &if_name, int idx = -1); private: //To be defined in cache_impl.hh not in base class - virtual bool doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide); - virtual Tick doAtomicAccess(Packet *pkt, bool isCpuSide); - virtual void doFunctionalAccess(Packet *pkt, bool isCpuSide); - virtual void recvStatusChange(Port::Status status, bool isCpuSide); + virtual bool doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) + { + fatal("No implementation"); + } + + virtual Tick doAtomicAccess(Packet *pkt, bool isCpuSide) + { + fatal("No implementation"); + } + + virtual void doFunctionalAccess(Packet *pkt, bool isCpuSide) + { + fatal("No implementation"); + } + + virtual void recvStatusChange(Port::Status status, bool isCpuSide) + { + fatal("No implementation"); + } /** * Bit vector of the blocking reasons for the access path. diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index d2af1d8bf4..788715e761 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -146,16 +146,16 @@ class Cache : public BaseCache /** Instantiates a basic cache object. 
*/ Cache(const std::string &_name, Params &params); - bool doTimingAccess(Packet *pkt, CachePort *cachePort, + virtual bool doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide); - Tick doAtomicAccess(Packet *pkt, CachePort *cachePort, + virtual Tick doAtomicAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide); - void doFunctionalAccess(Packet *pkt, CachePort *cachePort, + virtual void doFunctionalAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide); - void recvStatusChange(Port::Status status, bool isCpuSide); + virtual void recvStatusChange(Port::Status status, bool isCpuSide); void regStats(); diff --git a/src/mem/cache/cache_builder.cc b/src/mem/cache/cache_builder.cc index 8758dc57a4..05a149a1cd 100644 --- a/src/mem/cache/cache_builder.cc +++ b/src/mem/cache/cache_builder.cc @@ -230,7 +230,7 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache) Cache, b, c>::Params params(tagStore, mq, coh, \ do_copy, base_params, \ /*in_bus, out_bus,*/ pf, \ - prefetch_access); \ + prefetch_access, hit_latency); \ Cache, b, c> *retval = \ new Cache, b, c>(getInstanceName(), /*hier,*/ \ params); \ @@ -242,7 +242,7 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache) retval->setMasterInterface(new MasterInterface, b, c>, Bus>(getInstanceName(), hier, retval, out_bus)); \ out_bus->rangeChange(); \ return retval; \ -*/return true; \ +*/return retval; \ } while (0) #define BUILD_CACHE_PANIC(x) do { \ diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index dbf2e49f14..f1e9c3698f 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -588,8 +588,7 @@ Cache::probe(Packet * &pkt, bool update) Packet * busPkt = new Packet(pkt->req,temp_cmd, -1, blkSize); - uint8_t* temp_data = new uint8_t[blkSize]; - busPkt->dataDynamicArray(temp_data); + busPkt->allocate(); busPkt->time = curTick; diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc index d745cb8c65..10d53b109d 100644 --- a/src/mem/cache/miss/blocking_buffer.cc
+++ b/src/mem/cache/miss/blocking_buffer.cc @@ -210,8 +210,7 @@ BlockingBuffer::doWriteback(Addr addr, int asid, // Generate request Request * req = new Request(addr, size, 0); Packet * pkt = new Packet(req, Packet::Writeback, -1); - uint8_t *new_data = new uint8_t[size]; - pkt->dataDynamicArray(new_data); + pkt->allocate(); if (data) { memcpy(pkt->getPtr(), data, size); } diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc index 34290351de..99ebab0179 100644 --- a/src/mem/cache/miss/miss_queue.cc +++ b/src/mem/cache/miss/miss_queue.cc @@ -714,8 +714,7 @@ MissQueue::doWriteback(Addr addr, int asid, // Generate request Request * req = new Request(addr, size, 0); Packet * pkt = new Packet(req, Packet::Writeback, -1); - uint8_t *new_data = new uint8_t[size]; - pkt->dataDynamicArray(new_data); + pkt->allocate(); if (data) { memcpy(pkt->getPtr(), data, size); } diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index fe8cbeea45..05a2fe1c59 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -90,8 +90,7 @@ MSHR::allocateAsBuffer(Packet * &target) asid = target->req->getAsid(); threadNum = target->req->getThreadNum(); pkt = new Packet(target->req, target->cmd, -1); - uint8_t *new_data = new uint8_t[target->getSize()]; - pkt->dataDynamicArray(new_data); + pkt->allocate(); pkt->senderState = (Packet::SenderState*)this; pkt->time = curTick; } diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc index 29da537468..8975519896 100644 --- a/src/mem/cache/prefetch/base_prefetcher.cc +++ b/src/mem/cache/prefetch/base_prefetcher.cc @@ -181,8 +181,7 @@ BasePrefetcher::handleMiss(Packet * &pkt, Tick time) Request * prefetchReq = new Request(*addr, blkSize, 0); Packet * prefetch; prefetch = new Packet(prefetchReq, Packet::HardPFReq, -1); - uint8_t *new_data = new uint8_t[blkSize]; - prefetch->dataDynamicArray(new_data); + prefetch->allocate(); 
prefetch->req->setThreadContext(pkt->req->getCpuNum(), pkt->req->getThreadNum()); diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc index 0071ca2837..847fabc887 100644 --- a/src/mem/cache/tags/iic.cc +++ b/src/mem/cache/tags/iic.cc @@ -430,10 +430,11 @@ IIC::freeReplacementBlock(PacketList & writebacks) tag_ptr->data, tag_ptr->size); */ - Request *writebackReq = new Request(regenerateBlkAddr(tag_ptr->tag, 0), + Request *writebackReq = new Request(regenerateBlkAddr(tag_ptr->tag, 0), blkSize, 0); - Packet *writeback = new Packet(writebackReq, Packet::Writeback, -1); - writeback->dataDynamic(tag_ptr->data); + Packet *writeback = new Packet(writebackReq, Packet::Writeback, -1); + writeback->allocate(); + memcpy(writeback->getPtr(), tag_ptr->data, blkSize); writebacks.push_back(writeback); } diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc index 81b84e11e8..b7259bd3ab 100644 --- a/src/mem/cache/tags/lru.cc +++ b/src/mem/cache/tags/lru.cc @@ -280,7 +280,8 @@ LRU::doCopy(Addr source, Addr dest, int asid, PacketList &writebacks) dest_blk->set), blkSize, 0); Packet *writeback = new Packet(writebackReq, Packet::Writeback, -1); - writeback->dataDynamic(dest_blk->data); + writeback->allocate(); + memcpy(writeback->getPtr(),dest_blk->data, blkSize); writebacks.push_back(writeback); } dest_blk->tag = extractTag(dest); From 1bdc65b00f40b20dc5c7e97d3c8d8e4b311230a8 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Fri, 30 Jun 2006 16:25:35 -0400 Subject: [PATCH 083/152] First pass, now compiles with current head of tree. Compile and initialization work, still working on functionality. src/mem/cache/base_cache.cc: Temp fix for cpu's use of getPort functionality. CPU's will need to be ported to the new connector objects. Also, all packets have to have data or the delete fails. 
src/mem/cache/cache.hh: Fix function prototypes so overloading works src/mem/cache/cache_impl.hh: fix functions to match virtual base class src/mem/cache/miss/miss_queue.cc: Packets have to have data, or delete fails src/python/m5/objects/BaseCache.py: Update for newmem --HG-- extra : convert_revision : 2b6ad1e9d8ae07ace9294cd257e2ccc0024b7fcb --- src/mem/cache/base_cache.cc | 21 ++++++++++++++------- src/mem/cache/cache.hh | 6 ++---- src/mem/cache/cache_impl.hh | 8 ++++---- src/mem/cache/miss/miss_queue.cc | 3 ++- src/python/m5/objects/BaseCache.py | 12 ++++++------ 5 files changed, 28 insertions(+), 22 deletions(-) diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index c1ed6d3d43..aaaf1bdefb 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -101,16 +101,21 @@ BaseCache::CachePort::clearBlocked() Port* BaseCache::getPort(const std::string &if_name, int idx) { - if(if_name == "cpu_side") + if (if_name == "") { - if(cpuSidePort != NULL) - panic("Already have a cpu side for this cache\n"); - cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true); + if(cpuSidePort == NULL) + cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true); return cpuSidePort; } - else if(if_name == "mem_side") + if (if_name == "functional") { - if(memSidePort != NULL) + if(cpuSidePort == NULL) + cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true); + return cpuSidePort; + } + else if (if_name == "mem_side") + { + if (memSidePort != NULL) panic("Already have a mem side for this cache\n"); memSidePort = new CachePort(name() + "-mem_side_port", this, false); return memSidePort; @@ -121,9 +126,10 @@ BaseCache::getPort(const std::string &if_name, int idx) void BaseCache::regStats() { - Request temp_req; + Request temp_req((Addr) NULL, 4, 0); Packet::Command temp_cmd = Packet::ReadReq; Packet temp_pkt(&temp_req, temp_cmd, 0); //@todo FIx command strings so this isn't neccessary + temp_pkt.allocate(); //Temp
allocate, all need data using namespace Stats; @@ -331,4 +337,5 @@ BaseCache::regStats() .name(name() + ".cache_copies") .desc("number of cache copies performed") ; + } diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 788715e761..1243c9d9ec 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -149,11 +149,9 @@ class Cache : public BaseCache virtual bool doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide); - virtual Tick doAtomicAccess(Packet *pkt, CachePort *cachePort, - bool isCpuSide); + virtual Tick doAtomicAccess(Packet *pkt, bool isCpuSide); - virtual void doFunctionalAccess(Packet *pkt, CachePort *cachePort, - bool isCpuSide); + virtual void doFunctionalAccess(Packet *pkt, bool isCpuSide); virtual void recvStatusChange(Port::Status status, bool isCpuSide); diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index f1e9c3698f..0cb33461bb 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -77,7 +77,7 @@ doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) template Tick Cache:: -doAtomicAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) +doAtomicAccess(Packet *pkt, bool isCpuSide) { if (isCpuSide) { @@ -97,18 +97,18 @@ doAtomicAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) template void Cache:: -doFunctionalAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) +doFunctionalAccess(Packet *pkt, bool isCpuSide) { if (isCpuSide) { - probe(pkt, false); + probe(pkt, true); } else { if (pkt->isResponse()) handleResponse(pkt); else - snoopProbe(pkt, false); + snoopProbe(pkt, true); } } diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc index 99ebab0179..da0448ad3a 100644 --- a/src/mem/cache/miss/miss_queue.cc +++ b/src/mem/cache/miss/miss_queue.cc @@ -58,9 +58,10 @@ MissQueue::MissQueue(int numMSHRs, int numTargets, int write_buffers, void MissQueue::regStats(const string &name) { - Request temp_req; + Request 
temp_req((Addr) NULL, 4, 0); Packet::Command temp_cmd = Packet::ReadReq; Packet temp_pkt(&temp_req, temp_cmd, 0); //@todo FIx command strings so this isn't neccessary + temp_pkt.allocate(); using namespace Stats; diff --git a/src/python/m5/objects/BaseCache.py b/src/python/m5/objects/BaseCache.py index 33f44759bf..497b2b038a 100644 --- a/src/python/m5/objects/BaseCache.py +++ b/src/python/m5/objects/BaseCache.py @@ -1,29 +1,26 @@ from m5.config import * -from BaseMem import BaseMem +from MemObject import MemObject class Prefetch(Enum): vals = ['none', 'tagged', 'stride', 'ghb'] -class BaseCache(BaseMem): +class BaseCache(MemObject): type = 'BaseCache' adaptive_compression = Param.Bool(False, "Use an adaptive compression scheme") assoc = Param.Int("associativity") block_size = Param.Int("block size in bytes") + latency = Param.Int("Latency") compressed_bus = Param.Bool(False, "This cache connects to a compressed memory") compression_latency = Param.Latency('0ns', "Latency in cycles of compression algorithm") do_copy = Param.Bool(False, "perform fast copies in the cache") hash_delay = Param.Int(1, "time in cycles of hash access") - in_bus = Param.Bus(NULL, "incoming bus object") lifo = Param.Bool(False, "whether this NIC partition should use LIFO repl. 
policy") max_miss_count = Param.Counter(0, "number of misses to handle before calling exit") - mem_trace = Param.MemTraceWriter(NULL, - "memory trace writer to record accesses") mshrs = Param.Int("number of MSHRs (max outstanding requests)") - out_bus = Param.Bus("outgoing bus object") prioritizeRequests = Param.Bool(False, "always service demand misses first") protocol = Param.CoherenceProtocol(NULL, "coherence protocol to use") @@ -63,3 +60,6 @@ class BaseCache(BaseMem): "Use the CPU ID to seperate calculations of prefetches") prefetch_data_accesses_only = Param.Bool(False, "Only prefetch on data not on instruction accesses") + hit_latency = Param.Int(1,"Hit Latency of the cache") + cpu_side = Port("Port on side closer to CPU") + mem_side = Port("Port on side closer to MEM") From 7a4929813423c6f72827c58453cb9bd591f1801c Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Fri, 30 Jun 2006 17:21:58 -0400 Subject: [PATCH 084/152] AtomicSimpleCPU with a cache now runs the hello world! test program. Need to clean up a bunch of flags/hacks in the code. Then onto Timing mode. Functional accesses also work properly, although not exactly how we wanted them. I'll need to clean that up as well. src/cpu/simple/atomic.cc: Atomic CPU needs to set thread context so stats work in cache. Temporarily just use CPU=0 ThreadID=0 src/mem/cache/cache_impl.hh: Need to return success/failure properly still Physical memory object doesn't assert SATISFIED anymore, need to remove that flag src/mem/cache/tags/lru.cc: Doesn't work if the REQ doesn't set its ASID.
Temporary fix use 0 always --HG-- extra : convert_revision : d06a39684af593db699b64df9a29f80c61d8d050 --- src/cpu/simple/atomic.cc | 3 +++ src/mem/cache/cache_impl.hh | 10 ++++++++-- src/mem/cache/tags/lru.cc | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index be6f421b32..b7202cbbb7 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -124,15 +124,18 @@ AtomicSimpleCPU::AtomicSimpleCPU(Params *p) // @todo fix me and get the real cpu id & thread number!!! ifetch_req = new Request(); + ifetch_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast); ifetch_pkt->dataStatic(&inst); data_read_req = new Request(); + data_read_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE data_read_pkt = new Packet(data_read_req, Packet::ReadReq, Packet::Broadcast); data_read_pkt->dataStatic(&dataReg); data_write_req = new Request(); + data_write_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE data_write_pkt = new Packet(data_write_req, Packet::WriteReq, Packet::Broadcast); } diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 0cb33461bb..aae5cbf018 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -82,6 +82,8 @@ doAtomicAccess(Packet *pkt, bool isCpuSide) if (isCpuSide) { probe(pkt, true); + //TEMP ALWAYS SUCCES FOR NOW + pkt->result = Packet::Success; } else { @@ -101,7 +103,11 @@ doFunctionalAccess(Packet *pkt, bool isCpuSide) { if (isCpuSide) { + //TEMP USE CPU?THREAD 0 0 + pkt->req->setThreadContext(0,0); probe(pkt, true); + //TEMP ALWAYS SUCCESFUL FOR NOW + pkt->result = Packet::Success; } else { @@ -594,12 +600,12 @@ Cache::probe(Packet * &pkt, bool update) lat = memSidePort->sendAtomic(busPkt); - if (!(busPkt->flags & SATISFIED)) { +/* if (!(busPkt->flags & SATISFIED)) { // blocked at a higher level, just return return 0; } - 
misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; +*/ misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; CacheBlk::State old_state = (blk) ? blk->status : 0; tags->handleFill(blk, busPkt, diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc index b7259bd3ab..556025a3ab 100644 --- a/src/mem/cache/tags/lru.cc +++ b/src/mem/cache/tags/lru.cc @@ -188,7 +188,7 @@ LRUBlk* LRU::findBlock(Packet * &pkt, int &lat) { Addr addr = pkt->getAddr(); - int asid = pkt->req->getAsid(); + int asid = 0;//pkt->req->getAsid(); Addr tag = extractTag(addr); unsigned set = extractSet(addr); From d9ef772e8d43ebfd2a4bece76f33cc62d71258a6 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 30 Jun 2006 19:52:08 -0400 Subject: [PATCH 085/152] Make O3CPU model independent of the ISA Use O3CPU when building instead of AlphaO3CPU. I could use some better python magic in the cpu_models.py file! AUTHORS: add middle initial SConstruct: change from AlphaO3CPU to O3CPU src/cpu/SConscript: edits to build O3CPU instead of AlphaO3CPU src/cpu/cpu_models.py: change substitution template to use proper CPU EXEC CONTEXT For O3CPU Model... Actually, some Python expertise could be used here. The 'env' variable is not passed to this file, so I had to parse through the ARGV to find the ISA... src/cpu/o3/base_dyn_inst.cc: src/cpu/o3/bpred_unit.cc: src/cpu/o3/commit.cc: src/cpu/o3/cpu.cc: src/cpu/o3/cpu.hh: src/cpu/o3/decode.cc: src/cpu/o3/fetch.cc: src/cpu/o3/iew.cc: src/cpu/o3/inst_queue.cc: src/cpu/o3/lsq.cc: src/cpu/o3/lsq_unit.cc: src/cpu/o3/mem_dep_unit.cc: src/cpu/o3/rename.cc: src/cpu/o3/rob.cc: use isa_specific.hh src/sim/process.cc: only init NextNPC if not ALPHA src/cpu/o3/alpha/cpu.cc: alphao3cpu impl src/cpu/o3/alpha/cpu.hh: move AlphaTC to its own file src/cpu/o3/alpha/cpu_impl.hh: Move AlphaTC to its own file ...
src/cpu/o3/alpha/dyn_inst.cc: src/cpu/o3/alpha/dyn_inst.hh: src/cpu/o3/alpha/dyn_inst_impl.hh: include paths src/cpu/o3/alpha/impl.hh: include paths, set default MaxThreads to 2 instead of 4 src/cpu/o3/alpha/params.hh: set Alpha Specific Params here src/python/m5/objects/O3CPU.py: add O3CPU class src/cpu/o3/SConscript: include isa-specific build files src/cpu/o3/alpha/thread_context.cc: NEW HOME of AlphaTC src/cpu/o3/alpha/thread_context.hh: new home of AlphaTC src/cpu/o3/isa_specific.hh: includes ISA specific files src/cpu/o3/params.hh: base o3 params src/cpu/o3/thread_context.hh: base o3 thread context src/cpu/o3/thread_context_impl.hh: base o3 thread context impl --HG-- rename : src/cpu/o3/alpha_cpu.cc => src/cpu/o3/alpha/cpu.cc rename : src/cpu/o3/alpha_cpu.hh => src/cpu/o3/alpha/cpu.hh rename : src/cpu/o3/alpha_cpu_builder.cc => src/cpu/o3/alpha/cpu_builder.cc rename : src/cpu/o3/alpha_cpu_impl.hh => src/cpu/o3/alpha/cpu_impl.hh rename : src/cpu/o3/alpha_dyn_inst.cc => src/cpu/o3/alpha/dyn_inst.cc rename : src/cpu/o3/alpha_dyn_inst.hh => src/cpu/o3/alpha/dyn_inst.hh rename : src/cpu/o3/alpha_dyn_inst_impl.hh => src/cpu/o3/alpha/dyn_inst_impl.hh rename : src/cpu/o3/alpha_impl.hh => src/cpu/o3/alpha/impl.hh rename : src/cpu/o3/alpha_params.hh => src/cpu/o3/alpha/params.hh rename : src/python/m5/objects/AlphaO3CPU.py => src/python/m5/objects/O3CPU.py extra : convert_revision : d377d6417452ac337bc502f28b2fde907d6b340e --- AUTHORS | 2 +- SConstruct | 2 +- src/cpu/SConscript | 8 +- src/cpu/cpu_models.py | 22 +- src/cpu/o3/SConscript | 79 +++ src/cpu/o3/{alpha_cpu.cc => alpha/cpu.cc} | 6 +- src/cpu/o3/alpha/cpu.hh | 204 ++++++++ .../cpu_builder.cc} | 0 .../{alpha_cpu_impl.hh => alpha/cpu_impl.hh} | 469 +---------------- .../{alpha_dyn_inst.cc => alpha/dyn_inst.cc} | 4 +- .../{alpha_dyn_inst.hh => alpha/dyn_inst.hh} | 4 +- .../dyn_inst_impl.hh} | 2 +- src/cpu/o3/{alpha_impl.hh => alpha/impl.hh} | 4 +- src/cpu/o3/alpha/params.hh | 69 +++
src/cpu/o3/alpha/thread_context.cc | 36 ++ src/cpu/o3/alpha/thread_context.hh | 70 +++ src/cpu/o3/alpha_cpu.hh | 434 ---------------- src/cpu/o3/base_dyn_inst.cc | 3 +- src/cpu/o3/bpred_unit.cc | 3 +- src/cpu/o3/commit.cc | 3 +- src/cpu/o3/cpu.cc | 3 +- src/cpu/o3/cpu.hh | 8 + src/cpu/o3/decode.cc | 3 +- src/cpu/o3/fetch.cc | 3 +- src/cpu/o3/iew.cc | 3 +- src/cpu/o3/inst_queue.cc | 3 +- src/cpu/o3/isa_specific.hh | 40 ++ src/cpu/o3/lsq.cc | 4 +- src/cpu/o3/lsq_unit.cc | 4 +- src/cpu/o3/mem_dep_unit.cc | 3 +- src/cpu/o3/{alpha_params.hh => params.hh} | 30 +- src/cpu/o3/rename.cc | 3 +- src/cpu/o3/rob.cc | 3 +- src/cpu/o3/thread_context.hh | 243 +++++++++ src/cpu/o3/thread_context_impl.hh | 488 ++++++++++++++++++ .../m5/objects/{AlphaO3CPU.py => O3CPU.py} | 4 +- src/sim/process.cc | 3 + 37 files changed, 1300 insertions(+), 972 deletions(-) create mode 100755 src/cpu/o3/SConscript rename src/cpu/o3/{alpha_cpu.cc => alpha/cpu.cc} (94%) create mode 100644 src/cpu/o3/alpha/cpu.hh rename src/cpu/o3/{alpha_cpu_builder.cc => alpha/cpu_builder.cc} (100%) rename src/cpu/o3/{alpha_cpu_impl.hh => alpha/cpu_impl.hh} (52%) rename src/cpu/o3/{alpha_dyn_inst.cc => alpha/dyn_inst.cc} (95%) rename src/cpu/o3/{alpha_dyn_inst.hh => alpha/dyn_inst.hh} (99%) rename src/cpu/o3/{alpha_dyn_inst_impl.hh => alpha/dyn_inst_impl.hh} (99%) rename src/cpu/o3/{alpha_impl.hh => alpha/impl.hh} (98%) create mode 100644 src/cpu/o3/alpha/params.hh create mode 100755 src/cpu/o3/alpha/thread_context.cc create mode 100644 src/cpu/o3/alpha/thread_context.hh delete mode 100644 src/cpu/o3/alpha_cpu.hh create mode 100755 src/cpu/o3/isa_specific.hh rename src/cpu/o3/{alpha_params.hh => params.hh} (88%) mode change 100644 => 100755 create mode 100755 src/cpu/o3/thread_context.hh create mode 100755 src/cpu/o3/thread_context_impl.hh rename src/python/m5/objects/{AlphaO3CPU.py => O3CPU.py} (98%) diff --git a/AUTHORS b/AUTHORS index 8904070d89..ec3de7bb22 100644 --- a/AUTHORS +++ b/AUTHORS @@ -28,7 +28,7 @@ Ronald 
G. Dreslinski Jr Gabriel Black ----------------------- -Korey Sewell +Korey L. Sewell ----------------------- David Green diff --git a/SConstruct b/SConstruct index 2dc53f7cb3..b18fe66d34 100644 --- a/SConstruct +++ b/SConstruct @@ -263,7 +263,7 @@ env['ALL_ISA_LIST'] = ['alpha', 'sparc', 'mips'] # Define the universe of supported CPU models env['ALL_CPU_LIST'] = ['AtomicSimpleCPU', 'TimingSimpleCPU', - 'FullCPU', 'AlphaO3CPU', + 'FullCPU', 'O3CPU', 'OzoneCPU'] # Sticky options get saved in the options file so they persist from diff --git a/src/cpu/SConscript b/src/cpu/SConscript index baa5d531e4..3dcc2f1ec1 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -104,7 +104,7 @@ env.Depends('static_inst_exec_sigs.hh', Value(env['CPU_MODELS'])) # List of suppported CPUs by the Checker. Errors out if USE_CHECKER=True # and one of these are not being used. -CheckerSupportedCPUList = ['AlphaO3CPU', 'OzoneCPU'] +CheckerSupportedCPUList = ['O3CPU', 'OzoneCPU'] ################################################################# # @@ -130,12 +130,10 @@ if need_simple_base: if 'FastCPU' in env['CPU_MODELS']: sources += Split('fast/cpu.cc') -if 'AlphaO3CPU' in env['CPU_MODELS']: +if 'O3CPU' in env['CPU_MODELS']: + sources += SConscript('o3/SConscript', exports = 'env') sources += Split(''' o3/2bit_local_pred.cc - o3/alpha_dyn_inst.cc - o3/alpha_cpu.cc - o3/alpha_cpu_builder.cc o3/base_dyn_inst.cc o3/bpred_unit.cc o3/btb.cc diff --git a/src/cpu/cpu_models.py b/src/cpu/cpu_models.py index e7ef9ab42b..1add327458 100644 --- a/src/cpu/cpu_models.py +++ b/src/cpu/cpu_models.py @@ -26,6 +26,10 @@ # # Authors: Steve Reinhardt +import os +import os.path +import sys + ################ # CpuModel class # @@ -47,7 +51,6 @@ class CpuModel: # Add self to dict CpuModel.dict[name] = self - # # Define CPU models. 
# @@ -67,9 +70,6 @@ CpuModel('TimingSimpleCPU', 'timing_simple_cpu_exec.cc', CpuModel('FullCPU', 'full_cpu_exec.cc', '#include "encumbered/cpu/full/dyn_inst.hh"', { 'CPU_exec_context': 'DynInst' }) -CpuModel('AlphaO3CPU', 'alpha_o3_exec.cc', - '#include "cpu/o3/alpha_dyn_inst.hh"', - { 'CPU_exec_context': 'AlphaDynInst' }) CpuModel('OzoneSimpleCPU', 'ozone_simple_exec.cc', '#include "cpu/ozone/dyn_inst.hh"', { 'CPU_exec_context': 'OzoneDynInst' }) @@ -80,3 +80,17 @@ CpuModel('CheckerCPU', 'checker_cpu_exec.cc', '#include "cpu/checker/cpu.hh"', { 'CPU_exec_context': 'CheckerCPU' }) +# Maybe there is a more clever way to determine ISA +# here but since the environment variable isnt passed through +# here the easiest way is this... +sub_template = 'not found' +for argument in sys.argv: + if 'ALPHA' in argument: + sub_template = 'AlphaDynInst' + +if sub_template == 'not found': + sys.exit('NO CPU_exec_context substitution defined for this ISA') + +CpuModel('O3CPU', 'o3_cpu_exec.cc', + '#include "cpu/o3/isa_specific.hh"', + { 'CPU_exec_context': sub_template }) diff --git a/src/cpu/o3/SConscript b/src/cpu/o3/SConscript new file mode 100755 index 0000000000..e65d41411c --- /dev/null +++ b/src/cpu/o3/SConscript @@ -0,0 +1,79 @@ +# -*- mode:python -*- + +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Korey Sewell + +import os +import os.path +import sys + +# Import build environment variable from SConstruct. 
+Import('env') + + +################################################################# +# +# Include ISA-specific files for the O3 CPU-model +# +################################################################# + +sources = [] + +if env['TARGET_ISA'] == 'alpha': + sources += Split(''' + alpha/dyn_inst.cc + alpha/cpu.cc + alpha/thread_context.cc + alpha/cpu_builder.cc + ''') +elif env['TARGET_ISA'] == 'mips': + sys.exit('O3 CPU does not support MIPS') + #sources += Split(''' + # mips/dyn_inst.cc + # mips/cpu.cc + # mips/thread_context.cc + # mips/cpu_builder.cc + # ''') +elif env['TARGET_ISA'] == 'sparc': + sys.exit('O3 CPU does not support MIPS') + #sources += Split(''' + # sparc/dyn_inst.cc + # sparc/cpu.cc + # sparc/thread_context.cc + # sparc/cpu_builder.cc + # ''') +else: + sys.exit('O3 CPU does not support the \'%s\' ISA' % env['TARGET_ISA']) + + +# Convert file names to SCons File objects. This takes care of the +# path relative to the top of the directory tree. +sources = [File(s) for s in sources] + +Return('sources') + diff --git a/src/cpu/o3/alpha_cpu.cc b/src/cpu/o3/alpha/cpu.cc similarity index 94% rename from src/cpu/o3/alpha_cpu.cc rename to src/cpu/o3/alpha/cpu.cc index e44ed00311..87a4d03a77 100644 --- a/src/cpu/o3/alpha_cpu.cc +++ b/src/cpu/o3/alpha/cpu.cc @@ -28,9 +28,9 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_impl.hh" -#include "cpu/o3/alpha_cpu_impl.hh" -#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alphaimpl.hh" +#include "cpu/o3/alpha/cpu_impl.hh" +#include "cpu/o3/alpha/dyn_inst.hh" // Force instantiation of AlphaO3CPU for all the implemntations that are // needed. Consider merging this and alpha_dyn_inst.cc, and maybe all diff --git a/src/cpu/o3/alpha/cpu.hh b/src/cpu/o3/alpha/cpu.hh new file mode 100644 index 0000000000..b961341d58 --- /dev/null +++ b/src/cpu/o3/alpha/cpu.hh @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#ifndef __CPU_O3_ALPHA_CPU_HH__ +#define __CPU_O3_ALPHA_CPU_HH__ + +#include "arch/isa_traits.hh" +#include "cpu/thread_context.hh" +#include "cpu/o3/cpu.hh" +#include "sim/byteswap.hh" + +class EndQuiesceEvent; +namespace Kernel { + class Statistics; +}; + +class TranslatingPort; + +/** + * AlphaO3CPU class. Derives from the FullO3CPU class, and + * implements all ISA and implementation specific functions of the + * CPU. 
This is the CPU class that is used for the SimObjects, and is + * what is given to the DynInsts. Most of its state exists in the + * FullO3CPU; the state is has is mainly for ISA specific + * functionality. + */ +template +class AlphaO3CPU : public FullO3CPU +{ + protected: + typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + typedef TheISA::MiscReg MiscReg; + typedef TheISA::RegFile RegFile; + typedef TheISA::MiscRegFile MiscRegFile; + + public: + typedef O3ThreadState ImplState; + typedef O3ThreadState Thread; + typedef typename Impl::Params Params; + + /** Constructs an AlphaO3CPU with the given parameters. */ + AlphaO3CPU(Params *params); + +#if FULL_SYSTEM + /** ITB pointer. */ + AlphaITB *itb; + /** DTB pointer. */ + AlphaDTB *dtb; +#endif + + /** Registers statistics. */ + void regStats(); + +#if FULL_SYSTEM + /** Translates instruction requestion. */ + Fault translateInstReq(RequestPtr &req, Thread *thread) + { + return itb->translate(req, thread->getTC()); + } + + /** Translates data read request. */ + Fault translateDataReadReq(RequestPtr &req, Thread *thread) + { + return dtb->translate(req, thread->getTC(), false); + } + + /** Translates data write request. */ + Fault translateDataWriteReq(RequestPtr &req, Thread *thread) + { + return dtb->translate(req, thread->getTC(), true); + } + +#else + /** Translates instruction requestion in syscall emulation mode. */ + Fault translateInstReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Translates data read request in syscall emulation mode. */ + Fault translateDataReadReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Translates data write request in syscall emulation mode. 
*/ + Fault translateDataWriteReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + +#endif + /** Reads a miscellaneous register. */ + MiscReg readMiscReg(int misc_reg, unsigned tid); + + /** Reads a misc. register, including any side effects the read + * might have as defined by the architecture. + */ + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid); + + /** Sets a miscellaneous register. */ + Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid); + + /** Sets a misc. register, including any side effects the write + * might have as defined by the architecture. + */ + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid); + + /** Initiates a squash of all in-flight instructions for a given + * thread. The source of the squash is an external update of + * state through the TC. + */ + void squashFromTC(unsigned tid); + +#if FULL_SYSTEM + /** Posts an interrupt. */ + void post_interrupt(int int_num, int index); + /** Reads the interrupt flag. */ + int readIntrFlag(); + /** Sets the interrupt flags. */ + void setIntrFlag(int val); + /** HW return from error interrupt. */ + Fault hwrei(unsigned tid); + /** Returns if a specific PC is a PAL mode PC. */ + bool inPalMode(uint64_t PC) + { return AlphaISA::PcPAL(PC); } + + bool simPalCheck(int palFunc, unsigned tid); + + /** Processes any interrupts. */ + void processInterrupts(); + + /** Halts the CPU. */ + void halt() { panic("Halt not implemented!\n"); } +#endif + + /** Traps to handle given fault. */ + void trap(Fault fault, unsigned tid); + +#if !FULL_SYSTEM + /** Executes a syscall. + * @todo: Determine if this needs to be virtual. + */ + void syscall(int64_t callnum, int tid); + /** Gets a syscall argument. */ + IntReg getSyscallArg(int i, int tid); + + /** Used to shift args for indirect syscall. */ + void setSyscallArg(int i, IntReg val, int tid); + + /** Sets the return value of a syscall. 
*/ + void setSyscallReturn(SyscallReturn return_value, int tid); +#endif + + /** CPU read function, forwards read to LSQ. */ + template + Fault read(RequestPtr &req, T &data, int load_idx) + { + return this->iew.ldstQueue.read(req, data, load_idx); + } + + /** CPU write function, forwards write to LSQ. */ + template + Fault write(RequestPtr &req, T &data, int store_idx) + { + return this->iew.ldstQueue.write(req, data, store_idx); + } + + Addr lockAddr; + + /** Temporary fix for the lock flag, works in the UP case. */ + bool lockFlag; +}; + +#endif // __CPU_O3_ALPHA_CPU_HH__ diff --git a/src/cpu/o3/alpha_cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc similarity index 100% rename from src/cpu/o3/alpha_cpu_builder.cc rename to src/cpu/o3/alpha/cpu_builder.cc diff --git a/src/cpu/o3/alpha_cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh similarity index 52% rename from src/cpu/o3/alpha_cpu_impl.hh rename to src/cpu/o3/alpha/cpu_impl.hh index eca6fbbcbf..2da683398b 100644 --- a/src/cpu/o3/alpha_cpu_impl.hh +++ b/src/cpu/o3/alpha/cpu_impl.hh @@ -38,8 +38,9 @@ #include "sim/sim_events.hh" #include "sim/stats.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_params.hh" +#include "cpu/o3/alpha/cpu.hh" +#include "cpu/o3/alpha/params.hh" +#include "cpu/o3/alpha/tc.hh" #include "cpu/o3/comm.hh" #include "cpu/o3/thread_state.hh" @@ -110,7 +111,8 @@ AlphaO3CPU::AlphaO3CPU(Params *params) ThreadContext *tc; // Setup the TC that will serve as the interface to the threads/CPU. - AlphaTC *alpha_tc = new AlphaTC; + AlphaTC *alpha_tc = + new AlphaTC; tc = alpha_tc; @@ -118,7 +120,7 @@ AlphaO3CPU::AlphaO3CPU(Params *params) // CheckerThreadContext. 
#if USE_CHECKER if (params->checker) { - tc = new CheckerThreadContext( + tc = new CheckerThreadContext>( alpha_tc, this->checker); } #endif @@ -187,465 +189,6 @@ AlphaO3CPU::regStats() this->commit.regStats(); } -#if FULL_SYSTEM -template -VirtualPort * -AlphaO3CPU::AlphaTC::getVirtPort(ThreadContext *src_tc) -{ - if (!src_tc) - return thread->getVirtPort(); - - VirtualPort *vp; - Port *mem_port; - - vp = new VirtualPort("tc-vport", src_tc); - mem_port = cpu->system->physmem->getPort("functional"); - mem_port->setPeer(vp); - vp->setPeer(mem_port); - return vp; -} - -template -void -AlphaO3CPU::AlphaTC::dumpFuncProfile() -{ - // Currently not supported -} -#endif - -template -void -AlphaO3CPU::AlphaTC::takeOverFrom(ThreadContext *old_context) -{ - // some things should already be set up -#if FULL_SYSTEM - assert(getSystemPtr() == old_context->getSystemPtr()); -#else - assert(getProcessPtr() == old_context->getProcessPtr()); -#endif - - // copy over functional state - setStatus(old_context->status()); - copyArchRegs(old_context); - setCpuId(old_context->readCpuId()); - -#if !FULL_SYSTEM - thread->funcExeInst = old_context->readFuncExeInst(); -#else - EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent(); - if (other_quiesce) { - // Point the quiesce event's TC at this TC so that it wakes up - // the proper CPU. - other_quiesce->tc = this; - } - if (thread->quiesceEvent) { - thread->quiesceEvent->tc = this; - } - - // Transfer kernel stats from one CPU to the other. 
- thread->kernelStats = old_context->getKernelStats(); -// storeCondFailures = 0; - cpu->lockFlag = false; -#endif - - old_context->setStatus(ThreadContext::Unallocated); - - thread->inSyscall = false; - thread->trapPending = false; -} - -#if FULL_SYSTEM -template -void -AlphaO3CPU::AlphaTC::delVirtPort(VirtualPort *vp) -{ - delete vp->getPeer(); - delete vp; -} -#endif - -template -void -AlphaO3CPU::AlphaTC::activate(int delay) -{ - DPRINTF(O3CPU, "Calling activate on AlphaTC\n"); - - if (thread->status() == ThreadContext::Active) - return; - -#if FULL_SYSTEM - thread->lastActivate = curTick; -#endif - - if (thread->status() == ThreadContext::Unallocated) { - cpu->activateWhenReady(thread->readTid()); - return; - } - - thread->setStatus(ThreadContext::Active); - - // status() == Suspended - cpu->activateContext(thread->readTid(), delay); -} - -template -void -AlphaO3CPU::AlphaTC::suspend() -{ - DPRINTF(O3CPU, "Calling suspend on AlphaTC\n"); - - if (thread->status() == ThreadContext::Suspended) - return; - -#if FULL_SYSTEM - thread->lastActivate = curTick; - thread->lastSuspend = curTick; -#endif -/* -#if FULL_SYSTEM - // Don't change the status from active if there are pending interrupts - if (cpu->check_interrupts()) { - assert(status() == ThreadContext::Active); - return; - } -#endif -*/ - thread->setStatus(ThreadContext::Suspended); - cpu->suspendContext(thread->readTid()); -} - -template -void -AlphaO3CPU::AlphaTC::deallocate() -{ - DPRINTF(O3CPU, "Calling deallocate on AlphaTC\n"); - - if (thread->status() == ThreadContext::Unallocated) - return; - - thread->setStatus(ThreadContext::Unallocated); - cpu->deallocateContext(thread->readTid()); -} - -template -void -AlphaO3CPU::AlphaTC::halt() -{ - DPRINTF(O3CPU, "Calling halt on AlphaTC\n"); - - if (thread->status() == ThreadContext::Halted) - return; - - thread->setStatus(ThreadContext::Halted); - cpu->haltContext(thread->readTid()); -} - -template -void -AlphaO3CPU::AlphaTC::regStats(const std::string &name) 
-{ -#if FULL_SYSTEM - thread->kernelStats = new Kernel::Statistics(cpu->system); - thread->kernelStats->regStats(name + ".kern"); -#endif -} - -template -void -AlphaO3CPU::AlphaTC::serialize(std::ostream &os) -{ -#if FULL_SYSTEM - if (thread->kernelStats) - thread->kernelStats->serialize(os); -#endif - -} - -template -void -AlphaO3CPU::AlphaTC::unserialize(Checkpoint *cp, const std::string §ion) -{ -#if FULL_SYSTEM - if (thread->kernelStats) - thread->kernelStats->unserialize(cp, section); -#endif - -} - -#if FULL_SYSTEM -template -EndQuiesceEvent * -AlphaO3CPU::AlphaTC::getQuiesceEvent() -{ - return thread->quiesceEvent; -} - -template -Tick -AlphaO3CPU::AlphaTC::readLastActivate() -{ - return thread->lastActivate; -} - -template -Tick -AlphaO3CPU::AlphaTC::readLastSuspend() -{ - return thread->lastSuspend; -} - -template -void -AlphaO3CPU::AlphaTC::profileClear() -{} - -template -void -AlphaO3CPU::AlphaTC::profileSample() -{} -#endif - -template -TheISA::MachInst -AlphaO3CPU::AlphaTC:: getInst() -{ - return thread->getInst(); -} - -template -void -AlphaO3CPU::AlphaTC::copyArchRegs(ThreadContext *tc) -{ - // This function will mess things up unless the ROB is empty and - // there are no instructions in the pipeline. - unsigned tid = thread->readTid(); - PhysRegIndex renamed_reg; - - // First loop through the integer registers. - for (int i = 0; i < AlphaISA::NumIntRegs; ++i) { - renamed_reg = cpu->renameMap[tid].lookup(i); - - DPRINTF(O3CPU, "Copying over register %i, had data %lli, " - "now has data %lli.\n", - renamed_reg, cpu->readIntReg(renamed_reg), - tc->readIntReg(i)); - - cpu->setIntReg(renamed_reg, tc->readIntReg(i)); - } - - // Then loop through the floating point registers. - for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) { - renamed_reg = cpu->renameMap[tid].lookup(i + AlphaISA::FP_Base_DepTag); - cpu->setFloatRegBits(renamed_reg, - tc->readFloatRegBits(i)); - } - - // Copy the misc regs. 
- copyMiscRegs(tc, this); - - // Then finally set the PC and the next PC. - cpu->setPC(tc->readPC(), tid); - cpu->setNextPC(tc->readNextPC(), tid); -#if !FULL_SYSTEM - this->thread->funcExeInst = tc->readFuncExeInst(); -#endif -} - -template -void -AlphaO3CPU::AlphaTC::clearArchRegs() -{} - -template -uint64_t -AlphaO3CPU::AlphaTC::readIntReg(int reg_idx) -{ - return cpu->readArchIntReg(reg_idx, thread->readTid()); -} - -template -FloatReg -AlphaO3CPU::AlphaTC::readFloatReg(int reg_idx, int width) -{ - switch(width) { - case 32: - return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); - case 64: - return cpu->readArchFloatRegDouble(reg_idx, thread->readTid()); - default: - panic("Unsupported width!"); - return 0; - } -} - -template -FloatReg -AlphaO3CPU::AlphaTC::readFloatReg(int reg_idx) -{ - return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); -} - -template -FloatRegBits -AlphaO3CPU::AlphaTC::readFloatRegBits(int reg_idx, int width) -{ - DPRINTF(Fault, "Reading floatint register through the TC!\n"); - return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); -} - -template -FloatRegBits -AlphaO3CPU::AlphaTC::readFloatRegBits(int reg_idx) -{ - return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); -} - -template -void -AlphaO3CPU::AlphaTC::setIntReg(int reg_idx, uint64_t val) -{ - cpu->setArchIntReg(reg_idx, val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template -void -AlphaO3CPU::AlphaTC::setFloatReg(int reg_idx, FloatReg val, int width) -{ - switch(width) { - case 32: - cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); - break; - case 64: - cpu->setArchFloatRegDouble(reg_idx, val, thread->readTid()); - break; - } - - // Squash if we're not already in a state update mode. 
- if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template -void -AlphaO3CPU::AlphaTC::setFloatReg(int reg_idx, FloatReg val) -{ - cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); - - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template -void -AlphaO3CPU::AlphaTC::setFloatRegBits(int reg_idx, FloatRegBits val, - int width) -{ - DPRINTF(Fault, "Setting floatint register through the TC!\n"); - cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template -void -AlphaO3CPU::AlphaTC::setFloatRegBits(int reg_idx, FloatRegBits val) -{ - cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template -void -AlphaO3CPU::AlphaTC::setPC(uint64_t val) -{ - cpu->setPC(val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template -void -AlphaO3CPU::AlphaTC::setNextPC(uint64_t val) -{ - cpu->setNextPC(val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template -Fault -AlphaO3CPU::AlphaTC::setMiscReg(int misc_reg, const MiscReg &val) -{ - Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->readTid()); - - // Squash if we're not already in a state update mode. 
- if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } - - return ret_fault; -} - -template -Fault -AlphaO3CPU::AlphaTC::setMiscRegWithEffect(int misc_reg, - const MiscReg &val) -{ - Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val, - thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } - - return ret_fault; -} - -#if !FULL_SYSTEM - -template -TheISA::IntReg -AlphaO3CPU::AlphaTC::getSyscallArg(int i) -{ - return cpu->getSyscallArg(i, thread->readTid()); -} - -template -void -AlphaO3CPU::AlphaTC::setSyscallArg(int i, IntReg val) -{ - cpu->setSyscallArg(i, val, thread->readTid()); -} - -template -void -AlphaO3CPU::AlphaTC::setSyscallReturn(SyscallReturn return_value) -{ - cpu->setSyscallReturn(return_value, thread->readTid()); -} - -#endif // FULL_SYSTEM template MiscReg diff --git a/src/cpu/o3/alpha_dyn_inst.cc b/src/cpu/o3/alpha/dyn_inst.cc similarity index 95% rename from src/cpu/o3/alpha_dyn_inst.cc rename to src/cpu/o3/alpha/dyn_inst.cc index 0c1723eec1..97d2f3d080 100644 --- a/src/cpu/o3/alpha_dyn_inst.cc +++ b/src/cpu/o3/alpha/dyn_inst.cc @@ -28,8 +28,8 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst_impl.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha/dyn_inst_impl.hh" +#include "cpu/o3/alpha/impl.hh" // Force instantiation of AlphaDynInst for all the implementations that // are needed. 
diff --git a/src/cpu/o3/alpha_dyn_inst.hh b/src/cpu/o3/alpha/dyn_inst.hh similarity index 99% rename from src/cpu/o3/alpha_dyn_inst.hh rename to src/cpu/o3/alpha/dyn_inst.hh index 464e53e9da..9dee610b68 100644 --- a/src/cpu/o3/alpha_dyn_inst.hh +++ b/src/cpu/o3/alpha/dyn_inst.hh @@ -34,8 +34,8 @@ #include "arch/isa_traits.hh" #include "cpu/base_dyn_inst.hh" #include "cpu/inst_seq.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha/cpu.hh" +#include "cpu/o3/alpha/impl.hh" class Packet; diff --git a/src/cpu/o3/alpha_dyn_inst_impl.hh b/src/cpu/o3/alpha/dyn_inst_impl.hh similarity index 99% rename from src/cpu/o3/alpha_dyn_inst_impl.hh rename to src/cpu/o3/alpha/dyn_inst_impl.hh index 855ee99634..2d1b4b3098 100644 --- a/src/cpu/o3/alpha_dyn_inst_impl.hh +++ b/src/cpu/o3/alpha/dyn_inst_impl.hh @@ -28,7 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha/dyn_inst.hh" template AlphaDynInst::AlphaDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, diff --git a/src/cpu/o3/alpha_impl.hh b/src/cpu/o3/alpha/impl.hh similarity index 98% rename from src/cpu/o3/alpha_impl.hh rename to src/cpu/o3/alpha/impl.hh index 84c9e1c004..cdcdff34a8 100644 --- a/src/cpu/o3/alpha_impl.hh +++ b/src/cpu/o3/alpha/impl.hh @@ -33,7 +33,7 @@ #include "arch/alpha/isa_traits.hh" -#include "cpu/o3/alpha_params.hh" +#include "cpu/o3/alpha/params.hh" #include "cpu/o3/cpu_policy.hh" // Forward declarations. @@ -81,7 +81,7 @@ struct AlphaSimpleImpl enum { MaxWidth = 8, - MaxThreads = 4 + MaxThreads = 2 }; }; diff --git a/src/cpu/o3/alpha/params.hh b/src/cpu/o3/alpha/params.hh new file mode 100644 index 0000000000..b1f2a487d2 --- /dev/null +++ b/src/cpu/o3/alpha/params.hh @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#ifndef __CPU_O3_ALPHA_PARAMS_HH__ +#define __CPU_O3_ALPHA_PARAMS_HH__ + +#include "cpu/o3/cpu.hh" +#include "cpu/o3/params.hh" + +//Forward declarations +class AlphaDTB; +class AlphaITB; +class MemObject; +class Process; +class System; + +/** + * This file defines the parameters that will be used for the AlphaO3CPU. + * This must be defined externally so that the Impl can have a params class + * defined that it can pass to all of the individual stages. 
+ */ + +class AlphaSimpleParams : public O3Params +{ + public: + +#if FULL_SYSTEM + AlphaITB *itb; + AlphaDTB *dtb; +#else + std::vector workload; + Process *process; +#endif // FULL_SYSTEM + + MemObject *mem; + + BaseCPU *checker; + + unsigned decodeToFetchDelay; +}; + +#endif // __CPU_O3_ALPHA_PARAMS_HH__ diff --git a/src/cpu/o3/alpha/thread_context.cc b/src/cpu/o3/alpha/thread_context.cc new file mode 100755 index 0000000000..4a02715bc6 --- /dev/null +++ b/src/cpu/o3/alpha/thread_context.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" +#include "cpu/o3/thread_context_impl.hh" + +template class O3ThreadContext; + diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh new file mode 100644 index 0000000000..890bff3ffb --- /dev/null +++ b/src/cpu/o3/alpha/thread_context.hh @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" + +template +class AlphaTC : public O3ThreadContext +{ +#if FULL_SYSTEM + /** Returns a pointer to the ITB. */ + virtual AlphaITB *getITBPtr() { return cpu->itb; } + + /** Returns a pointer to the DTB. */ + virtual AlphaDTB *getDTBPtr() { return cpu->dtb; } + + /** Returns pointer to the quiesce event. */ + virtual EndQuiesceEvent *getQuiesceEvent() + { + return thread->quiesceEvent; + } + + /** Returns if the thread is currently in PAL mode, based on + * the PC's value. */ + virtual bool inPalMode() + { return TheISA::PcPAL(cpu->readPC(thread->readTid())); } +#endif + + virtual uint64_t readNextNPC() + { + panic("Alpha has no NextNPC!"); + return 0; + } + + virtual void setNextNPC(uint64_t val) + { + panic("Alpha has no NextNPC!"); + } + + virtual void changeRegFileContext(TheISA::RegFile::ContextParam param, + TheISA::RegFile::ContextVal val) + { panic("Not supported on Alpha!"); } +}; diff --git a/src/cpu/o3/alpha_cpu.hh b/src/cpu/o3/alpha_cpu.hh deleted file mode 100644 index d7f3d5801f..0000000000 --- a/src/cpu/o3/alpha_cpu.hh +++ /dev/null @@ -1,434 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Regents of The University of Michigan - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Kevin Lim - */ - -#ifndef __CPU_O3_ALPHA_CPU_HH__ -#define __CPU_O3_ALPHA_CPU_HH__ - -#include "arch/isa_traits.hh" -#include "cpu/thread_context.hh" -#include "cpu/o3/cpu.hh" -#include "sim/byteswap.hh" - -class EndQuiesceEvent; -namespace Kernel { - class Statistics; -}; - -class TranslatingPort; - -/** - * AlphaO3CPU class. Derives from the FullO3CPU class, and - * implements all ISA and implementation specific functions of the - * CPU. 
This is the CPU class that is used for the SimObjects, and is - * what is given to the DynInsts. Most of its state exists in the - * FullO3CPU; the state is has is mainly for ISA specific - * functionality. - */ -template -class AlphaO3CPU : public FullO3CPU -{ - protected: - typedef TheISA::IntReg IntReg; - typedef TheISA::FloatReg FloatReg; - typedef TheISA::FloatRegBits FloatRegBits; - typedef TheISA::MiscReg MiscReg; - typedef TheISA::RegFile RegFile; - typedef TheISA::MiscRegFile MiscRegFile; - - public: - typedef O3ThreadState ImplState; - typedef O3ThreadState Thread; - typedef typename Impl::Params Params; - - /** Constructs an AlphaO3CPU with the given parameters. */ - AlphaO3CPU(Params *params); - - /** - * Derived ThreadContext class for use with the AlphaO3CPU. It - * provides the interface for any external objects to access a - * single thread's state and some general CPU state. Any time - * external objects try to update state through this interface, - * the CPU will create an event to squash all in-flight - * instructions in order to ensure state is maintained correctly. - * It must be defined specifically for the AlphaO3CPU because - * not all architectural state is located within the O3ThreadState - * (such as the commit PC, and registers), and specific actions - * must be taken when using this interface (such as squashing all - * in-flight instructions when doing a write to this interface). - */ - class AlphaTC : public ThreadContext - { - public: - /** Pointer to the CPU. */ - AlphaO3CPU *cpu; - - /** Pointer to the thread state that this TC corrseponds to. */ - O3ThreadState *thread; - - /** Returns a pointer to this CPU. */ - virtual BaseCPU *getCpuPtr() { return cpu; } - - /** Sets this CPU's ID. */ - virtual void setCpuId(int id) { cpu->cpu_id = id; } - - /** Reads this CPU's ID. */ - virtual int readCpuId() { return cpu->cpu_id; } - -#if FULL_SYSTEM - /** Returns a pointer to the system. 
*/ - virtual System *getSystemPtr() { return cpu->system; } - - /** Returns a pointer to physical memory. */ - virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; } - - /** Returns a pointer to the ITB. */ - virtual AlphaITB *getITBPtr() { return cpu->itb; } - - /** Returns a pointer to the DTB. */ - virtual AlphaDTB *getDTBPtr() { return cpu->dtb; } - - /** Returns a pointer to this thread's kernel statistics. */ - virtual Kernel::Statistics *getKernelStats() - { return thread->kernelStats; } - - virtual FunctionalPort *getPhysPort() { return thread->getPhysPort(); } - - virtual VirtualPort *getVirtPort(ThreadContext *src_tc = NULL); - - void delVirtPort(VirtualPort *vp); -#else - virtual TranslatingPort *getMemPort() { return thread->getMemPort(); } - - /** Returns a pointer to this thread's process. */ - virtual Process *getProcessPtr() { return thread->getProcessPtr(); } -#endif - /** Returns this thread's status. */ - virtual Status status() const { return thread->status(); } - - /** Sets this thread's status. */ - virtual void setStatus(Status new_status) - { thread->setStatus(new_status); } - - /** Set the status to Active. Optional delay indicates number of - * cycles to wait before beginning execution. */ - virtual void activate(int delay = 1); - - /** Set the status to Suspended. */ - virtual void suspend(); - - /** Set the status to Unallocated. */ - virtual void deallocate(); - - /** Set the status to Halted. */ - virtual void halt(); - -#if FULL_SYSTEM - /** Dumps the function profiling information. - * @todo: Implement. - */ - virtual void dumpFuncProfile(); -#endif - /** Takes over execution of a thread from another CPU. */ - virtual void takeOverFrom(ThreadContext *old_context); - - /** Registers statistics associated with this TC. */ - virtual void regStats(const std::string &name); - - /** Serializes state. */ - virtual void serialize(std::ostream &os); - /** Unserializes state. 
*/ - virtual void unserialize(Checkpoint *cp, const std::string §ion); - -#if FULL_SYSTEM - /** Returns pointer to the quiesce event. */ - virtual EndQuiesceEvent *getQuiesceEvent(); - - /** Reads the last tick that this thread was activated on. */ - virtual Tick readLastActivate(); - /** Reads the last tick that this thread was suspended on. */ - virtual Tick readLastSuspend(); - - /** Clears the function profiling information. */ - virtual void profileClear(); - /** Samples the function profiling information. */ - virtual void profileSample(); -#endif - /** Returns this thread's ID number. */ - virtual int getThreadNum() { return thread->readTid(); } - - /** Returns the instruction this thread is currently committing. - * Only used when an instruction faults. - */ - virtual TheISA::MachInst getInst(); - - /** Copies the architectural registers from another TC into this TC. */ - virtual void copyArchRegs(ThreadContext *tc); - - /** Resets all architectural registers to 0. */ - virtual void clearArchRegs(); - - /** Reads an integer register. */ - virtual uint64_t readIntReg(int reg_idx); - - virtual FloatReg readFloatReg(int reg_idx, int width); - - virtual FloatReg readFloatReg(int reg_idx); - - virtual FloatRegBits readFloatRegBits(int reg_idx, int width); - - virtual FloatRegBits readFloatRegBits(int reg_idx); - - /** Sets an integer register to a value. */ - virtual void setIntReg(int reg_idx, uint64_t val); - - virtual void setFloatReg(int reg_idx, FloatReg val, int width); - - virtual void setFloatReg(int reg_idx, FloatReg val); - - virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width); - - virtual void setFloatRegBits(int reg_idx, FloatRegBits val); - - /** Reads this thread's PC. */ - virtual uint64_t readPC() - { return cpu->readPC(thread->readTid()); } - - /** Sets this thread's PC. */ - virtual void setPC(uint64_t val); - - /** Reads this thread's next PC. 
*/ - virtual uint64_t readNextPC() - { return cpu->readNextPC(thread->readTid()); } - - /** Sets this thread's next PC. */ - virtual void setNextPC(uint64_t val); - - virtual uint64_t readNextNPC() - { - panic("Alpha has no NextNPC!"); - return 0; - } - - virtual void setNextNPC(uint64_t val) - { } - - /** Reads a miscellaneous register. */ - virtual MiscReg readMiscReg(int misc_reg) - { return cpu->readMiscReg(misc_reg, thread->readTid()); } - - /** Reads a misc. register, including any side-effects the - * read might have as defined by the architecture. */ - virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) - { return cpu->readMiscRegWithEffect(misc_reg, fault, thread->readTid()); } - - /** Sets a misc. register. */ - virtual Fault setMiscReg(int misc_reg, const MiscReg &val); - - /** Sets a misc. register, including any side-effects the - * write might have as defined by the architecture. */ - virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); - - /** Returns the number of consecutive store conditional failures. */ - // @todo: Figure out where these store cond failures should go. - virtual unsigned readStCondFailures() - { return thread->storeCondFailures; } - - /** Sets the number of consecutive store conditional failures. */ - virtual void setStCondFailures(unsigned sc_failures) - { thread->storeCondFailures = sc_failures; } - -#if FULL_SYSTEM - /** Returns if the thread is currently in PAL mode, based on - * the PC's value. */ - virtual bool inPalMode() - { return TheISA::PcPAL(cpu->readPC(thread->readTid())); } -#endif - // Only really makes sense for old CPU model. Lots of code - // outside the CPU still checks this function, so it will - // always return false to keep everything working. - /** Checks if the thread is misspeculating. Because it is - * very difficult to determine if the thread is - * misspeculating, this is set as false. 
*/ - virtual bool misspeculating() { return false; } - -#if !FULL_SYSTEM - /** Gets a syscall argument by index. */ - virtual IntReg getSyscallArg(int i); - - /** Sets a syscall argument. */ - virtual void setSyscallArg(int i, IntReg val); - - /** Sets the syscall return value. */ - virtual void setSyscallReturn(SyscallReturn return_value); - - /** Executes a syscall in SE mode. */ - virtual void syscall(int64_t callnum) - { return cpu->syscall(callnum, thread->readTid()); } - - /** Reads the funcExeInst counter. */ - virtual Counter readFuncExeInst() { return thread->funcExeInst; } -#endif - virtual void changeRegFileContext(TheISA::RegFile::ContextParam param, - TheISA::RegFile::ContextVal val) - { panic("Not supported on Alpha!"); } - }; - -#if FULL_SYSTEM - /** ITB pointer. */ - AlphaITB *itb; - /** DTB pointer. */ - AlphaDTB *dtb; -#endif - - /** Registers statistics. */ - void regStats(); - -#if FULL_SYSTEM - /** Translates instruction requestion. */ - Fault translateInstReq(RequestPtr &req, Thread *thread) - { - return itb->translate(req, thread->getTC()); - } - - /** Translates data read request. */ - Fault translateDataReadReq(RequestPtr &req, Thread *thread) - { - return dtb->translate(req, thread->getTC(), false); - } - - /** Translates data write request. */ - Fault translateDataWriteReq(RequestPtr &req, Thread *thread) - { - return dtb->translate(req, thread->getTC(), true); - } - -#else - /** Translates instruction requestion in syscall emulation mode. */ - Fault translateInstReq(RequestPtr &req, Thread *thread) - { - return thread->getProcessPtr()->pTable->translate(req); - } - - /** Translates data read request in syscall emulation mode. */ - Fault translateDataReadReq(RequestPtr &req, Thread *thread) - { - return thread->getProcessPtr()->pTable->translate(req); - } - - /** Translates data write request in syscall emulation mode. 
*/ - Fault translateDataWriteReq(RequestPtr &req, Thread *thread) - { - return thread->getProcessPtr()->pTable->translate(req); - } - -#endif - /** Reads a miscellaneous register. */ - MiscReg readMiscReg(int misc_reg, unsigned tid); - - /** Reads a misc. register, including any side effects the read - * might have as defined by the architecture. - */ - MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid); - - /** Sets a miscellaneous register. */ - Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid); - - /** Sets a misc. register, including any side effects the write - * might have as defined by the architecture. - */ - Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid); - - /** Initiates a squash of all in-flight instructions for a given - * thread. The source of the squash is an external update of - * state through the TC. - */ - void squashFromTC(unsigned tid); - -#if FULL_SYSTEM - /** Posts an interrupt. */ - void post_interrupt(int int_num, int index); - /** Reads the interrupt flag. */ - int readIntrFlag(); - /** Sets the interrupt flags. */ - void setIntrFlag(int val); - /** HW return from error interrupt. */ - Fault hwrei(unsigned tid); - /** Returns if a specific PC is a PAL mode PC. */ - bool inPalMode(uint64_t PC) - { return AlphaISA::PcPAL(PC); } - - bool simPalCheck(int palFunc, unsigned tid); - - /** Processes any interrupts. */ - void processInterrupts(); - - /** Halts the CPU. */ - void halt() { panic("Halt not implemented!\n"); } -#endif - - /** Traps to handle given fault. */ - void trap(Fault fault, unsigned tid); - -#if !FULL_SYSTEM - /** Executes a syscall. - * @todo: Determine if this needs to be virtual. - */ - void syscall(int64_t callnum, int tid); - /** Gets a syscall argument. */ - IntReg getSyscallArg(int i, int tid); - - /** Used to shift args for indirect syscall. */ - void setSyscallArg(int i, IntReg val, int tid); - - /** Sets the return value of a syscall. 
*/ - void setSyscallReturn(SyscallReturn return_value, int tid); -#endif - - /** CPU read function, forwards read to LSQ. */ - template - Fault read(RequestPtr &req, T &data, int load_idx) - { - return this->iew.ldstQueue.read(req, data, load_idx); - } - - /** CPU write function, forwards write to LSQ. */ - template - Fault write(RequestPtr &req, T &data, int store_idx) - { - return this->iew.ldstQueue.write(req, data, store_idx); - } - - Addr lockAddr; - - /** Temporary fix for the lock flag, works in the UP case. */ - bool lockFlag; -}; - -#endif // __CPU_O3_ALPHA_CPU_HH__ diff --git a/src/cpu/o3/base_dyn_inst.cc b/src/cpu/o3/base_dyn_inst.cc index 1f7540d6a9..a0089fb8b0 100644 --- a/src/cpu/o3/base_dyn_inst.cc +++ b/src/cpu/o3/base_dyn_inst.cc @@ -29,8 +29,7 @@ */ #include "cpu/base_dyn_inst_impl.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" // Explicit instantiation template class BaseDynInst; diff --git a/src/cpu/o3/bpred_unit.cc b/src/cpu/o3/bpred_unit.cc index c35c0a0aa9..4087fa07bd 100644 --- a/src/cpu/o3/bpred_unit.cc +++ b/src/cpu/o3/bpred_unit.cc @@ -29,7 +29,6 @@ */ #include "cpu/o3/bpred_unit_impl.hh" -#include "cpu/o3/alpha_impl.hh" -#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/isa_specific.hh" template class BPredUnit; diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc index 770008a330..9bbb526dc9 100644 --- a/src/cpu/o3/commit.cc +++ b/src/cpu/o3/commit.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/commit_impl.hh" template class DefaultCommit; diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 5533990481..c2282d6177 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -41,8 +41,7 @@ #include "cpu/activity.hh" #include "cpu/simple_thread.hh" #include "cpu/thread_context.hh" -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" 
+#include "cpu/o3/isa_specific.hh" #include "cpu/o3/cpu.hh" #include "sim/root.hh" diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index b1ebcce9d0..9565bbe4f3 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -49,11 +49,14 @@ #include "cpu/o3/cpu_policy.hh" #include "cpu/o3/scoreboard.hh" #include "cpu/o3/thread_state.hh" +//#include "cpu/o3/thread_context.hh" #include "sim/process.hh" template class Checker; class ThreadContext; +template +class O3ThreadContext; class MemObject; class Process; @@ -67,6 +70,10 @@ class BaseO3CPU : public BaseCPU void regStats(); + /** Sets this CPU's ID. */ + void setCpuId(int id) { cpu_id = id; } + + /** Reads this CPU's ID. */ int readCpuId() { return cpu_id; } protected: @@ -94,6 +101,7 @@ class FullO3CPU : public BaseO3CPU typedef typename std::list::iterator ListIt; + friend class O3ThreadContext; public: enum Status { Running, diff --git a/src/cpu/o3/decode.cc b/src/cpu/o3/decode.cc index 4924f018a2..52d55983a8 100644 --- a/src/cpu/o3/decode.cc +++ b/src/cpu/o3/decode.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/decode_impl.hh" template class DefaultDecode; diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc index 5f52d0fca6..39b9879a40 100644 --- a/src/cpu/o3/fetch.cc +++ b/src/cpu/o3/fetch.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/fetch_impl.hh" template class DefaultFetch; diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc index 8145f4cc73..bf8eb61ac7 100644 --- a/src/cpu/o3/iew.cc +++ b/src/cpu/o3/iew.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/iew_impl.hh" #include "cpu/o3/inst_queue.hh" diff --git a/src/cpu/o3/inst_queue.cc 
b/src/cpu/o3/inst_queue.cc index f2c6b8213e..88f3f33a05 100644 --- a/src/cpu/o3/inst_queue.cc +++ b/src/cpu/o3/inst_queue.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/inst_queue_impl.hh" // Force instantiation of InstructionQueue. diff --git a/src/cpu/o3/isa_specific.hh b/src/cpu/o3/isa_specific.hh new file mode 100755 index 0000000000..f8a9dd8cc6 --- /dev/null +++ b/src/cpu/o3/isa_specific.hh @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Korey Sewell + */ + +#include "cpu/base.hh" + +#if THE_ISA == ALPHA_ISA + #include "cpu/o3/alpha/cpu.hh" + #include "cpu/o3/alpha/impl.hh" + #include "cpu/o3/alpha/params.hh" + #include "cpu/o3/alpha/dyn_inst.hh" +#else + #error "O3CPU doesnt support this ISA" +#endif diff --git a/src/cpu/o3/lsq.cc b/src/cpu/o3/lsq.cc index de0325920b..872576c32a 100644 --- a/src/cpu/o3/lsq.cc +++ b/src/cpu/o3/lsq.cc @@ -28,9 +28,7 @@ * Authors: Korey Sewell */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/lsq_impl.hh" // Force the instantiation of LDSTQ for all the implementations we care about. diff --git a/src/cpu/o3/lsq_unit.cc b/src/cpu/o3/lsq_unit.cc index e935ffa5c7..9b244ac719 100644 --- a/src/cpu/o3/lsq_unit.cc +++ b/src/cpu/o3/lsq_unit.cc @@ -29,9 +29,7 @@ * Korey Sewell */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/lsq_unit_impl.hh" // Force the instantiation of LDSTQ for all the implementations we care about. 
diff --git a/src/cpu/o3/mem_dep_unit.cc b/src/cpu/o3/mem_dep_unit.cc index a951032668..3edac95ac1 100644 --- a/src/cpu/o3/mem_dep_unit.cc +++ b/src/cpu/o3/mem_dep_unit.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/store_set.hh" #include "cpu/o3/mem_dep_unit_impl.hh" diff --git a/src/cpu/o3/alpha_params.hh b/src/cpu/o3/params.hh old mode 100644 new mode 100755 similarity index 88% rename from src/cpu/o3/alpha_params.hh rename to src/cpu/o3/params.hh index f0732733e2..69a1bb937a --- a/src/cpu/o3/alpha_params.hh +++ b/src/cpu/o3/params.hh @@ -28,47 +28,29 @@ * Authors: Kevin Lim */ -#ifndef __CPU_O3_ALPHA_PARAMS_HH__ -#define __CPU_O3_ALPHA_PARAMS_HH__ +#ifndef __CPU_O3_PARAMS_HH__ +#define __CPU_O3_PARAMS_HH__ #include "cpu/o3/cpu.hh" //Forward declarations -class AlphaDTB; -class AlphaITB; class FUPool; -class MemObject; -class Process; -class System; /** - * This file defines the parameters that will be used for the AlphaO3CPU. + * This file defines the parameters that will be used for the O3CPU. * This must be defined externally so that the Impl can have a params class * defined that it can pass to all of the individual stages. 
*/ - -class AlphaSimpleParams : public BaseO3CPU::Params +class O3Params : public BaseO3CPU::Params { public: - -#if FULL_SYSTEM - AlphaITB *itb; AlphaDTB *dtb; -#else - std::vector workload; - Process *process; -#endif // FULL_SYSTEM - - MemObject *mem; - - BaseCPU *checker; - unsigned activity; // // Caches // -// MemInterface *icacheInterface; -// MemInterface *dcacheInterface; + // MemInterface *icacheInterface; + // MemInterface *dcacheInterface; unsigned cachePorts; diff --git a/src/cpu/o3/rename.cc b/src/cpu/o3/rename.cc index 9ca8e82c6e..f972190b77 100644 --- a/src/cpu/o3/rename.cc +++ b/src/cpu/o3/rename.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/rename_impl.hh" template class DefaultRename; diff --git a/src/cpu/o3/rob.cc b/src/cpu/o3/rob.cc index f99e5ccfd2..ccef6b1554 100644 --- a/src/cpu/o3/rob.cc +++ b/src/cpu/o3/rob.cc @@ -29,8 +29,7 @@ * Nathan Binkert */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/rob_impl.hh" // Force instantiation of InstructionQueue. diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh new file mode 100755 index 0000000000..d60867029a --- /dev/null +++ b/src/cpu/o3/thread_context.hh @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_THREAD_CONTEXT_HH__ +#define __CPU_O3_THREAD_CONTEXT_HH__ + +#include "cpu/o3/isa_specific.hh" + +/** + * Derived ThreadContext class for use with the O3CPU. It + * provides the interface for any external objects to access a + * single thread's state and some general CPU state. 
Any time + * external objects try to update state through this interface, + * the CPU will create an event to squash all in-flight + * instructions in order to ensure state is maintained correctly. + * It must be defined specifically for the O3CPU because + * not all architectural state is located within the O3ThreadState + * (such as the commit PC, and registers), and specific actions + * must be taken when using this interface (such as squashing all + * in-flight instructions when doing a write to this interface). + */ +template +class O3ThreadContext : public ThreadContext +{ + public: + typedef typename Impl::O3CPU O3CPU; + + /** Pointer to the CPU. */ + O3CPU *cpu; + + /** Pointer to the thread state that this TC corrseponds to. */ + O3ThreadState *thread; + + /** Returns a pointer to this CPU. */ + virtual BaseCPU *getCpuPtr() { return cpu; } + + /** Sets this CPU's ID. */ + virtual void setCpuId(int id) { cpu->setCpuId(id); } + + /** Reads this CPU's ID. */ + virtual int readCpuId() { return cpu->readCpuId(); } + +#if FULL_SYSTEM + /** Returns a pointer to the system. */ + virtual System *getSystemPtr() { return cpu->system; } + + /** Returns a pointer to physical memory. */ + virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; } + + /** Returns a pointer to this thread's kernel statistics. */ + virtual Kernel::Statistics *getKernelStats() + { return thread->kernelStats; } + + virtual FunctionalPort *getPhysPort() { return thread->getPhysPort(); } + + virtual VirtualPort *getVirtPort(ThreadContext *src_tc = NULL); + + void delVirtPort(VirtualPort *vp); +#else + virtual TranslatingPort *getMemPort() { return thread->getMemPort(); } + + /** Returns a pointer to this thread's process. */ + virtual Process *getProcessPtr() { return thread->getProcessPtr(); } +#endif + /** Returns this thread's status. */ + virtual Status status() const { return thread->status(); } + + /** Sets this thread's status. 
*/ + virtual void setStatus(Status new_status) + { thread->setStatus(new_status); } + + /** Set the status to Active. Optional delay indicates number of + * cycles to wait before beginning execution. */ + virtual void activate(int delay = 1); + + /** Set the status to Suspended. */ + virtual void suspend(); + + /** Set the status to Unallocated. */ + virtual void deallocate(); + + /** Set the status to Halted. */ + virtual void halt(); + +#if FULL_SYSTEM + /** Dumps the function profiling information. + * @todo: Implement. + */ + virtual void dumpFuncProfile(); +#endif + /** Takes over execution of a thread from another CPU. */ + virtual void takeOverFrom(ThreadContext *old_context); + + /** Registers statistics associated with this TC. */ + virtual void regStats(const std::string &name); + + /** Serializes state. */ + virtual void serialize(std::ostream &os); + /** Unserializes state. */ + virtual void unserialize(Checkpoint *cp, const std::string &section); + +#if FULL_SYSTEM + /** Reads the last tick that this thread was activated on. */ + virtual Tick readLastActivate(); + /** Reads the last tick that this thread was suspended on. */ + virtual Tick readLastSuspend(); + + /** Clears the function profiling information. */ + virtual void profileClear(); + /** Samples the function profiling information. */ + virtual void profileSample(); +#endif + /** Returns this thread's ID number. */ + virtual int getThreadNum() { return thread->readTid(); } + + /** Returns the instruction this thread is currently committing. + * Only used when an instruction faults. + */ + virtual TheISA::MachInst getInst(); + + /** Copies the architectural registers from another TC into this TC. */ + virtual void copyArchRegs(ThreadContext *tc); + + /** Resets all architectural registers to 0. */ + virtual void clearArchRegs(); + + /** Reads an integer register.
*/ + virtual uint64_t readIntReg(int reg_idx); + + virtual FloatReg readFloatReg(int reg_idx, int width); + + virtual FloatReg readFloatReg(int reg_idx); + + virtual FloatRegBits readFloatRegBits(int reg_idx, int width); + + virtual FloatRegBits readFloatRegBits(int reg_idx); + + /** Sets an integer register to a value. */ + virtual void setIntReg(int reg_idx, uint64_t val); + + virtual void setFloatReg(int reg_idx, FloatReg val, int width); + + virtual void setFloatReg(int reg_idx, FloatReg val); + + virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width); + + virtual void setFloatRegBits(int reg_idx, FloatRegBits val); + + /** Reads this thread's PC. */ + virtual uint64_t readPC() + { return cpu->readPC(thread->readTid()); } + + /** Sets this thread's PC. */ + virtual void setPC(uint64_t val); + + /** Reads this thread's next PC. */ + virtual uint64_t readNextPC() + { return cpu->readNextPC(thread->readTid()); } + + /** Sets this thread's next PC. */ + virtual void setNextPC(uint64_t val); + + /** Reads a miscellaneous register. */ + virtual MiscReg readMiscReg(int misc_reg) + { return cpu->readMiscReg(misc_reg, thread->readTid()); } + + /** Reads a misc. register, including any side-effects the + * read might have as defined by the architecture. */ + virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { return cpu->readMiscRegWithEffect(misc_reg, fault, thread->readTid()); } + + /** Sets a misc. register. */ + virtual Fault setMiscReg(int misc_reg, const MiscReg &val); + + /** Sets a misc. register, including any side-effects the + * write might have as defined by the architecture. */ + virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); + + /** Returns the number of consecutive store conditional failures. */ + // @todo: Figure out where these store cond failures should go. 
+ virtual unsigned readStCondFailures() + { return thread->storeCondFailures; } + + /** Sets the number of consecutive store conditional failures. */ + virtual void setStCondFailures(unsigned sc_failures) + { thread->storeCondFailures = sc_failures; } + + // Only really makes sense for old CPU model. Lots of code + // outside the CPU still checks this function, so it will + // always return false to keep everything working. + /** Checks if the thread is misspeculating. Because it is + * very difficult to determine if the thread is + * misspeculating, this is set as false. */ + virtual bool misspeculating() { return false; } + +#if !FULL_SYSTEM + /** Gets a syscall argument by index. */ + virtual IntReg getSyscallArg(int i); + + /** Sets a syscall argument. */ + virtual void setSyscallArg(int i, IntReg val); + + /** Sets the syscall return value. */ + virtual void setSyscallReturn(SyscallReturn return_value); + + /** Executes a syscall in SE mode. */ + virtual void syscall(int64_t callnum) + { return cpu->syscall(callnum, thread->readTid()); } + + /** Reads the funcExeInst counter. */ + virtual Counter readFuncExeInst() { return thread->funcExeInst; } +#endif +}; + +#endif diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh new file mode 100755 index 0000000000..fccabaf363 --- /dev/null +++ b/src/cpu/o3/thread_context_impl.hh @@ -0,0 +1,488 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" + +using namespace TheISA; + +#if FULL_SYSTEM +template +VirtualPort * +O3ThreadContext::getVirtPort(ThreadContext *src_tc) +{ + if (!src_tc) + return thread->getVirtPort(); + + VirtualPort *vp; + Port *mem_port; + + vp = new VirtualPort("tc-vport", src_tc); + mem_port = cpu->system->physmem->getPort("functional"); + mem_port->setPeer(vp); + vp->setPeer(mem_port); + return vp; +} + +template +void +O3ThreadContext::dumpFuncProfile() +{ + // Currently not supported +} +#endif + +template +void +O3ThreadContext::takeOverFrom(ThreadContext *old_context) +{ + // some things should already be set up +#if FULL_SYSTEM + assert(getSystemPtr() == old_context->getSystemPtr()); +#else + assert(getProcessPtr() == old_context->getProcessPtr()); +#endif + + // copy over functional state + setStatus(old_context->status()); + copyArchRegs(old_context); + setCpuId(old_context->readCpuId()); + +#if !FULL_SYSTEM + thread->funcExeInst = old_context->readFuncExeInst(); +#else + EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent(); + if (other_quiesce) { + // Point the quiesce event's TC at this TC so that it wakes up + // the proper CPU. + other_quiesce->tc = this; + } + if (thread->quiesceEvent) { + thread->quiesceEvent->tc = this; + } + + // Transfer kernel stats from one CPU to the other. 
+ thread->kernelStats = old_context->getKernelStats(); +// storeCondFailures = 0; + cpu->lockFlag = false; +#endif + + old_context->setStatus(ThreadContext::Unallocated); + + thread->inSyscall = false; + thread->trapPending = false; +} + +#if FULL_SYSTEM +template +void +O3ThreadContext::delVirtPort(VirtualPort *vp) +{ + delete vp->getPeer(); + delete vp; +} +#endif + +template +void +O3ThreadContext::activate(int delay) +{ + DPRINTF(O3CPU, "Calling activate on AlphaTC\n"); + + if (thread->status() == ThreadContext::Active) + return; + +#if FULL_SYSTEM + thread->lastActivate = curTick; +#endif + + if (thread->status() == ThreadContext::Unallocated) { + cpu->activateWhenReady(thread->readTid()); + return; + } + + thread->setStatus(ThreadContext::Active); + + // status() == Suspended + cpu->activateContext(thread->readTid(), delay); +} + +template +void +O3ThreadContext::suspend() +{ + DPRINTF(O3CPU, "Calling suspend on AlphaTC\n"); + + if (thread->status() == ThreadContext::Suspended) + return; + +#if FULL_SYSTEM + thread->lastActivate = curTick; + thread->lastSuspend = curTick; +#endif +/* +#if FULL_SYSTEM + // Don't change the status from active if there are pending interrupts + if (cpu->check_interrupts()) { + assert(status() == ThreadContext::Active); + return; + } +#endif +*/ + thread->setStatus(ThreadContext::Suspended); + cpu->suspendContext(thread->readTid()); +} + +template +void +O3ThreadContext::deallocate() +{ + DPRINTF(O3CPU, "Calling deallocate on AlphaTC\n"); + + if (thread->status() == ThreadContext::Unallocated) + return; + + thread->setStatus(ThreadContext::Unallocated); + cpu->deallocateContext(thread->readTid()); +} + +template +void +O3ThreadContext::halt() +{ + DPRINTF(O3CPU, "Calling halt on AlphaTC\n"); + + if (thread->status() == ThreadContext::Halted) + return; + + thread->setStatus(ThreadContext::Halted); + cpu->haltContext(thread->readTid()); +} + +template +void +O3ThreadContext::regStats(const std::string &name) +{ +#if FULL_SYSTEM + 
thread->kernelStats = new Kernel::Statistics(cpu->system); + thread->kernelStats->regStats(name + ".kern"); +#endif +} + +template <class Impl> +void +O3ThreadContext<Impl>::serialize(std::ostream &os) +{ +#if FULL_SYSTEM + if (thread->kernelStats) + thread->kernelStats->serialize(os); +#endif + +} + +template <class Impl> +void +O3ThreadContext<Impl>::unserialize(Checkpoint *cp, const std::string &section) +{ +#if FULL_SYSTEM + if (thread->kernelStats) + thread->kernelStats->unserialize(cp, section); +#endif + +} + +#if FULL_SYSTEM +template <class Impl> +Tick +O3ThreadContext<Impl>::readLastActivate() +{ + return thread->lastActivate; +} + +template <class Impl> +Tick +O3ThreadContext<Impl>::readLastSuspend() +{ + return thread->lastSuspend; +} + +template <class Impl> +void +O3ThreadContext<Impl>::profileClear() +{} + +template <class Impl> +void +O3ThreadContext<Impl>::profileSample() +{} +#endif + +template <class Impl> +TheISA::MachInst +O3ThreadContext<Impl>:: getInst() +{ + return thread->getInst(); +} + +template <class Impl> +void +O3ThreadContext<Impl>::copyArchRegs(ThreadContext *tc) +{ + // This function will mess things up unless the ROB is empty and + // there are no instructions in the pipeline. + unsigned tid = thread->readTid(); + PhysRegIndex renamed_reg; + + // First loop through the integer registers. + for (int i = 0; i < TheISA::NumIntRegs; ++i) { + renamed_reg = cpu->renameMap[tid].lookup(i); + + DPRINTF(O3CPU, "Copying over register %i, had data %lli, " + "now has data %lli.\n", + renamed_reg, cpu->readIntReg(renamed_reg), + tc->readIntReg(i)); + + cpu->setIntReg(renamed_reg, tc->readIntReg(i)); + } + + // Then loop through the floating point registers. + for (int i = 0; i < TheISA::NumFloatRegs; ++i) { + renamed_reg = cpu->renameMap[tid].lookup(i + TheISA::FP_Base_DepTag); + cpu->setFloatRegBits(renamed_reg, + tc->readFloatRegBits(i)); + } + + // Copy the misc regs. + copyMiscRegs(tc, this); + + // Then finally set the PC and the next PC.
+ cpu->setPC(tc->readPC(), tid); + cpu->setNextPC(tc->readNextPC(), tid); +#if !FULL_SYSTEM + this->thread->funcExeInst = tc->readFuncExeInst(); +#endif +} + +template +void +O3ThreadContext::clearArchRegs() +{} + +template +uint64_t +O3ThreadContext::readIntReg(int reg_idx) +{ + return cpu->readArchIntReg(reg_idx, thread->readTid()); +} + +template +FloatReg +O3ThreadContext::readFloatReg(int reg_idx, int width) +{ + switch(width) { + case 32: + return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); + case 64: + return cpu->readArchFloatRegDouble(reg_idx, thread->readTid()); + default: + panic("Unsupported width!"); + return 0; + } +} + +template +FloatReg +O3ThreadContext::readFloatReg(int reg_idx) +{ + return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); +} + +template +FloatRegBits +O3ThreadContext::readFloatRegBits(int reg_idx, int width) +{ + DPRINTF(Fault, "Reading floatint register through the TC!\n"); + return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); +} + +template +FloatRegBits +O3ThreadContext::readFloatRegBits(int reg_idx) +{ + return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); +} + +template +void +O3ThreadContext::setIntReg(int reg_idx, uint64_t val) +{ + cpu->setArchIntReg(reg_idx, val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template +void +O3ThreadContext::setFloatReg(int reg_idx, FloatReg val, int width) +{ + switch(width) { + case 32: + cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); + break; + case 64: + cpu->setArchFloatRegDouble(reg_idx, val, thread->readTid()); + break; + } + + // Squash if we're not already in a state update mode. 
+ if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template +void +O3ThreadContext::setFloatReg(int reg_idx, FloatReg val) +{ + cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); + + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template +void +O3ThreadContext::setFloatRegBits(int reg_idx, FloatRegBits val, + int width) +{ + DPRINTF(Fault, "Setting floatint register through the TC!\n"); + cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template +void +O3ThreadContext::setFloatRegBits(int reg_idx, FloatRegBits val) +{ + cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template +void +O3ThreadContext::setPC(uint64_t val) +{ + cpu->setPC(val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template +void +O3ThreadContext::setNextPC(uint64_t val) +{ + cpu->setNextPC(val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template +Fault +O3ThreadContext::setMiscReg(int misc_reg, const MiscReg &val) +{ + Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->readTid()); + + // Squash if we're not already in a state update mode. 
+ if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } + + return ret_fault; +} + +template +Fault +O3ThreadContext::setMiscRegWithEffect(int misc_reg, + const MiscReg &val) +{ + Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val, + thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } + + return ret_fault; +} + +#if !FULL_SYSTEM + +template +TheISA::IntReg +O3ThreadContext::getSyscallArg(int i) +{ + return cpu->getSyscallArg(i, thread->readTid()); +} + +template +void +O3ThreadContext::setSyscallArg(int i, IntReg val) +{ + cpu->setSyscallArg(i, val, thread->readTid()); +} + +template +void +O3ThreadContext::setSyscallReturn(SyscallReturn return_value) +{ + cpu->setSyscallReturn(return_value, thread->readTid()); +} + +#endif // FULL_SYSTEM + diff --git a/src/python/m5/objects/AlphaO3CPU.py b/src/python/m5/objects/O3CPU.py similarity index 98% rename from src/python/m5/objects/AlphaO3CPU.py rename to src/python/m5/objects/O3CPU.py index f14f8c88ed..4ecfa8fbd6 100644 --- a/src/python/m5/objects/AlphaO3CPU.py +++ b/src/python/m5/objects/O3CPU.py @@ -2,8 +2,8 @@ from m5 import build_env from m5.config import * from BaseCPU import BaseCPU -class DerivAlphaO3CPU(BaseCPU): - type = 'DerivAlphaO3CPU' +class DerivO3CPU(BaseCPU): + type = 'DerivO3CPU' activity = Param.Unsigned("Initial count") numThreads = Param.Unsigned("number of HW thread contexts") diff --git a/src/sim/process.cc b/src/sim/process.cc index 9cdc5b9f5d..f989300a39 100644 --- a/src/sim/process.cc +++ b/src/sim/process.cc @@ -358,7 +358,10 @@ LiveProcess::argsInit(int intSize, int pageSize) Addr prog_entry = objFile->entryPoint(); threadContexts[0]->setPC(prog_entry); threadContexts[0]->setNextPC(prog_entry + sizeof(MachInst)); + +#if THE_ISA != ALPHA_ISA //e.g. 
MIPS or Sparc threadContexts[0]->setNextNPC(prog_entry + (2 * sizeof(MachInst))); +#endif num_processes++; } From 51261196bde3403544631cdb4895c2d2a51c3f1e Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 30 Jun 2006 20:49:31 -0400 Subject: [PATCH 086/152] now O3CPU is totally independent of the ISA... all alpha specific stuff is the cpu/o3/alpha directory src/cpu/o3/alpha/cpu.cc: src/cpu/o3/alpha/cpu_impl.hh: src/cpu/o3/alpha/impl.hh: filenames src/cpu/o3/alpha/thread_context.hh: public src/cpu/o3/base_dyn_inst.cc: src/cpu/o3/bpred_unit.cc: src/cpu/o3/commit.cc: src/cpu/o3/cpu.cc: src/cpu/o3/decode.cc: src/cpu/o3/fetch.cc: src/cpu/o3/iew.cc: src/cpu/o3/inst_queue.cc: src/cpu/o3/lsq.cc: src/cpu/o3/lsq_unit.cc: src/cpu/o3/mem_dep_unit.cc: src/cpu/o3/rename.cc: src/cpu/o3/rob.cc: use O3CPUImpl ... not Alpha src/cpu/o3/checker_builder.cc: filename --HG-- extra : convert_revision : 6eb739909699ade1e2a9d63637b182413ceebc69 --- src/cpu/o3/alpha/cpu.cc | 2 +- src/cpu/o3/alpha/cpu_impl.hh | 4 ++-- src/cpu/o3/alpha/impl.hh | 8 +++++++- src/cpu/o3/alpha/thread_context.hh | 1 + src/cpu/o3/base_dyn_inst.cc | 4 ++-- src/cpu/o3/bpred_unit.cc | 2 +- src/cpu/o3/checker_builder.cc | 4 ++-- src/cpu/o3/commit.cc | 2 +- src/cpu/o3/cpu.cc | 2 +- src/cpu/o3/decode.cc | 2 +- src/cpu/o3/fetch.cc | 2 +- src/cpu/o3/iew.cc | 2 +- src/cpu/o3/inst_queue.cc | 2 +- src/cpu/o3/lsq.cc | 2 +- src/cpu/o3/lsq_unit.cc | 2 +- src/cpu/o3/mem_dep_unit.cc | 10 +++++----- src/cpu/o3/rename.cc | 2 +- src/cpu/o3/rob.cc | 2 +- 18 files changed, 31 insertions(+), 24 deletions(-) diff --git a/src/cpu/o3/alpha/cpu.cc b/src/cpu/o3/alpha/cpu.cc index 87a4d03a77..ed10b2fd11 100644 --- a/src/cpu/o3/alpha/cpu.cc +++ b/src/cpu/o3/alpha/cpu.cc @@ -28,7 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alphaimpl.hh" +#include "cpu/o3/alpha/impl.hh" #include "cpu/o3/alpha/cpu_impl.hh" #include "cpu/o3/alpha/dyn_inst.hh" diff --git a/src/cpu/o3/alpha/cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh index 
2da683398b..0473e60c23 100644 --- a/src/cpu/o3/alpha/cpu_impl.hh +++ b/src/cpu/o3/alpha/cpu_impl.hh @@ -40,7 +40,7 @@ #include "cpu/o3/alpha/cpu.hh" #include "cpu/o3/alpha/params.hh" -#include "cpu/o3/alpha/tc.hh" +#include "cpu/o3/alpha/thread_context.hh" #include "cpu/o3/comm.hh" #include "cpu/o3/thread_state.hh" @@ -120,7 +120,7 @@ AlphaO3CPU::AlphaO3CPU(Params *params) // CheckerThreadContext. #if USE_CHECKER if (params->checker) { - tc = new CheckerThreadContext>( + tc = new CheckerThreadContext >( alpha_tc, this->checker); } #endif diff --git a/src/cpu/o3/alpha/impl.hh b/src/cpu/o3/alpha/impl.hh index cdcdff34a8..8cd8692c60 100644 --- a/src/cpu/o3/alpha/impl.hh +++ b/src/cpu/o3/alpha/impl.hh @@ -81,8 +81,14 @@ struct AlphaSimpleImpl enum { MaxWidth = 8, - MaxThreads = 2 + MaxThreads = 4 }; }; +/** The O3Impl to be used. */ +typedef AlphaSimpleImpl O3CPUImpl; + +/** The O3Impl to be used. */ +typedef DynInst O3DynInst; + #endif // __CPU_O3_ALPHA_IMPL_HH__ diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh index 890bff3ffb..57190d65e3 100644 --- a/src/cpu/o3/alpha/thread_context.hh +++ b/src/cpu/o3/alpha/thread_context.hh @@ -34,6 +34,7 @@ template class AlphaTC : public O3ThreadContext { + public: #if FULL_SYSTEM /** Returns a pointer to the ITB. 
*/ virtual AlphaITB *getITBPtr() { return cpu->itb; } diff --git a/src/cpu/o3/base_dyn_inst.cc b/src/cpu/o3/base_dyn_inst.cc index a0089fb8b0..0979c5c8f1 100644 --- a/src/cpu/o3/base_dyn_inst.cc +++ b/src/cpu/o3/base_dyn_inst.cc @@ -32,8 +32,8 @@ #include "cpu/o3/isa_specific.hh" // Explicit instantiation -template class BaseDynInst; +template class BaseDynInst; template <> int -BaseDynInst::instcount = 0; +BaseDynInst::instcount = 0; diff --git a/src/cpu/o3/bpred_unit.cc b/src/cpu/o3/bpred_unit.cc index 4087fa07bd..08fd4e8eaf 100644 --- a/src/cpu/o3/bpred_unit.cc +++ b/src/cpu/o3/bpred_unit.cc @@ -31,4 +31,4 @@ #include "cpu/o3/bpred_unit_impl.hh" #include "cpu/o3/isa_specific.hh" -template class BPredUnit; +template class BPredUnit; diff --git a/src/cpu/o3/checker_builder.cc b/src/cpu/o3/checker_builder.cc index 58c40d00c9..782d963b0c 100644 --- a/src/cpu/o3/checker_builder.cc +++ b/src/cpu/o3/checker_builder.cc @@ -32,8 +32,8 @@ #include "cpu/checker/cpu_impl.hh" #include "cpu/inst_seq.hh" -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha/dyn_inst.hh" +#include "cpu/o3/alpha/impl.hh" #include "sim/builder.hh" #include "sim/process.hh" #include "sim/sim_object.hh" diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc index 9bbb526dc9..637d59f526 100644 --- a/src/cpu/o3/commit.cc +++ b/src/cpu/o3/commit.cc @@ -31,4 +31,4 @@ #include "cpu/o3/isa_specific.hh" #include "cpu/o3/commit_impl.hh" -template class DefaultCommit; +template class DefaultCommit; diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index c2282d6177..87fee83614 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -1211,4 +1211,4 @@ FullO3CPU::updateThreadPriority() } // Forward declaration of FullO3CPU. 
-template class FullO3CPU; +template class FullO3CPU; diff --git a/src/cpu/o3/decode.cc b/src/cpu/o3/decode.cc index 52d55983a8..896e38331c 100644 --- a/src/cpu/o3/decode.cc +++ b/src/cpu/o3/decode.cc @@ -31,4 +31,4 @@ #include "cpu/o3/isa_specific.hh" #include "cpu/o3/decode_impl.hh" -template class DefaultDecode; +template class DefaultDecode; diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc index 39b9879a40..d809b07e47 100644 --- a/src/cpu/o3/fetch.cc +++ b/src/cpu/o3/fetch.cc @@ -31,4 +31,4 @@ #include "cpu/o3/isa_specific.hh" #include "cpu/o3/fetch_impl.hh" -template class DefaultFetch; +template class DefaultFetch; diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc index bf8eb61ac7..f99be7fe0a 100644 --- a/src/cpu/o3/iew.cc +++ b/src/cpu/o3/iew.cc @@ -32,4 +32,4 @@ #include "cpu/o3/iew_impl.hh" #include "cpu/o3/inst_queue.hh" -template class DefaultIEW; +template class DefaultIEW; diff --git a/src/cpu/o3/inst_queue.cc b/src/cpu/o3/inst_queue.cc index 88f3f33a05..a539066f93 100644 --- a/src/cpu/o3/inst_queue.cc +++ b/src/cpu/o3/inst_queue.cc @@ -32,4 +32,4 @@ #include "cpu/o3/inst_queue_impl.hh" // Force instantiation of InstructionQueue. -template class InstructionQueue; +template class InstructionQueue; diff --git a/src/cpu/o3/lsq.cc b/src/cpu/o3/lsq.cc index 872576c32a..5279472815 100644 --- a/src/cpu/o3/lsq.cc +++ b/src/cpu/o3/lsq.cc @@ -32,5 +32,5 @@ #include "cpu/o3/lsq_impl.hh" // Force the instantiation of LDSTQ for all the implementations we care about. -template class LSQ; +template class LSQ; diff --git a/src/cpu/o3/lsq_unit.cc b/src/cpu/o3/lsq_unit.cc index 9b244ac719..3ca3fa6674 100644 --- a/src/cpu/o3/lsq_unit.cc +++ b/src/cpu/o3/lsq_unit.cc @@ -33,5 +33,5 @@ #include "cpu/o3/lsq_unit_impl.hh" // Force the instantiation of LDSTQ for all the implementations we care about. 
-template class LSQUnit; +template class LSQUnit; diff --git a/src/cpu/o3/mem_dep_unit.cc b/src/cpu/o3/mem_dep_unit.cc index 3edac95ac1..6a14dcbff4 100644 --- a/src/cpu/o3/mem_dep_unit.cc +++ b/src/cpu/o3/mem_dep_unit.cc @@ -33,17 +33,17 @@ #include "cpu/o3/mem_dep_unit_impl.hh" // Force instantation of memory dependency unit using store sets and -// AlphaSimpleImpl. -template class MemDepUnit; +// O3CPUImpl. +template class MemDepUnit; #ifdef DEBUG template <> int -MemDepUnit::MemDepEntry::memdep_count = 0; +MemDepUnit::MemDepEntry::memdep_count = 0; template <> int -MemDepUnit::MemDepEntry::memdep_insert = 0; +MemDepUnit::MemDepEntry::memdep_insert = 0; template <> int -MemDepUnit::MemDepEntry::memdep_erase = 0; +MemDepUnit::MemDepEntry::memdep_erase = 0; #endif diff --git a/src/cpu/o3/rename.cc b/src/cpu/o3/rename.cc index f972190b77..443ada0cb4 100644 --- a/src/cpu/o3/rename.cc +++ b/src/cpu/o3/rename.cc @@ -31,4 +31,4 @@ #include "cpu/o3/isa_specific.hh" #include "cpu/o3/rename_impl.hh" -template class DefaultRename; +template class DefaultRename; diff --git a/src/cpu/o3/rob.cc b/src/cpu/o3/rob.cc index ccef6b1554..9976049cdb 100644 --- a/src/cpu/o3/rob.cc +++ b/src/cpu/o3/rob.cc @@ -33,4 +33,4 @@ #include "cpu/o3/rob_impl.hh" // Force instantiation of InstructionQueue. -template class ROB; +template class ROB; From 62961a291692c960b67158f4152d480c160f8ac8 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sat, 1 Jul 2006 18:52:02 -0400 Subject: [PATCH 087/152] fix cpu builder to build the correct name... add activateThread event and functions src/cpu/o3/alpha/cpu_builder.cc: Have CPU builder build a DerivO3CPU not a DerivAlphaO3CPU src/cpu/o3/cpu.cc: add activateThread Event add activateThread function adjust activateContext to schedule a thread to activate within the CPU instead of activating thread right away. This will lead to stages trying to use threads that arent ready yet and wasting execution time & possibly performance. 
src/cpu/o3/cpu.hh: add activateThread Event add activateThread function add schedule/descheculed activate thread event --HG-- extra : convert_revision : 236d30dc160910507ad36f7f527ab185ed38dc04 --- src/cpu/o3/alpha/cpu_builder.cc | 26 ++++++------- src/cpu/o3/cpu.cc | 69 +++++++++++++++++++++++++++------ src/cpu/o3/cpu.hh | 53 +++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 25 deletions(-) diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc index b1e141ff4e..8190256fbf 100644 --- a/src/cpu/o3/alpha/cpu_builder.cc +++ b/src/cpu/o3/alpha/cpu_builder.cc @@ -31,21 +31,21 @@ #include #include "cpu/base.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" -#include "cpu/o3/alpha_params.hh" +#include "cpu/o3/alpha/cpu.hh" +#include "cpu/o3/alpha/impl.hh" +#include "cpu/o3/alpha/params.hh" #include "cpu/o3/fu_pool.hh" #include "sim/builder.hh" -class DerivAlphaO3CPU : public AlphaO3CPU +class DerivO3CPU : public AlphaO3CPU { public: - DerivAlphaO3CPU(AlphaSimpleParams *p) + DerivO3CPU(AlphaSimpleParams *p) : AlphaO3CPU(p) { } }; -BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaO3CPU) +BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) Param clock; Param numThreads; @@ -144,9 +144,9 @@ Param defer_registration; Param function_trace; Param function_trace_start; -END_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaO3CPU) +END_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) -BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaO3CPU) +BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) INIT_PARAM(clock, "clock speed"), INIT_PARAM(numThreads, "number of HW thread contexts"), @@ -261,11 +261,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaO3CPU) INIT_PARAM(function_trace, "Enable function trace"), INIT_PARAM(function_trace_start, "Cycle to start function trace") -END_INIT_SIM_OBJECT_PARAMS(DerivAlphaO3CPU) +END_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) -CREATE_SIM_OBJECT(DerivAlphaO3CPU) +CREATE_SIM_OBJECT(DerivO3CPU) { - DerivAlphaO3CPU *cpu; + DerivO3CPU *cpu; #if FULL_SYSTEM // 
Full-system only supports a single thread for the moment. @@ -386,10 +386,10 @@ CREATE_SIM_OBJECT(DerivAlphaO3CPU) params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; - cpu = new DerivAlphaO3CPU(params); + cpu = new DerivO3CPU(params); return cpu; } -REGISTER_SIM_OBJECT("DerivAlphaO3CPU", DerivAlphaO3CPU) +REGISTER_SIM_OBJECT("DerivO3CPU", DerivO3CPU) diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 87fee83614..7ed17a91e8 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -85,6 +85,35 @@ FullO3CPU::TickEvent::description() return "FullO3CPU tick event"; } +template +FullO3CPU::ActivateThreadEvent::ActivateThreadEvent() + : Event(&mainEventQueue, CPU_Tick_Pri) +{ +} + +template +void +FullO3CPU::ActivateThreadEvent::init(int thread_num, + FullO3CPU *thread_cpu) +{ + tid = thread_num; + cpu = thread_cpu; +} + +template +void +FullO3CPU::ActivateThreadEvent::process() +{ + cpu->activateThread(tid); +} + +template +const char * +FullO3CPU::ActivateThreadEvent::description() +{ + return "FullO3CPU \"Activate Thread\" event"; +} + template FullO3CPU::FullO3CPU(Params *params) : BaseO3CPU(params), @@ -257,6 +286,8 @@ FullO3CPU::FullO3CPU(Params *params) lastRunningCycle = curTick; + lastActivatedCycle = -1; + contextSwitch = false; } @@ -574,31 +605,45 @@ FullO3CPU::activateWhenReady(int tid) template void -FullO3CPU::activateContext(int tid, int delay) +FullO3CPU::activateThread(unsigned int tid) { - // Needs to set each stage to running as well. 
list::iterator isActive = find( activeThreads.begin(), activeThreads.end(), tid); if (isActive == activeThreads.end()) { - //May Need to Re-code this if the delay variable is the - //delay needed for thread to activate - DPRINTF(O3CPU, "Adding Thread %i to active threads list\n", + DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n", tid); activeThreads.push_back(tid); } +} - assert(_status == Idle || _status == SwitchedOut); - scheduleTickEvent(delay); +template +void +FullO3CPU::activateContext(int tid, int delay) +{ + // Needs to set each stage to running as well. + if (delay){ + DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to activate " + "on cycle %d\n", tid, curTick + cycles(delay)); + scheduleActivateThreadEvent(tid, delay); + } else { + activateThread(tid); + } - // Be sure to signal that there's some activity so the CPU doesn't - // deschedule itself. - activityRec.activity(); - fetch.wakeFromQuiesce(); + if(lastActivatedCycle < curTick) { + scheduleTickEvent(delay); - _status = Running; + // Be sure to signal that there's some activity so the CPU doesn't + // deschedule itself. + activityRec.activity(); + fetch.wakeFromQuiesce(); + + lastActivatedCycle = curTick; + + _status = Running; + } } template diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 9565bbe4f3..2a9ecff4ed 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -102,6 +102,7 @@ class FullO3CPU : public BaseO3CPU typedef typename std::list::iterator ListIt; friend class O3ThreadContext; + public: enum Status { Running, @@ -114,6 +115,9 @@ class FullO3CPU : public BaseO3CPU /** Overall CPU status. */ Status _status; + /** Per-thread status in CPU, used for SMT. */ + Status _threadStatus[Impl::MaxThreads]; + private: class TickEvent : public Event { @@ -150,6 +154,49 @@ class FullO3CPU : public BaseO3CPU tickEvent.squash(); } + class ActivateThreadEvent : public Event + { + private: + /** Number of Thread to Activate */ + int tid; + + /** Pointer to the CPU. 
*/ + FullO3CPU *cpu; + + public: + /** Constructs the event. */ + ActivateThreadEvent(); + + /** Initialize Event */ + void init(int thread_num, FullO3CPU *thread_cpu); + + /** Processes the event, calling activateThread() on the CPU. */ + void process(); + + /** Returns the description of the event. */ + const char *description(); + }; + + /** Schedule thread to activate , regardless of its current state. */ + void scheduleActivateThreadEvent(int tid, int delay) + { + // Schedule thread to activate, regardless of its current state. + if (activateThreadEvent[tid].squashed()) + activateThreadEvent[tid].reschedule(curTick + cycles(delay)); + else if (!activateThreadEvent[tid].scheduled()) + activateThreadEvent[tid].schedule(curTick + cycles(delay)); + } + + /** Unschedule actiavte thread event, regardless of its current state. */ + void unscheduleActivateThreadEvent(int tid) + { + if (activateThreadEvent[tid].scheduled()) + activateThreadEvent[tid].squash(); + } + + /** The tick event used for scheduling CPU ticks. */ + ActivateThreadEvent activateThreadEvent[Impl::MaxThreads]; + public: /** Constructs a CPU with the given parameters. */ FullO3CPU(Params *params); @@ -167,6 +214,9 @@ class FullO3CPU : public BaseO3CPU /** Initialize the CPU */ void init(); + /** Add Thread to Active Threads List */ + void activateThread(unsigned int tid); + /** Setup CPU to insert a thread's context */ void insertThread(unsigned tid); @@ -522,6 +572,9 @@ class FullO3CPU : public BaseO3CPU /** The cycle that the CPU was last running, used for statistics. 
*/ Tick lastRunningCycle; + /** The cycle that the CPU was last activated by a new thread*/ + Tick lastActivatedCycle; + /** Number of Threads CPU can process */ unsigned numThreads; From 23cfd9489bdff00f926f5dcfb7dd72333fb11bfb Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sat, 1 Jul 2006 19:02:43 -0400 Subject: [PATCH 088/152] traceflag stuff src/base/traceflags.py: add BaseCPU flag, O3CPUAll flag grouping src/cpu/base.cc: Use BaseCPU flag instead of FullCPU flag --HG-- extra : convert_revision : 32f737a2f58eb936634799f1f809e07cbba90179 --- src/base/traceflags.py | 3 ++- src/cpu/base.cc | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/base/traceflags.py b/src/base/traceflags.py index c4dcb695b2..d51236c46f 100644 --- a/src/base/traceflags.py +++ b/src/base/traceflags.py @@ -115,6 +115,7 @@ baseFlags = [ 'MSHR', 'Mbox', 'MemDepUnit', + 'BaseCPU' 'O3CPU', 'OzoneCPU', 'FE', @@ -176,7 +177,7 @@ compoundFlagMap = { 'EthernetAll' : [ 'Ethernet', 'EthernetPIO', 'EthernetDMA', 'EthernetData' , 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ], 'EthernetNoData' : [ 'Ethernet', 'EthernetPIO', 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ], 'IdeAll' : [ 'IdeCtrl', 'IdeDisk' ], - 'FullCPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'LSQ', 'LSQUnit', 'StoreSet', 'MemDepUnit', 'DynInst', 'FullCPU', 'Activity','Scoreboard','Writeback'], + 'O3CPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'LSQ', 'LSQUnit', 'StoreSet', 'MemDepUnit', 'DynInst', 'O3CPU', 'Activity','Scoreboard','Writeback'], 'OzoneCPUAll' : [ 'BE', 'FE', 'IBE', 'OzoneLSQ', 'OzoneCPU'] } diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 40cec416be..9df61d2ced 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -68,12 +68,12 @@ BaseCPU::BaseCPU(Params *p) number_of_threads(p->numberOfThreads), system(p->system) #endif { - DPRINTF(FullCPU, "BaseCPU: Creating 
object, mem address %#x.\n", this); + DPRINTF(BaseCPU, "BaseCPU: Creating object, mem address %#x.\n", this); // add self to global list of CPUs cpuList.push_back(this); - DPRINTF(FullCPU, "BaseCPU: CPU added to cpuList, mem address %#x.\n", + DPRINTF(BaseCPU, "BaseCPU: CPU added to cpuList, mem address %#x.\n", this); if (number_of_threads > maxThreadsPerCPU) From c8b3d8a1edbab505e5f9748cfa1ee866ed1fb02f Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 2 Jul 2006 23:11:24 -0400 Subject: [PATCH 089/152] Fix default SMT configuration in O3CPU (i.e. fetch policy, workloads/numThreads) Edit Test3 for newmem src/base/traceflags.py: Add O3CPU flag src/cpu/base.cc: for some reason adding a BaseCPU flag doesnt work so just go back to old way... src/cpu/o3/alpha/cpu_builder.cc: Determine number threads by workload size instead of solely by parameter. Default SMT fetch policy to RoundRobin if it's not specified in Config file src/cpu/o3/commit.hh: only use nextNPC for !ALPHA src/cpu/o3/commit_impl.hh: add FetchTrapPending as condition for commit src/cpu/o3/cpu.cc: panic if active threads is more than Impl::MaxThreads src/cpu/o3/fetch.hh: src/cpu/o3/inst_queue.hh: src/cpu/o3/inst_queue_impl.hh: src/cpu/o3/rob.hh: src/cpu/o3/rob_impl.hh: name stuff src/cpu/o3/fetch_impl.hh: fatal if try to use SMT branch count, that's unimplemented right now src/python/m5/config.py: make it clearer that a parameter is not valid within a configuration class --HG-- extra : convert_revision : 55069847304e40e257f9225f0dc3894ce6491b34 --- src/base/traceflags.py | 1 + src/cpu/base.cc | 4 ++-- src/cpu/o3/alpha/cpu_builder.cc | 15 ++++++++++++--- src/cpu/o3/commit.hh | 2 ++ src/cpu/o3/commit_impl.hh | 3 ++- src/cpu/o3/cpu.cc | 10 ++++++++-- src/cpu/o3/fetch_impl.hh | 13 +++++++++---- src/cpu/o3/inst_queue.hh | 1 + src/cpu/o3/inst_queue_impl.hh | 5 +++-- src/cpu/o3/rob.hh | 1 + src/cpu/o3/rob_impl.hh | 1 + src/python/m5/config.py | 2 +- 12 files changed, 43 insertions(+), 15 deletions(-) diff 
--git a/src/base/traceflags.py b/src/base/traceflags.py index d51236c46f..327ce60757 100644 --- a/src/base/traceflags.py +++ b/src/base/traceflags.py @@ -121,6 +121,7 @@ baseFlags = [ 'FE', 'IBE', 'BE', + 'O3CPU', 'OzoneLSQ', 'PCEvent', 'PCIA', diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 9df61d2ced..40cec416be 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -68,12 +68,12 @@ BaseCPU::BaseCPU(Params *p) number_of_threads(p->numberOfThreads), system(p->system) #endif { - DPRINTF(BaseCPU, "BaseCPU: Creating object, mem address %#x.\n", this); + DPRINTF(FullCPU, "BaseCPU: Creating object, mem address %#x.\n", this); // add self to global list of CPUs cpuList.push_back(this); - DPRINTF(BaseCPU, "BaseCPU: CPU added to cpuList, mem address %#x.\n", + DPRINTF(FullCPU, "BaseCPU: CPU added to cpuList, mem address %#x.\n", this); if (number_of_threads > maxThreadsPerCPU) diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc index 8190256fbf..12d083a436 100644 --- a/src/cpu/o3/alpha/cpu_builder.cc +++ b/src/cpu/o3/alpha/cpu_builder.cc @@ -274,12 +274,12 @@ CREATE_SIM_OBJECT(DerivO3CPU) // In non-full-system mode, we infer the number of threads from // the workload if it's not explicitly specified. int actual_num_threads = - numThreads.isValid() ? numThreads : workload.size(); + (numThreads.isValid() && numThreads >= workload.size()) ? + numThreads : workload.size(); if (workload.size() == 0) { fatal("Must specify at least one workload!"); } - #endif AlphaSimpleParams *params = new AlphaSimpleParams; @@ -371,7 +371,16 @@ CREATE_SIM_OBJECT(DerivO3CPU) params->numROBEntries = numROBEntries; params->smtNumFetchingThreads = smtNumFetchingThreads; - params->smtFetchPolicy = smtFetchPolicy; + + // Default smtFetchPolicy to "RoundRobin", if necessary. 
+ std::string round_robin_policy = "RoundRobin"; + std::string single_thread = "SingleThread"; + + if (actual_num_threads > 1 && single_thread.compare(smtFetchPolicy) == 0) + params->smtFetchPolicy = single_thread; + else + params->smtFetchPolicy = smtFetchPolicy; + params->smtIQPolicy = smtIQPolicy; params->smtLSQPolicy = smtLSQPolicy; params->smtLSQThreshold = smtLSQThreshold; diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 8603262833..60b555269e 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -406,8 +406,10 @@ class DefaultCommit /** The next PC of each thread. */ Addr nextPC[Impl::MaxThreads]; +#if THE_ISA != ALPHA_ISA /** The next NPC of each thread. */ Addr nextNPC[Impl::MaxThreads]; +#endif /** The sequence number of the youngest valid instruction in the ROB. */ InstSeqNum youngestSeqNum[Impl::MaxThreads]; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index cd7dd47d48..06b8e8a954 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -1221,7 +1221,8 @@ DefaultCommit::roundRobin() unsigned tid = *pri_iter; if (commitStatus[tid] == Running || - commitStatus[tid] == Idle) { + commitStatus[tid] == Idle || + commitStatus[tid] == FetchTrapPending) { if (rob->isHeadReady(tid)) { priority_list.erase(pri_iter); diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 7ed17a91e8..feca4cdf2f 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -127,7 +127,7 @@ FullO3CPU::FullO3CPU(Params *params) regFile(params->numPhysIntRegs, params->numPhysFloatRegs), - freeList(params->numberOfThreads,//number of activeThreads + freeList(params->numberOfThreads, TheISA::NumIntRegs, params->numPhysIntRegs, TheISA::NumFloatRegs, params->numPhysFloatRegs), @@ -135,7 +135,7 @@ FullO3CPU::FullO3CPU(Params *params) params->smtROBPolicy, params->smtROBThreshold, params->numberOfThreads), - scoreboard(params->numberOfThreads,//number of activeThreads + scoreboard(params->numberOfThreads, TheISA::NumIntRegs, 
params->numPhysIntRegs, TheISA::NumFloatRegs, params->numPhysFloatRegs, TheISA::NumMiscRegs * number_of_threads, @@ -221,6 +221,12 @@ FullO3CPU::FullO3CPU(Params *params) #if !FULL_SYSTEM int active_threads = params->workload.size(); + + if (active_threads > Impl::MaxThreads) { + panic("Workload Size too large. Increase the 'MaxThreads'" + "constant in your O3CPU impl. file (e.g. o3/alpha/impl.hh) or " + "edit your workload size."); + } #else int active_threads = 1; #endif diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index e570dbb18d..60eb76d178 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -114,8 +114,6 @@ DefaultFetch::DefaultFetch(Params *params) if (numThreads > Impl::MaxThreads) fatal("numThreads is not a valid value\n"); - DPRINTF(Fetch, "Fetch constructor called\n"); - // Set fetch stage's status to inactive. _status = Inactive; @@ -128,6 +126,8 @@ DefaultFetch::DefaultFetch(Params *params) // Figure out fetch policy if (policy == "singlethread") { fetchPolicy = SingleThread; + if (numThreads > 1) + panic("Invalid Fetch Policy for a SMT workload."); } else if (policy == "roundrobin") { fetchPolicy = RoundRobin; DPRINTF(Fetch, "Fetch policy set to Round Robin\n"); @@ -559,7 +559,7 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid return false; } - DPRINTF(Fetch, "Doing cache access.\n"); + DPRINTF(Fetch, "[tid:%i]: Doing cache access.\n", tid); lastIcacheStall[tid] = curTick; @@ -724,12 +724,15 @@ DefaultFetch::tick() // Reset the number of the instruction we're fetching. 
numInst = 0; +#if FULL_SYSTEM if (fromCommit->commitInfo[0].interruptPending) { interruptPending = true; } + if (fromCommit->commitInfo[0].clearInterrupt) { interruptPending = false; } +#endif for (threadFetched = 0; threadFetched < numFetchingThreads; threadFetched++) { @@ -903,6 +906,8 @@ DefaultFetch::fetch(bool &status_change) return; } + DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid); + // The current PC. Addr &fetch_PC = PC[tid]; @@ -1279,6 +1284,6 @@ int DefaultFetch::branchCount() { list::iterator threads = (*activeThreads).begin(); - warn("Branch Count Fetch policy unimplemented\n"); + panic("Branch Count Fetch policy unimplemented\n"); return *threads; } diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index d745faf7bd..4c69ca3844 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_INST_QUEUE_HH__ diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 1ef1b2cffe..b99bd09000 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * Authors: Kevin Lim + * Korey Sewell */ #include @@ -125,7 +126,7 @@ InstructionQueue::InstructionQueue(Params *params) maxEntries[i] = part_amt; } - DPRINTF(Fetch, "IQ sharing policy set to Partitioned:" + DPRINTF(IQ, "IQ sharing policy set to Partitioned:" "%i entries per thread.\n",part_amt); } else if (policy == "threshold") { @@ -140,7 +141,7 @@ InstructionQueue::InstructionQueue(Params *params) maxEntries[i] = thresholdIQ; } - DPRINTF(Fetch, "IQ sharing policy set to Threshold:" + DPRINTF(IQ, "IQ sharing policy set to Threshold:" "%i entries per thread.\n",thresholdIQ); } else { assert(0 && "Invalid IQ Sharing Policy.Options Are:{Dynamic," diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh index b98d7c4c2a..6f8080ef44 100644 --- a/src/cpu/o3/rob.hh +++ b/src/cpu/o3/rob.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_ROB_HH__ diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh index 6277dd68be..d9978b17f7 100644 --- a/src/cpu/o3/rob_impl.hh +++ b/src/cpu/o3/rob_impl.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #include "config/full_system.hh" diff --git a/src/python/m5/config.py b/src/python/m5/config.py index adabe07439..6f2873d40b 100644 --- a/src/python/m5/config.py +++ b/src/python/m5/config.py @@ -274,7 +274,7 @@ class MetaSimObject(type): cls._values[attr] = value else: raise AttributeError, \ - "Class %s has no parameter %s" % (cls.__name__, attr) + "Class %s has no parameter \'%s\'" % (cls.__name__, attr) def __getattr__(cls, attr): if cls._values.has_key(attr): From cf58578ba120186947f382893d2e370bd6e436ce Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 2 Jul 2006 23:27:13 -0400 Subject: [PATCH 090/152] typo ... 
change 'single_thread' to 'round_robin_policy' --HG-- extra : convert_revision : a4a5cb90557f786d42c6178bc6e268312c5ecbee --- src/cpu/o3/alpha/cpu_builder.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc index 12d083a436..490305cbf2 100644 --- a/src/cpu/o3/alpha/cpu_builder.cc +++ b/src/cpu/o3/alpha/cpu_builder.cc @@ -377,7 +377,7 @@ CREATE_SIM_OBJECT(DerivO3CPU) std::string single_thread = "SingleThread"; if (actual_num_threads > 1 && single_thread.compare(smtFetchPolicy) == 0) - params->smtFetchPolicy = single_thread; + params->smtFetchPolicy = round_robin_policy; else params->smtFetchPolicy = smtFetchPolicy; From 19083bc4ce379c03b39ba941c18b11a88b141e18 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Mon, 3 Jul 2006 01:10:19 -0400 Subject: [PATCH 091/152] Added hook to check for SMT workloads. SMT is identified by adding a semicolon between the workloads. Now SMT on the O3CPU can be invoked by "/ALPHA_SE/m5.debug ../configs/test/test.py -d --cmd="hello;hello" -i="file1;file2" I think I am a novice python magician now!!!!.... configs/test/test.py: Added hook to check for SMT workloads. SMT is identified by adding a semicolon between the workloads. Now SMT on the O3CPU can be invoked by "/ALPHA_SE/m5.debug ../configs/test/test.py -d --cmd="hello;hello" --input="file1;file2" (btw, We are back to working for this double hello world case) I am a novice python magician now!!!!.... 
--HG-- extra : convert_revision : b55e10dce33f5a9dc4c78f90409ec0912bad4292 --- configs/test/test.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/configs/test/test.py b/configs/test/test.py index 3095cd1d12..625304a08a 100644 --- a/configs/test/test.py +++ b/configs/test/test.py @@ -51,6 +51,25 @@ if options.timing and options.detailed: if options.timing: cpu = TimingSimpleCPU() elif options.detailed: + #check for SMT workload + workloads = options.cmd.split(';') + if len(workloads) > 1: + process = [] + smt_idx = 0 + inputs = [] + + if options.input != "": + inputs = options.input.split(';') + + for wrkld in workloads: + smt_process = LiveProcess() + smt_process.executable = os.path.join(this_dir, wrkld) + smt_process.cmd = wrkld + " " + options.options + if inputs and inputs[smt_idx]: + smt_process.input = inputs[smt_idx] + process += [smt_process, ] + smt_idx += 1 + cpu = DetailedO3CPU() else: cpu = AtomicSimpleCPU() From f4c5609988731f52f9c5bd84ee2db364bbf6fd97 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Mon, 3 Jul 2006 12:19:35 -0400 Subject: [PATCH 092/152] Fix for FS O3CPU compile ... 
missing forward class declaration/header file after files got split for ISA-independence src/cpu/o3/alpha/thread_context.hh: Use 'this' when accessing cpu src/cpu/o3/cpu.hh: add numActiveThreds function src/cpu/o3/thread_context.hh: forward class declarations src/cpu/o3/thread_context_impl.hh: add quiesce event header file src/cpu/thread_context.hh: add exit() function to thread context (read comments in file) src/sim/syscall_emul.cc: adjust exitFunc syscall --HG-- extra : convert_revision : 323dc871e2b4f4ee5036be388ceb6634cd85a83e --- src/cpu/o3/alpha/thread_context.hh | 24 ++++++++++++++++++++---- src/cpu/o3/cpu.hh | 4 ++++ src/cpu/o3/thread_context.hh | 7 +++++++ src/cpu/o3/thread_context_impl.hh | 1 + src/cpu/thread_context.hh | 5 +++++ src/sim/syscall_emul.cc | 5 +++-- 6 files changed, 40 insertions(+), 6 deletions(-) diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh index 57190d65e3..78b0ee7882 100644 --- a/src/cpu/o3/alpha/thread_context.hh +++ b/src/cpu/o3/alpha/thread_context.hh @@ -37,21 +37,21 @@ class AlphaTC : public O3ThreadContext public: #if FULL_SYSTEM /** Returns a pointer to the ITB. */ - virtual AlphaITB *getITBPtr() { return cpu->itb; } + virtual AlphaITB *getITBPtr() { return this->cpu->itb; } /** Returns a pointer to the DTB. */ - virtual AlphaDTB *getDTBPtr() { return cpu->dtb; } + virtual AlphaDTB *getDTBPtr() { return this->cpu->dtb; } /** Returns pointer to the quiesce event. */ virtual EndQuiesceEvent *getQuiesceEvent() { - return thread->quiesceEvent; + return this->thread->quiesceEvent; } /** Returns if the thread is currently in PAL mode, based on * the PC's value. 
*/ virtual bool inPalMode() - { return TheISA::PcPAL(cpu->readPC(thread->readTid())); } + { return TheISA::PcPAL(this->cpu->readPC(this->thread->readTid())); } #endif virtual uint64_t readNextNPC() @@ -68,4 +68,20 @@ class AlphaTC : public O3ThreadContext virtual void changeRegFileContext(TheISA::RegFile::ContextParam param, TheISA::RegFile::ContextVal val) { panic("Not supported on Alpha!"); } + + + // This function exits the thread context in the CPU and returns + // 1 if the CPU has no more active threads (meaning it's OK to exit); + // Used in syscall-emulation mode when a thread executes the 'exit' + // syscall. + virtual int exit() + { + this->cpu->deallocateContext(this->thread->readTid()); + + // If there are still threads executing in the system + if (this->cpu->numActiveThreads()) + return 0; + else + return 1; + } }; diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 2a9ecff4ed..1cff6142d3 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -214,6 +214,10 @@ class FullO3CPU : public BaseO3CPU /** Initialize the CPU */ void init(); + /** Returns the Number of Active Threads in the CPU */ + int numActiveThreads() + { return activeThreads.size(); } + /** Add Thread to Active Threads List */ void activateThread(unsigned int tid); diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh index d60867029a..d097ee63eb 100755 --- a/src/cpu/o3/thread_context.hh +++ b/src/cpu/o3/thread_context.hh @@ -34,6 +34,13 @@ #include "cpu/o3/isa_specific.hh" +class EndQuiesceEvent; +namespace Kernel { + class Statistics; +}; + +class TranslatingPort; + /** * Derived ThreadContext class for use with the O3CPU. 
It * provides the interface for any external objects to access a diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index fccabaf363..cfb71f6238 100755 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -30,6 +30,7 @@ */ #include "cpu/o3/thread_context.hh" +#include "cpu/quiesce_event.hh" using namespace TheISA; diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh index 3c79e11168..70d7051441 100644 --- a/src/cpu/thread_context.hh +++ b/src/cpu/thread_context.hh @@ -247,6 +247,11 @@ class ThreadContext // Same with st cond failures. virtual Counter readFuncExeInst() = 0; + + // This function exits the thread context in the CPU and returns + // 1 if the CPU has no more active threads (meaning it's OK to exit); + // Used in syscall-emulation mode when a thread calls the exit syscall. + virtual int exit() { return 1; }; #endif virtual void changeRegFileContext(RegFile::ContextParam param, diff --git a/src/sim/syscall_emul.cc b/src/sim/syscall_emul.cc index 848b6f8696..e72890612d 100644 --- a/src/sim/syscall_emul.cc +++ b/src/sim/syscall_emul.cc @@ -27,7 +27,6 @@ * * Authors: Steve Reinhardt * Ali Saidi - * Korey Sewell */ #include @@ -92,7 +91,9 @@ SyscallReturn exitFunc(SyscallDesc *desc, int callnum, Process *process, ThreadContext *tc) { - exitSimLoop("target called exit()", tc->getSyscallArg(0) & 0xff); + if (tc->exit()) { + exitSimLoop("target called exit()", tc->getSyscallArg(0) & 0xff); + } return 1; } From 4201ec84b2dd7d96148bf661124dd7b5d0e7204b Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Wed, 5 Jul 2006 15:13:27 -0400 Subject: [PATCH 093/152] Fix some unset values in the request in the timing CPU. Properly implement the MSHR allocate function. src/cpu/simple/timing.cc: Set the thread context in the CPU. Need to do this properly, currently I just set it to Cpu=0 Thread=0. 
This will just cause all the stats in the cache based on these to just yield totals and not a distribution. src/mem/cache/miss/mshr.cc: Properly implement the allocate function for the MSHR. --HG-- extra : convert_revision : bcece518e54ed1404db3196f996a77b4dd5c1c1e --- src/cpu/simple/timing.cc | 4 +++- src/mem/cache/miss/mshr.cc | 36 ++++++++++++++++++------------------ 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 0729f94898..d5bdcfa9b1 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -207,7 +207,7 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) { // need to fill in CPU & thread IDs here Request *data_read_req = new Request(); - + data_read_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE data_read_req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); if (traceData) { @@ -288,6 +288,7 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) { // need to fill in CPU & thread IDs here Request *data_write_req = new Request(); + data_write_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE data_write_req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); // translate to physical address @@ -371,6 +372,7 @@ TimingSimpleCPU::fetch() // need to fill in CPU & thread IDs here Request *ifetch_req = new Request(); + ifetch_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE Fault fault = setupFetchRequest(ifetch_req); ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast); diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 05a2fe1c59..1a85d3018c 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -57,26 +57,26 @@ void MSHR::allocate(Packet::Command cmd, Addr _addr, int _asid, int size, Packet * &target) { - assert("NEED TO FIX YET\n" && 0); -#if 0 - assert(targets.empty()); - addr = _addr; - asid = _asid; - - pkt = new Packet(); // allocate new memory request - 
pkt->addr = addr; //picked physical address for now - pkt->cmd = cmd; - pkt->size = size; - pkt->data = new uint8_t[size]; - pkt->senderState = this; - //Set the time here for latency calculations - pkt->time = curTick; - - if (target) { - pkt->req = target->req; + if (target) + { + //Have a request, just use it + pkt = new Packet(target->req, cmd, Packet::Broadcast, size); + pkt->time = curTick; + pkt->allocate(); + pkt->senderState = (Packet::SenderState *)this; allocateTarget(target); } -#endif + else + { + //need a request first + Request * req = new Request(); + req->setPhys(addr, size, 0); + //Thread context?? + pkt = new Packet(req, cmd, Packet::Broadcast, size); + pkt->time = curTick; + pkt->allocate(); + pkt->senderState = (Packet::SenderState *)this; + } } // Since we aren't sure if data is being used, don't copy here. From b84103811df3d0203cdde8524cdcce57ded706be Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Wed, 5 Jul 2006 15:51:36 -0400 Subject: [PATCH 094/152] Add some different parameters. The main change is that the writeback count is now limited so that it doesn't overflow the buffer. src/cpu/o3/alpha_cpu_builder.cc: src/cpu/o3/alpha_params.hh: Add in dispatchWidth, wbWidth, wbDepth parameters. wbDepth is the number of cycles of wbWidth instructions that can be buffered. src/cpu/o3/iew.hh: Include separate parameter for dispatch width. Also limit the number of outstanding writebacks so the writeback buffer isn't overflowed. The IQ must make sure with the IEW stage that it can issue instructions prior to issuing. src/cpu/o3/iew_impl.hh: Include separate parameter for dispatch width. Also limit the number of outstanding writebacks so the writeback buffer isn't overflowed. src/cpu/o3/inst_queue_impl.hh: IQ needs to check with the IEW to make sure it can issue instructions, and increments the IEW wb counter each time there is an outstanding instruction that will writeback. 
src/cpu/o3/lsq_unit_impl.hh: Be sure to decrement the writeback counter if there's a squashed load that returned. src/python/m5/objects/AlphaO3CPU.py: Change the parameters to include dispatch width, writeback width, and writeback depth. --HG-- extra : convert_revision : 31c8cc495273e3c481b79055562fc40f71291fc4 --- src/cpu/o3/alpha_cpu_builder.cc | 9 +++++ src/cpu/o3/alpha_params.hh | 3 ++ src/cpu/o3/iew.hh | 57 ++++++++++++++++++++++++++--- src/cpu/o3/iew_impl.hh | 17 +++++++-- src/cpu/o3/inst_queue_impl.hh | 2 + src/cpu/o3/lsq_unit_impl.hh | 1 + src/python/m5/objects/AlphaO3CPU.py | 8 ++-- 7 files changed, 83 insertions(+), 14 deletions(-) diff --git a/src/cpu/o3/alpha_cpu_builder.cc b/src/cpu/o3/alpha_cpu_builder.cc index b1e141ff4e..4f5dd04653 100644 --- a/src/cpu/o3/alpha_cpu_builder.cc +++ b/src/cpu/o3/alpha_cpu_builder.cc @@ -91,7 +91,10 @@ Param renameWidth; Param commitToIEWDelay; Param renameToIEWDelay; Param issueToExecuteDelay; +Param dispatchWidth; Param issueWidth; +Param wbWidth; +Param wbDepth; SimObjectParam fuPool; Param iewToCommitDelay; @@ -207,7 +210,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaO3CPU) "Issue/Execute/Writeback delay"), INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" "to the IEW stage)"), + INIT_PARAM(dispatchWidth, "Dispatch width"), INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(wbWidth, "Writeback width"), + INIT_PARAM(wbDepth, "Writeback depth (number of cycles it can buffer)"), INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL), INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " @@ -333,7 +339,10 @@ CREATE_SIM_OBJECT(DerivAlphaO3CPU) params->commitToIEWDelay = commitToIEWDelay; params->renameToIEWDelay = renameToIEWDelay; params->issueToExecuteDelay = issueToExecuteDelay; + params->dispatchWidth = dispatchWidth; params->issueWidth = issueWidth; + params->wbWidth = wbWidth; + params->wbDepth = wbDepth; params->fuPool = fuPool; params->iewToCommitDelay = iewToCommitDelay; diff --git 
a/src/cpu/o3/alpha_params.hh b/src/cpu/o3/alpha_params.hh index f0732733e2..78246e3536 100644 --- a/src/cpu/o3/alpha_params.hh +++ b/src/cpu/o3/alpha_params.hh @@ -104,7 +104,10 @@ class AlphaSimpleParams : public BaseO3CPU::Params unsigned commitToIEWDelay; unsigned renameToIEWDelay; unsigned issueToExecuteDelay; + unsigned dispatchWidth; unsigned issueWidth; + unsigned wbWidth; + unsigned wbDepth; FUPool *fuPool; // diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 2af68d8fcf..9627609c22 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -204,6 +204,45 @@ class DefaultIEW /** Returns if the LSQ has any stores to writeback. */ bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); } + void incrWb(InstSeqNum &sn) + { + if (++wbOutstanding == wbMax) + ableToIssue = false; + DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); +#if DEBUG + wbList.insert(sn); +#endif + } + + void decrWb(InstSeqNum &sn) + { + if (wbOutstanding-- == wbMax) + ableToIssue = true; + DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding); +#if DEBUG + assert(wbList.find(sn) != wbList.end()); + wbList.erase(sn); +#endif + } + +#if DEBUG + std::set wbList; + + void dumpWb() + { + std::set::iterator wb_it = wbList.begin(); + while (wb_it != wbList.end()) { + cprintf("[sn:%lli]\n", + (*wb_it)); + wb_it++; + } + } +#endif + + bool canIssue() { return ableToIssue; } + + bool ableToIssue; + private: /** Sends commit proper information for a squash due to a branch * mispredict. @@ -384,11 +423,8 @@ class DefaultIEW */ unsigned issueToExecuteDelay; - /** Width of issue's read path, in instructions. The read path is both - * the skid buffer and the rename instruction queue. - * Note to self: is this really different than issueWidth? - */ - unsigned issueReadWidth; + /** Width of dispatch, in instructions. */ + unsigned dispatchWidth; /** Width of issue, in instructions. 
*/ unsigned issueWidth; @@ -403,6 +439,17 @@ class DefaultIEW */ unsigned wbCycle; + /** Number of instructions in flight that will writeback. */ + unsigned wbOutstanding; + + /** Writeback width. */ + unsigned wbWidth; + + /** Writeback width * writeback depth, where writeback depth is + * the number of cycles of writing back instructions that can be + * buffered. */ + unsigned wbMax; + /** Number of active threads. */ unsigned numThreads; diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 8e6fd46a11..118038b651 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -50,8 +50,10 @@ DefaultIEW::DefaultIEW(Params *params) commitToIEWDelay(params->commitToIEWDelay), renameToIEWDelay(params->renameToIEWDelay), issueToExecuteDelay(params->issueToExecuteDelay), - issueReadWidth(params->issueWidth), + dispatchWidth(params->dispatchWidth), issueWidth(params->issueWidth), + wbOutstanding(0), + wbWidth(params->wbWidth), numThreads(params->numberOfThreads), switchedOut(false) { @@ -74,8 +76,12 @@ DefaultIEW::DefaultIEW(Params *params) fetchRedirect[i] = false; } + wbMax = wbWidth * params->wbDepth; + updateLSQNextCycle = false; + ableToIssue = true; + skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth; } @@ -559,12 +565,12 @@ DefaultIEW::instToCommit(DynInstPtr &inst) // free slot. while ((*iewQueue)[wbCycle].insts[wbNumInst]) { ++wbNumInst; - if (wbNumInst == issueWidth) { + if (wbNumInst == wbWidth) { ++wbCycle; wbNumInst = 0; } - assert(wbCycle < 5); + assert((wbCycle * wbWidth + wbNumInst) < wbMax); } // Add finished instruction to queue to commit. @@ -937,7 +943,7 @@ DefaultIEW::dispatchInsts(unsigned tid) // Loop through the instructions, putting them in the instruction // queue. 
for ( ; dis_num_inst < insts_to_add && - dis_num_inst < issueReadWidth; + dis_num_inst < dispatchWidth; ++dis_num_inst) { inst = insts_to_dispatch.front(); @@ -1189,6 +1195,7 @@ DefaultIEW::executeInsts() ++iewExecSquashedInsts; + decrWb(inst->seqNum); continue; } @@ -1351,6 +1358,8 @@ DefaultIEW::writebackInsts() } writebackCount[tid]++; } + + decrWb(inst->seqNum); } } diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 1ef1b2cffe..61c04cc2b9 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -686,6 +686,7 @@ InstructionQueue::scheduleReadyInsts() int total_issued = 0; while (total_issued < totalWidth && + iewStage->canIssue() && order_it != order_end_it) { OpClass op_class = (*order_it).queueType; @@ -783,6 +784,7 @@ InstructionQueue::scheduleReadyInsts() listOrder.erase(order_it++); statIssuedInstType[tid][op_class]++; + iewStage->incrWb(issuing_inst->seqNum); } else { statFuBusy[op_class]++; fuBusy[tid]++; diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 714acb2ef7..bb3da7eec1 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -77,6 +77,7 @@ LSQUnit::completeDataAccess(PacketPtr pkt) //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); if (isSwitchedOut() || inst->isSquashed()) { + iewStage->decrWb(inst->seqNum); delete state; delete pkt; return; diff --git a/src/python/m5/objects/AlphaO3CPU.py b/src/python/m5/objects/AlphaO3CPU.py index f14f8c88ed..e7c10987ad 100644 --- a/src/python/m5/objects/AlphaO3CPU.py +++ b/src/python/m5/objects/AlphaO3CPU.py @@ -37,12 +37,10 @@ class DerivAlphaO3CPU(BaseCPU): "Issue/Execute/Writeback delay") issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal " "to the IEW stage)") + dispatchWidth = Param.Unsigned("Dispatch width") issueWidth = Param.Unsigned("Issue width") - executeWidth = Param.Unsigned("Execute width") - executeIntWidth = Param.Unsigned("Integer execute width") - 
executeFloatWidth = Param.Unsigned("Floating point execute width") - executeBranchWidth = Param.Unsigned("Branch execute width") - executeMemoryWidth = Param.Unsigned("Memory execute width") + wbWidth = Param.Unsigned("Writeback width") + wbDepth = Param.Unsigned("Writeback depth") fuPool = Param.FUPool(NULL, "Functional Unit pool") iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit " From b0ed531bd85463bf3c4c44c8f6aa34559a321682 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Wed, 5 Jul 2006 15:53:22 -0400 Subject: [PATCH 095/152] Split off files that are shared across the O3 and Ozone models. --HG-- extra : convert_revision : 023e84660d5cee5162d39548f87e5ca8ec68115f --- src/cpu/SConscript | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/cpu/SConscript b/src/cpu/SConscript index baa5d531e4..8b584ba57b 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -130,15 +130,15 @@ if need_simple_base: if 'FastCPU' in env['CPU_MODELS']: sources += Split('fast/cpu.cc') +need_bp_unit = False if 'AlphaO3CPU' in env['CPU_MODELS']: + need_bp_unit = True sources += Split(''' - o3/2bit_local_pred.cc o3/alpha_dyn_inst.cc o3/alpha_cpu.cc o3/alpha_cpu_builder.cc o3/base_dyn_inst.cc o3/bpred_unit.cc - o3/btb.cc o3/commit.cc o3/decode.cc o3/fetch.cc @@ -150,18 +150,17 @@ if 'AlphaO3CPU' in env['CPU_MODELS']: o3/lsq_unit.cc o3/lsq.cc o3/mem_dep_unit.cc - o3/ras.cc o3/rename.cc o3/rename_map.cc o3/rob.cc o3/scoreboard.cc o3/store_set.cc - o3/tournament_pred.cc ''') if env['USE_CHECKER']: sources += Split('o3/checker_builder.cc') if 'OzoneCPU' in env['CPU_MODELS']: + need_bp_unit = True sources += Split(''' ozone/base_dyn_inst.cc ozone/bpred_unit.cc @@ -176,6 +175,14 @@ if 'OzoneCPU' in env['CPU_MODELS']: if env['USE_CHECKER']: sources += Split('ozone/checker_builder.cc') +if need_bp_unit: + sources += Split(''' + o3/2bit_local_pred.cc + o3/btb.cc + o3/ras.cc + o3/tournament_pred.cc + ''') + if env['USE_CHECKER']: sources += 
Split('checker/cpu.cc') checker_supports = False From e41fd9cb6309083c32a2795e3ac449628138d441 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Wed, 5 Jul 2006 15:55:45 -0400 Subject: [PATCH 096/152] Alphabetize traceflags, rename FullCPUAll flag to O3CPUAll. --HG-- extra : convert_revision : f558966154376223674c82d513afc2dad6591426 --- src/base/traceflags.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/base/traceflags.py b/src/base/traceflags.py index c4dcb695b2..27c24107c9 100644 --- a/src/base/traceflags.py +++ b/src/base/traceflags.py @@ -48,8 +48,10 @@ ccfilename = sys.argv[1] + '.cc' # To define a new flag, simply add it to this list. # baseFlags = [ + 'Activity', 'AlphaConsole', 'BADADDR', + 'BE', 'BPredRAS', 'Bus', 'BusAddrRanges', @@ -84,6 +86,7 @@ baseFlags = [ 'EthernetPIO', 'EthernetSM', 'Event', + 'FE', 'Fault', 'Fetch', 'Flow', @@ -97,6 +100,7 @@ baseFlags = [ 'GDBSend', 'GDBWrite', 'HWPrefetch', + 'IBE', 'IEW', 'IIC', 'IICMore', @@ -117,9 +121,6 @@ baseFlags = [ 'MemDepUnit', 'O3CPU', 'OzoneCPU', - 'FE', - 'IBE', - 'BE', 'OzoneLSQ', 'PCEvent', 'PCIA', @@ -133,6 +134,7 @@ baseFlags = [ 'RenameMap', 'SQL', 'Sampler', + 'Scoreboard', 'ScsiCtrl', 'ScsiDisk', 'ScsiNone', @@ -156,8 +158,6 @@ baseFlags = [ 'Uart', 'VtoPhys', 'WriteBarrier', - 'Activity', - 'Scoreboard', 'Writeback', ] @@ -176,7 +176,7 @@ compoundFlagMap = { 'EthernetAll' : [ 'Ethernet', 'EthernetPIO', 'EthernetDMA', 'EthernetData' , 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ], 'EthernetNoData' : [ 'Ethernet', 'EthernetPIO', 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ], 'IdeAll' : [ 'IdeCtrl', 'IdeDisk' ], - 'FullCPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'LSQ', 'LSQUnit', 'StoreSet', 'MemDepUnit', 'DynInst', 'FullCPU', 'Activity','Scoreboard','Writeback'], + 'O3CPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'LSQ', 
'LSQUnit', 'StoreSet', 'MemDepUnit', 'DynInst', 'FullCPU', 'O3CPU', 'Activity','Scoreboard','Writeback'], 'OzoneCPUAll' : [ 'BE', 'FE', 'IBE', 'OzoneLSQ', 'OzoneCPU'] } From ae78c465313d6ca1dc71e8b9731e952bb3c8c09b Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Wed, 5 Jul 2006 15:57:02 -0400 Subject: [PATCH 097/152] Need to change state upon quiescing. --HG-- extra : convert_revision : 25e3b0a463a0191cab9290665409d0abca6a179a --- src/cpu/simple/timing.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 0729f94898..ab4acdedab 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -118,6 +118,7 @@ TimingSimpleCPU::quiesce(Event *quiesce_event) // an access to complete. if (status() == Idle || status() == Running || status() == SwitchedOut) { DPRINTF(Config, "Ready to quiesce\n"); + changeState(SimObject::QuiescedTiming); return false; } else { DPRINTF(Config, "Waiting to quiesce\n"); From ea9697250cf55add36af1548b524ab22d6a2cf94 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Wed, 5 Jul 2006 16:54:24 -0400 Subject: [PATCH 098/152] Fix up some merge problems. src/base/traceflags.py: Remove BaseCPU traceflag. src/cpu/o3/alpha/params.hh: Move non-Alpha specific parameters out of this params class. src/cpu/o3/params.hh: Move non-Alpha specific params into this params class. 
--HG-- extra : convert_revision : e5b652adb47a240376733400e6054c66c50bd514 --- src/base/traceflags.py | 1 - src/cpu/o3/alpha/params.hh | 14 +------------- src/cpu/o3/params.hh | 15 +++++++++++++++ 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/base/traceflags.py b/src/base/traceflags.py index 9ceba3017b..27c24107c9 100644 --- a/src/base/traceflags.py +++ b/src/base/traceflags.py @@ -51,7 +51,6 @@ baseFlags = [ 'Activity', 'AlphaConsole', 'BADADDR', - 'BaseCPU', 'BE', 'BPredRAS', 'Bus', diff --git a/src/cpu/o3/alpha/params.hh b/src/cpu/o3/alpha/params.hh index 8f7364dd0b..c618cee08b 100644 --- a/src/cpu/o3/alpha/params.hh +++ b/src/cpu/o3/alpha/params.hh @@ -54,19 +54,7 @@ class AlphaSimpleParams : public O3Params #if FULL_SYSTEM AlphaITB *itb; AlphaDTB *dtb; -#else - std::vector workload; - Process *process; -#endif // FULL_SYSTEM - - MemObject *mem; - - BaseCPU *checker; - - unsigned decodeToFetchDelay; - unsigned dispatchWidth; - unsigned wbWidth; - unsigned wbDepth; +#endif }; #endif // __CPU_O3_ALPHA_PARAMS_HH__ diff --git a/src/cpu/o3/params.hh b/src/cpu/o3/params.hh index 69a1bb937a..ed53fa97ae 100755 --- a/src/cpu/o3/params.hh +++ b/src/cpu/o3/params.hh @@ -46,6 +46,18 @@ class O3Params : public BaseO3CPU::Params public: unsigned activity; + // + // Pointers to key objects + // +#if !FULL_SYSTEM + std::vector workload; + Process *process; +#endif // FULL_SYSTEM + + MemObject *mem; + + BaseCPU *checker; + // // Caches // @@ -86,7 +98,10 @@ class O3Params : public BaseO3CPU::Params unsigned commitToIEWDelay; unsigned renameToIEWDelay; unsigned issueToExecuteDelay; + unsigned dispatchWidth; unsigned issueWidth; + unsigned wbWidth; + unsigned wbDepth; FUPool *fuPool; // From bd26dbdb13108bffed1c246a450029a3322dba4c Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Wed, 5 Jul 2006 17:25:37 -0400 Subject: [PATCH 099/152] Checker ignores any faults that occur in syscall emulation mode for now. 
src/cpu/checker/cpu_impl.hh: The only fault we handle in SE causes troubles when invoked with the Checker. This is because it changes state within the process, and not the checker, so the state isn't correct when the main CPU calls invoke. It's safe to just ignore the fault in the Checker and continue. --HG-- extra : convert_revision : 5000d763a75009c7a6011646a6790ac5b23df6bb --- src/cpu/checker/cpu_impl.hh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh index 137e1c46d3..7c1efb0b18 100644 --- a/src/cpu/checker/cpu_impl.hh +++ b/src/cpu/checker/cpu_impl.hh @@ -236,9 +236,7 @@ Checker::verify(DynInstPtr &completed_inst) willChangePC = true; newPC = thread->readPC(); DPRINTF(Checker, "Fault, PC is now %#x\n", newPC); -#else // !FULL_SYSTEM - fatal("fault (%d) detected @ PC 0x%08p", fault, thread->readPC()); -#endif // FULL_SYSTEM +#endif } else { #if THE_ISA != MIPS_ISA // go to the next instruction From d8fd09cc159a7b5b0d314a41b09cfcdef91de55f Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Wed, 5 Jul 2006 17:59:33 -0400 Subject: [PATCH 100/152] Rename quiesce to drain to avoid confusion with the pseudo instruction. src/cpu/simple/timing.cc: src/cpu/simple/timing.hh: src/python/m5/__init__.py: src/python/m5/config.py: src/sim/main.cc: src/sim/sim_events.cc: src/sim/sim_events.hh: src/sim/sim_object.cc: src/sim/sim_object.hh: Rename quiesce to drain. 
--HG-- extra : convert_revision : fc3244a3934812e1edb8050f1f51f30382baf774 --- src/cpu/simple/timing.cc | 30 ++++++++++++++---------------- src/cpu/simple/timing.hh | 6 +++--- src/python/m5/__init__.py | 30 +++++++++++++++--------------- src/python/m5/config.py | 6 +++--- src/sim/main.cc | 14 +++++++------- src/sim/sim_events.cc | 4 ++-- src/sim/sim_events.hh | 6 +++--- src/sim/sim_object.cc | 20 ++++++++++---------- src/sim/sim_object.hh | 8 ++++---- 9 files changed, 61 insertions(+), 63 deletions(-) diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 877364effe..ad04c8d3bd 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -88,7 +88,7 @@ TimingSimpleCPU::TimingSimpleCPU(Params *p) { _status = Idle; ifetch_pkt = dcache_pkt = NULL; - quiesceEvent = NULL; + drainEvent = NULL; state = SimObject::Timing; } @@ -112,18 +112,16 @@ TimingSimpleCPU::unserialize(Checkpoint *cp, const string §ion) } bool -TimingSimpleCPU::quiesce(Event *quiesce_event) +TimingSimpleCPU::drain(Event *drain_event) { - // TimingSimpleCPU is ready to quiesce if it's not waiting for + // TimingSimpleCPU is ready to drain if it's not waiting for // an access to complete. 
if (status() == Idle || status() == Running || status() == SwitchedOut) { - DPRINTF(Config, "Ready to quiesce\n"); - changeState(SimObject::QuiescedTiming); + changeState(SimObject::DrainedTiming); return false; } else { - DPRINTF(Config, "Waiting to quiesce\n"); - changeState(SimObject::Quiescing); - quiesceEvent = quiesce_event; + changeState(SimObject::Draining); + drainEvent = drain_event; return true; } } @@ -423,8 +421,8 @@ TimingSimpleCPU::completeIfetch(Packet *pkt) delete pkt->req; delete pkt; - if (getState() == SimObject::Quiescing) { - completeQuiesce(); + if (getState() == SimObject::Draining) { + completeDrain(); return; } @@ -480,8 +478,8 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt) assert(_status == DcacheWaitResponse); _status = Running; - if (getState() == SimObject::Quiescing) { - completeQuiesce(); + if (getState() == SimObject::Draining) { + completeDrain(); delete pkt->req; delete pkt; @@ -500,11 +498,11 @@ TimingSimpleCPU::completeDataAccess(Packet *pkt) void -TimingSimpleCPU::completeQuiesce() +TimingSimpleCPU::completeDrain() { - DPRINTF(Config, "Done quiescing\n"); - changeState(SimObject::QuiescedTiming); - quiesceEvent->process(); + DPRINTF(Config, "Done draining\n"); + changeState(SimObject::DrainedTiming); + drainEvent->process(); } bool diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index d91144e4a6..c360e553e4 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -64,7 +64,7 @@ class TimingSimpleCPU : public BaseSimpleCPU Status status() const { return _status; } - Event *quiesceEvent; + Event *drainEvent; private: @@ -133,7 +133,7 @@ class TimingSimpleCPU : public BaseSimpleCPU virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); - virtual bool quiesce(Event *quiesce_event); + virtual bool drain(Event *drain_event); virtual void resume(); virtual void setMemoryMode(State new_mode); @@ -154,7 +154,7 @@ class TimingSimpleCPU : public 
BaseSimpleCPU void completeDataAccess(Packet *); void advanceInst(Fault fault); private: - void completeQuiesce(); + void completeDrain(); }; #endif // __CPU_SIMPLE_TIMING_HH__ diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py index 828165d155..579785a469 100644 --- a/src/python/m5/__init__.py +++ b/src/python/m5/__init__.py @@ -213,14 +213,14 @@ atexit.register(cc_main.doExitCleanup) # matter since most scripts will probably 'from m5.objects import *'. import objects -def doQuiesce(root): - quiesce = cc_main.createCountedQuiesce() - unready_objects = root.startQuiesce(quiesce, True) - # If we've got some objects that can't quiesce immediately, then simulate +def doDrain(root): + drain_event = cc_main.createCountedDrain() + unready_objects = root.startDrain(drain_event, True) + # If we've got some objects that can't drain immediately, then simulate if unready_objects > 0: - quiesce.setCount(unready_objects) + drain_event.setCount(unready_objects) simulate() - cc_main.cleanupCountedQuiesce(quiesce) + cc_main.cleanupCountedDrain(drain_event) def resume(root): root.resume() @@ -228,7 +228,7 @@ def resume(root): def checkpoint(root): if not isinstance(root, objects.Root): raise TypeError, "Object is not a root object. Checkpoint must be called on a root object." - doQuiesce(root) + doDrain(root) print "Writing checkpoint" cc_main.serializeAll() resume(root) @@ -241,7 +241,7 @@ def changeToAtomic(system): if not isinstance(system, objects.Root) and not isinstance(system, System): raise TypeError, "Object is not a root or system object. Checkpoint must be " "called on a root object." - doQuiesce(system) + doDrain(system) print "Changing memory mode to atomic" system.changeTiming(cc_main.SimObject.Atomic) resume(system) @@ -250,7 +250,7 @@ def changeToTiming(system): if not isinstance(system, objects.Root) and not isinstance(system, System): raise TypeError, "Object is not a root or system object. Checkpoint must be " "called on a root object." 
- doQuiesce(system) + doDrain(system) print "Changing memory mode to timing" system.changeTiming(cc_main.SimObject.Timing) resume(system) @@ -271,16 +271,16 @@ def switchCpus(cpuList): if not isinstance(cpu, objects.BaseCPU): raise TypeError, "%s is not of type BaseCPU", cpu - # Quiesce all of the individual CPUs - quiesce = cc_main.createCountedQuiesce() + # Drain all of the individual CPUs + drain_event = cc_main.createCountedDrain() unready_cpus = 0 for old_cpu in old_cpus: - unready_cpus += old_cpu.startQuiesce(quiesce, False) - # If we've got some objects that can't quiesce immediately, then simulate + unready_cpus += old_cpu.startDrain(drain_event, False) + # If we've got some objects that can't drain immediately, then simulate if unready_cpus > 0: - quiesce.setCount(unready_cpus) + drain_event.setCount(unready_cpus) simulate() - cc_main.cleanupCountedQuiesce(quiesce) + cc_main.cleanupCountedDrain(drain_event) # Now all of the CPUs are ready to be switched out for old_cpu in old_cpus: old_cpu._ccObject.switchOut() diff --git a/src/python/m5/config.py b/src/python/m5/config.py index 6f2873d40b..cffe069848 100644 --- a/src/python/m5/config.py +++ b/src/python/m5/config.py @@ -543,15 +543,15 @@ class SimObject(object): for child in self._children.itervalues(): child.connectPorts() - def startQuiesce(self, quiesce_event, recursive): + def startDrain(self, drain_event, recursive): count = 0 # ParamContexts don't serialize if isinstance(self, SimObject) and not isinstance(self, ParamContext): - if self._ccObject.quiesce(quiesce_event): + if self._ccObject.drain(drain_event): count = 1 if recursive: for child in self._children.itervalues(): - count += child.startQuiesce(quiesce_event, True) + count += child.startDrain(drain_event, True) return count def resume(self): diff --git a/src/sim/main.cc b/src/sim/main.cc index 3eb7fa95d4..e96a449307 100644 --- a/src/sim/main.cc +++ b/src/sim/main.cc @@ -523,19 +523,19 @@ simulate(Tick num_cycles = -1) } Event * 
-createCountedQuiesce() +createCountedDrain() { - return new CountedQuiesceEvent(); + return new CountedDrainEvent(); } void -cleanupCountedQuiesce(Event *counted_quiesce) +cleanupCountedDrain(Event *counted_drain) { - CountedQuiesceEvent *event = - dynamic_cast(counted_quiesce); + CountedDrainEvent *event = + dynamic_cast(counted_drain); if (event == NULL) { - fatal("Called cleanupCountedQuiesce() on an event that was not " - "a CountedQuiesceEvent."); + fatal("Called cleanupCountedDrain() on an event that was not " + "a CountedDrainEvent."); } assert(event->getCount() == 0); delete event; diff --git a/src/sim/sim_events.cc b/src/sim/sim_events.cc index 97f7ae03cb..d9e8bdeaac 100644 --- a/src/sim/sim_events.cc +++ b/src/sim/sim_events.cc @@ -79,10 +79,10 @@ exitSimLoop(const std::string &message, int exit_code) } void -CountedQuiesceEvent::process() +CountedDrainEvent::process() { if (--count == 0) { - exitSimLoop("Finished quiesce"); + exitSimLoop("Finished drain"); } } diff --git a/src/sim/sim_events.hh b/src/sim/sim_events.hh index 50368f258f..3c4a9dd05f 100644 --- a/src/sim/sim_events.hh +++ b/src/sim/sim_events.hh @@ -67,13 +67,13 @@ class SimLoopExitEvent : public Event virtual const char *description(); }; -class CountedQuiesceEvent : public SimLoopExitEvent +class CountedDrainEvent : public SimLoopExitEvent { private: - // Count down to quiescing + // Count of how many objects have not yet drained int count; public: - CountedQuiesceEvent() + CountedDrainEvent() : count(0) { } void process(); diff --git a/src/sim/sim_object.cc b/src/sim/sim_object.cc index 551555b257..4205b5762c 100644 --- a/src/sim/sim_object.cc +++ b/src/sim/sim_object.cc @@ -271,22 +271,22 @@ SimObject::recordEvent(const std::string &stat) } bool -SimObject::quiesce(Event *quiesce_event) +SimObject::drain(Event *drain_event) { - if (state != QuiescedAtomic && state != Atomic) { - panic("Must implement your own quiesce function if it is to be used " + if (state != DrainedAtomic && state 
!= Atomic) { + panic("Must implement your own drain function if it is to be used " "in timing mode!"); } - state = QuiescedAtomic; + state = DrainedAtomic; return false; } void SimObject::resume() { - if (state == QuiescedAtomic) { + if (state == DrainedAtomic) { state = Atomic; - } else if (state == QuiescedTiming) { + } else if (state == DrainedTiming) { state = Timing; } } @@ -295,10 +295,10 @@ void SimObject::setMemoryMode(State new_mode) { assert(new_mode == Timing || new_mode == Atomic); - if (state == QuiescedAtomic && new_mode == Timing) { - state = QuiescedTiming; - } else if (state == QuiescedTiming && new_mode == Atomic) { - state = QuiescedAtomic; + if (state == DrainedAtomic && new_mode == Timing) { + state = DrainedTiming; + } else if (state == DrainedTiming && new_mode == Atomic) { + state = DrainedAtomic; } else { state = new_mode; } diff --git a/src/sim/sim_object.hh b/src/sim/sim_object.hh index e0b21782fb..4833192d69 100644 --- a/src/sim/sim_object.hh +++ b/src/sim/sim_object.hh @@ -62,9 +62,9 @@ class SimObject : public Serializable, protected StartupCallback enum State { Atomic, Timing, - Quiescing, - QuiescedAtomic, - QuiescedTiming + Draining, + DrainedAtomic, + DrainedTiming }; protected: @@ -117,7 +117,7 @@ class SimObject : public Serializable, protected StartupCallback // Methods to drain objects in order to take checkpoints // Or switch from timing -> atomic memory model // Quiesce returns true if the SimObject cannot quiesce immediately. - virtual bool quiesce(Event *quiesce_event); + virtual bool drain(Event *drain_event); virtual void resume(); virtual void setMemoryMode(State new_mode); virtual void switchOut(); From d598061dd6e9aa83ef2613e2c7825a491c53b893 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Wed, 5 Jul 2006 21:14:36 -0400 Subject: [PATCH 101/152] Remove sampler and serializer. Now they are handled through C++ interacting with Python. 
src/SConscript: src/cpu/base.cc: src/cpu/base.hh: src/cpu/checker/cpu.hh: src/cpu/checker/cpu_impl.hh: src/cpu/o3/cpu.cc: src/cpu/o3/cpu.hh: src/cpu/o3/fetch.hh: src/cpu/ozone/cpu.hh: src/cpu/ozone/cpu_impl.hh: src/cpu/simple/base.cc: src/cpu/simple/base.hh: src/sim/pseudo_inst.cc: Remove sampler. src/sim/sim_object.cc: Remove serializer. --HG-- extra : convert_revision : ce7616189440f3dc70040148da6d07309a386008 --- src/SConscript | 1 - src/cpu/base.cc | 1 - src/cpu/base.hh | 1 - src/cpu/checker/cpu.hh | 3 +-- src/cpu/checker/cpu_impl.hh | 2 +- src/cpu/o3/cpu.cc | 6 ++---- src/cpu/o3/cpu.hh | 5 +---- src/cpu/o3/fetch.hh | 2 -- src/cpu/ozone/cpu.hh | 5 +---- src/cpu/ozone/cpu_impl.hh | 6 ++---- src/cpu/simple/base.cc | 1 - src/cpu/simple/base.hh | 6 ------ src/sim/pseudo_inst.cc | 4 ---- src/sim/sim_object.cc | 1 - 14 files changed, 8 insertions(+), 36 deletions(-) diff --git a/src/SConscript b/src/SConscript index 0d0cb24867..9825cafe7b 100644 --- a/src/SConscript +++ b/src/SConscript @@ -89,7 +89,6 @@ base_sources = Split(''' cpu/pc_event.cc cpu/quiesce_event.cc cpu/static_inst.cc - cpu/sampler/sampler.cc cpu/simple_thread.cc cpu/thread_state.cc diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 40cec416be..0b9c80591b 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -41,7 +41,6 @@ #include "cpu/cpuevent.hh" #include "cpu/thread_context.hh" #include "cpu/profile.hh" -#include "cpu/sampler/sampler.hh" #include "sim/param.hh" #include "sim/process.hh" #include "sim/sim_events.hh" diff --git a/src/cpu/base.hh b/src/cpu/base.hh index 51f3bb905d..5256a411f4 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -36,7 +36,6 @@ #include "base/statistics.hh" #include "config/full_system.hh" -#include "cpu/sampler/sampler.hh" #include "sim/eventq.hh" #include "sim/sim_object.hh" #include "arch/isa_traits.hh" diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh index 785387e600..b520e1be0d 100644 --- a/src/cpu/checker/cpu.hh +++ b/src/cpu/checker/cpu.hh @@ 
-66,7 +66,6 @@ class ThreadContext; class MemInterface; class Checkpoint; class Request; -class Sampler; /** * CheckerCPU class. Dynamically verifies instructions as they are @@ -374,7 +373,7 @@ class Checker : public CheckerCPU : CheckerCPU(p) { } - void switchOut(Sampler *s); + void switchOut(); void takeOverFrom(BaseCPU *oldCPU); void verify(DynInstPtr &inst); diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh index 7c1efb0b18..81f97726c5 100644 --- a/src/cpu/checker/cpu_impl.hh +++ b/src/cpu/checker/cpu_impl.hh @@ -293,7 +293,7 @@ Checker::verify(DynInstPtr &completed_inst) template void -Checker::switchOut(Sampler *s) +Checker::switchOut() { instList.clear(); } diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index feca4cdf2f..fb7739db87 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -714,9 +714,8 @@ FullO3CPU::haltContext(int tid) template void -FullO3CPU::switchOut(Sampler *_sampler) +FullO3CPU::switchOut() { - sampler = _sampler; switchCount = 0; fetch.switchOut(); decode.switchOut(); @@ -745,12 +744,11 @@ FullO3CPU::signalSwitched() #if USE_CHECKER if (checker) - checker->switchOut(sampler); + checker->switchOut(); #endif if (tickEvent.scheduled()) tickEvent.squash(); - sampler->signalSwitched(); _status = SwitchedOut; } assert(switchCount <= 5); diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 1cff6142d3..bd04516019 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -271,7 +271,7 @@ class FullO3CPU : public BaseO3CPU virtual void syscall(int tid) { panic("Unimplemented!"); } /** Switches out this CPU. */ - void switchOut(Sampler *sampler); + void switchOut(); /** Signals to this CPU that a stage has completed switching out. */ void signalSwitched(); @@ -550,9 +550,6 @@ class FullO3CPU : public BaseO3CPU /** Pointer to memory. */ MemObject *mem; - /** Pointer to the sampler */ - Sampler *sampler; - /** Counter of how many stages have completed switching out. 
*/ int switchCount; diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 7fcd21b7d4..848ebf39e0 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -40,8 +40,6 @@ #include "mem/port.hh" #include "sim/eventq.hh" -class Sampler; - /** * DefaultFetch class handles both single threaded and SMT fetch. Its * width is specified by the parameters; each cycle it tries to fetch diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh index f726ac99b4..8993781ea0 100644 --- a/src/cpu/ozone/cpu.hh +++ b/src/cpu/ozone/cpu.hh @@ -55,7 +55,6 @@ class AlphaDTB; class PhysicalMemory; class MemoryController; -class Sampler; class RemoteGDB; class GDBListener; @@ -356,12 +355,10 @@ class OzoneCPU : public BaseCPU int cpuId; - void switchOut(Sampler *sampler); + void switchOut(); void signalSwitched(); void takeOverFrom(BaseCPU *oldCPU); - Sampler *sampler; - int switchCount; #if FULL_SYSTEM diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh index 2cdc8a3da9..ccb1c8418d 100644 --- a/src/cpu/ozone/cpu_impl.hh +++ b/src/cpu/ozone/cpu_impl.hh @@ -244,9 +244,8 @@ OzoneCPU::~OzoneCPU() template void -OzoneCPU::switchOut(Sampler *_sampler) +OzoneCPU::switchOut() { - sampler = _sampler; switchCount = 0; // Front end needs state from back end, so switch out the back end first. 
backEnd->switchOut(); @@ -262,13 +261,12 @@ OzoneCPU::signalSwitched() frontEnd->doSwitchOut(); #if USE_CHECKER if (checker) - checker->switchOut(sampler); + checker->switchOut(); #endif _status = SwitchedOut; if (tickEvent.scheduled()) tickEvent.squash(); - sampler->signalSwitched(); } assert(switchCount <= 2); } diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index db5dd2acf5..a505411896 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -41,7 +41,6 @@ #include "cpu/base.hh" #include "cpu/exetrace.hh" #include "cpu/profile.hh" -#include "cpu/sampler/sampler.hh" #include "cpu/simple/base.hh" #include "cpu/simple_thread.hh" #include "cpu/smt.hh" diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index 39bc86050e..57cfa3c2cd 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -38,7 +38,6 @@ #include "cpu/base.hh" #include "cpu/simple_thread.hh" #include "cpu/pc_event.hh" -#include "cpu/sampler/sampler.hh" #include "cpu/static_inst.hh" #include "mem/packet.hh" #include "mem/port.hh" @@ -128,11 +127,6 @@ class BaseSimpleCPU : public BaseCPU // Static data storage TheISA::IntReg dataReg; - // Pointer to the sampler that is telling us to switchover. 
- // Used to signal the completion of the pipe drain and schedule - // the next switchover - Sampler *sampler; - StaticInstPtr curStaticInst; void checkForInterrupts(); diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc index b2854e491d..dc08e6c06e 100644 --- a/src/sim/pseudo_inst.cc +++ b/src/sim/pseudo_inst.cc @@ -52,8 +52,6 @@ using namespace std; -extern Sampler *SampCPU; - using namespace Stats; using namespace TheISA; @@ -280,7 +278,5 @@ namespace AlphaPseudo void switchcpu(ThreadContext *tc) { - if (SampCPU) - SampCPU->switchCPUs(); } } diff --git a/src/sim/sim_object.cc b/src/sim/sim_object.cc index 4205b5762c..655bdcf4ec 100644 --- a/src/sim/sim_object.cc +++ b/src/sim/sim_object.cc @@ -37,7 +37,6 @@ #include "base/misc.hh" #include "base/trace.hh" #include "base/stats/events.hh" -#include "base/serializer.hh" #include "sim/host.hh" #include "sim/sim_object.hh" #include "sim/stats.hh" From eceba1405a70db1a1bc5538af99baca8fef422b6 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Wed, 5 Jul 2006 23:38:11 -0400 Subject: [PATCH 102/152] For now using the checkpoint or switchcpu pseudo instructions will return control to Python, returning the cause to be the instruction name. The user's script must then interpret the reason for exiting the simulation loop and handle the action accordingly. This may change in the future. src/sim/pseudo_inst.cc: Exit sim loop with a specific string to indicate to Python what caused the exit. The user's script needs to interpret the exit events and handle them as desired. 
--HG-- extra : convert_revision : 8eb4a42285dacb3ada3a791173c605b5acb78598 --- src/sim/pseudo_inst.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc index dc08e6c06e..869805f5c7 100644 --- a/src/sim/pseudo_inst.cc +++ b/src/sim/pseudo_inst.cc @@ -207,6 +207,7 @@ namespace AlphaPseudo { if (!doCheckpointInsts) return; + exitSimLoop("checkpoint"); } uint64_t @@ -278,5 +279,6 @@ namespace AlphaPseudo void switchcpu(ThreadContext *tc) { + exitSimLoop("switchcpu"); } } From 215041215b06f330d072b0537d7fe70739b4927d Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Thu, 6 Jul 2006 11:25:44 -0400 Subject: [PATCH 103/152] more steps toward O3 SMT src/arch/mips/isa/formats/fp.isa: Adjust for newmem src/cpu/cpu_models.py: Use O3DynInst instead of convoluted way src/cpu/o3/alpha/impl.hh: take out O3DynInst typedef here ... src/cpu/o3/cpu.cc: open up the SMT functions in the O3CPU src/cpu/static_inst.hh: Add O3DynInst src/cpu/o3/dyn_inst.hh: Use to get ISA-specific O3DynInst --HG-- extra : convert_revision : 3713187ead93e336e80889e23a1f1d2f36d664fe --- src/arch/mips/isa/formats/fp.isa | 16 +++++---- src/cpu/cpu_models.py | 16 ++------- src/cpu/o3/alpha/impl.hh | 4 +-- src/cpu/o3/cpu.cc | 61 +++++++++++++++----------------- src/cpu/o3/dyn_inst.hh | 39 ++++++++++++++++++++ src/cpu/static_inst.hh | 2 ++ 6 files changed, 82 insertions(+), 56 deletions(-) create mode 100644 src/cpu/o3/dyn_inst.hh diff --git a/src/arch/mips/isa/formats/fp.isa b/src/arch/mips/isa/formats/fp.isa index 1e5d62626c..cdb892b3f9 100644 --- a/src/arch/mips/isa/formats/fp.isa +++ b/src/arch/mips/isa/formats/fp.isa @@ -142,10 +142,10 @@ output exec {{ cpu->setFloatRegBits(inst, 0, mips_nan, size); //Read FCSR from FloatRegFile - uint32_t fcsr_bits = cpu->tc->readFloatRegBits(FCSR); + uint32_t fcsr_bits = cpu->tcBase()->readFloatRegBits(FCSR); //Write FCSR from FloatRegFile - cpu->tc->setFloatRegBits(FCSR, genInvalidVector(fcsr_bits)); + 
cpu->tcBase()->setFloatRegBits(FCSR, genInvalidVector(fcsr_bits)); if (traceData) { traceData->setData(mips_nan); } return true; @@ -158,12 +158,12 @@ output exec {{ fpResetCauseBits(%(CPU_exec_context)s *cpu) { //Read FCSR from FloatRegFile - uint32_t fcsr = cpu->tc->readFloatRegBits(FCSR); + uint32_t fcsr = cpu->tcBase()->readFloatRegBits(FCSR); fcsr = bits(fcsr, 31, 18) << 18 | bits(fcsr, 11, 0); //Write FCSR from FloatRegFile - cpu->tc->setFloatRegBits(FCSR, fcsr); + cpu->tcBase()->setFloatRegBits(FCSR, fcsr); } }}; @@ -176,8 +176,9 @@ def template FloatingPointExecute {{ //When is the right time to reset cause bits? //start of every instruction or every cycle? +#if FULL_SYSTEM fpResetCauseBits(xc); - +#endif %(op_decl)s; %(op_rd)s; @@ -192,7 +193,10 @@ def template FloatingPointExecute {{ //---- //Check for IEEE 754 FP Exceptions //fault = fpNanOperands((FPOp*)this, xc, Fd, traceData); - if (!fpInvalidOp((FPOp*)this, xc, Fd, traceData) && + if ( +#if FULL_SYSTEM + !fpInvalidOp((FPOp*)this, xc, Fd, traceData) && +#endif fault == NoFault) { %(op_wb)s; diff --git a/src/cpu/cpu_models.py b/src/cpu/cpu_models.py index 1add327458..ccaceeff3e 100644 --- a/src/cpu/cpu_models.py +++ b/src/cpu/cpu_models.py @@ -79,18 +79,6 @@ CpuModel('OzoneCPU', 'ozone_exec.cc', CpuModel('CheckerCPU', 'checker_cpu_exec.cc', '#include "cpu/checker/cpu.hh"', { 'CPU_exec_context': 'CheckerCPU' }) - -# Maybe there is a more clever way to determine ISA -# here but since the environment variable isnt passed through -# here the easiest way is this... 
-sub_template = 'not found' -for argument in sys.argv: - if 'ALPHA' in argument: - sub_template = 'AlphaDynInst' - -if sub_template == 'not found': - sys.exit('NO CPU_exec_context substitution defined for this ISA') - CpuModel('O3CPU', 'o3_cpu_exec.cc', - '#include "cpu/o3/isa_specific.hh"', - { 'CPU_exec_context': sub_template }) + '#include "cpu/o3/alpha/dyn_inst.hh"', + { 'CPU_exec_context': 'AlphaDynInst' }) diff --git a/src/cpu/o3/alpha/impl.hh b/src/cpu/o3/alpha/impl.hh index 8cd8692c60..b928ae654e 100644 --- a/src/cpu/o3/alpha/impl.hh +++ b/src/cpu/o3/alpha/impl.hh @@ -36,6 +36,7 @@ #include "cpu/o3/alpha/params.hh" #include "cpu/o3/cpu_policy.hh" + // Forward declarations. template class AlphaDynInst; @@ -88,7 +89,4 @@ struct AlphaSimpleImpl /** The O3Impl to be used. */ typedef AlphaSimpleImpl O3CPUImpl; -/** The O3Impl to be used. */ -typedef DynInst O3DynInst; - #endif // __CPU_O3_ALPHA_IMPL_HH__ diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index feca4cdf2f..630d82cba1 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -463,14 +463,13 @@ template void FullO3CPU::insertThread(unsigned tid) { - DPRINTF(O3CPU,"[tid:%i] Initializing thread data"); + DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU"); // Will change now that the PC and thread state is internal to the CPU // and not in the ThreadContext. 
-#if 0 #if FULL_SYSTEM ThreadContext *src_tc = system->threadContexts[tid]; #else - ThreadContext *src_tc = thread[tid]; + ThreadContext *src_tc = tcBase(tid); #endif //Bind Int Regs to Rename Map @@ -490,11 +489,14 @@ FullO3CPU::insertThread(unsigned tid) } //Copy Thread Data Into RegFile - this->copyFromTC(tid); + //this->copyFromTC(tid); - //Set PC/NPC - regFile.pc[tid] = src_tc->readPC(); - regFile.npc[tid] = src_tc->readNextPC(); + //Set PC/NPC/NNPC + setPC(src_tc->readPC(), tid); + setNextPC(src_tc->readNextPC(), tid); +#if THE_ISA != ALPHA_ISA + setNextNPC(src_tc->readNextNPC(), tid); +#endif src_tc->setStatus(ThreadContext::Active); @@ -503,16 +505,19 @@ FullO3CPU::insertThread(unsigned tid) //Reset ROB/IQ/LSQ Entries commit.rob->resetEntries(); iew.resetEntries(); -#endif } template void FullO3CPU::removeThread(unsigned tid) { - DPRINTF(O3CPU,"[tid:%i] Removing thread data"); -#if 0 - //Unbind Int Regs from Rename Map + DPRINTF(O3CPU,"[tid:%i] Removing thread from CPU."); + + // Copy Thread Data From RegFile + // If thread is suspended, it might be re-allocated + //this->copyToTC(tid); + + // Unbind Int Regs from Rename Map for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) { PhysRegIndex phys_reg = renameMap[tid].lookup(ireg); @@ -520,7 +525,7 @@ FullO3CPU::removeThread(unsigned tid) freeList.addReg(phys_reg); } - //Unbind Float Regs from Rename Map + // Unbind Float Regs from Rename Map for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) { PhysRegIndex phys_reg = renameMap[tid].lookup(freg); @@ -528,27 +533,18 @@ FullO3CPU::removeThread(unsigned tid) freeList.addReg(phys_reg); } - //Copy Thread Data From RegFile - /* Fix Me: - * Do we really need to do this if we are removing a thread - * in the sense that it's finished (exiting)? If the thread is just - * being suspended we might... 
- */ -// this->copyToTC(tid); - - //Squash Throughout Pipeline + // Squash Throughout Pipeline fetch.squash(0,tid); decode.squash(tid); rename.squash(tid); assert(iew.ldstQueue.getCount(tid) == 0); - //Reset ROB/IQ/LSQ Entries + // Reset ROB/IQ/LSQ Entries if (activeThreads.size() >= 1) { commit.rob->resetEntries(); iew.resetEntries(); } -#endif } @@ -656,7 +652,7 @@ template void FullO3CPU::suspendContext(int tid) { - DPRINTF(O3CPU,"[tid: %i]: Suspended ...\n", tid); + DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid); unscheduleTickEvent(); _status = Idle; /* @@ -676,27 +672,26 @@ template void FullO3CPU::deallocateContext(int tid) { - DPRINTF(O3CPU,"[tid:%i]: Deallocating ...", tid); -/* - //Remove From Active List, if Active - list::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); + DPRINTF(O3CPU,"[tid:%i]: Deallocating Thread Context", tid); - if (isActive != activeThreads.end()) { + //Remove From Active List, if Active + list::iterator thread_it = + find(activeThreads.begin(), activeThreads.end(), tid); + + if (thread_it != activeThreads.end()) { DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", tid); - activeThreads.erase(isActive); + activeThreads.erase(thread_it); removeThread(tid); } -*/ } template void FullO3CPU::haltContext(int tid) { - DPRINTF(O3CPU,"[tid:%i]: Halted ...", tid); + DPRINTF(O3CPU,"[tid:%i]: Halting Thread Context", tid); /* //Remove From Active List, if Active list::iterator isActive = find( diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh new file mode 100644 index 0000000000..d029488fda --- /dev/null +++ b/src/cpu/o3/dyn_inst.hh @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#ifndef __CPU_O3_DYN_INST_HH__ +#define __CPU_O3_DYN_INST_HH__ + +#include "cpu/o3/isa_specific.hh" + +/** The O3Impl to be used. 
*/ +typedef DynInst O3DynInst; + +#endif // __CPU_O3_DYN_INST_HH__ diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index a980786343..19f06f6699 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -53,6 +53,8 @@ class Packet; template class AlphaDynInst; +//class O3DynInst; + template class OzoneDynInst; From 03fa13b27ce461886dceef82af0d3e994b5b9288 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Thu, 6 Jul 2006 12:18:55 -0400 Subject: [PATCH 104/152] Use O3DynInst in cpu_models.py and in static_inst_exec_sigs.hh instead of a specific ISA dyn. inst. src/cpu/cpu_models.py: Use O3DynInst src/cpu/o3/dyn_inst.hh: declare O3DynInst here based off of ISA ... this must be updated for each ISA. src/cpu/static_inst.hh: take out O3 forward declarations here and include header file to keep this file clean --HG-- extra : convert_revision : 0d65463479c3cfc2d1154935b1032dae32c5efd0 --- src/cpu/cpu_models.py | 4 ++-- src/cpu/o3/dyn_inst.hh | 15 ++++++++++----- src/cpu/static_inst.hh | 6 +----- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/cpu/cpu_models.py b/src/cpu/cpu_models.py index ccaceeff3e..5b0c6c4dac 100644 --- a/src/cpu/cpu_models.py +++ b/src/cpu/cpu_models.py @@ -80,5 +80,5 @@ CpuModel('CheckerCPU', 'checker_cpu_exec.cc', '#include "cpu/checker/cpu.hh"', { 'CPU_exec_context': 'CheckerCPU' }) CpuModel('O3CPU', 'o3_cpu_exec.cc', - '#include "cpu/o3/alpha/dyn_inst.hh"', - { 'CPU_exec_context': 'AlphaDynInst' }) + '#include "cpu/o3/isa_specific.hh"', + { 'CPU_exec_context': 'O3DynInst' }) diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index d029488fda..34afa2d1b3 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2006 The Regents of The University of Michigan * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -25,15 +25,20 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Authors: Kevin Lim + * Authors: Korey Sewell */ #ifndef __CPU_O3_DYN_INST_HH__ #define __CPU_O3_DYN_INST_HH__ -#include "cpu/o3/isa_specific.hh" -/** The O3Impl to be used. */ -typedef DynInst O3DynInst; +#if THE_ISA == ALPHA_ISA +template +class AlphaDynInst; + +struct AlphaSimpleImpl; + +typedef AlphaDynInst O3DynInst; +#endif #endif // __CPU_O3_DYN_INST_HH__ diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index 19f06f6699..ea1a651489 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -39,6 +39,7 @@ #include "base/misc.hh" #include "base/refcnt.hh" #include "cpu/op_class.hh" +#include "cpu/o3/dyn_inst.hh" #include "sim/host.hh" #include "arch/isa_traits.hh" @@ -50,11 +51,6 @@ class ThreadContext; class DynInst; class Packet; -template -class AlphaDynInst; - -//class O3DynInst; - template class OzoneDynInst; From e60f998e2993df35460c8835016b3043a13da80a Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Thu, 6 Jul 2006 12:29:34 -0400 Subject: [PATCH 105/152] Had to add this because for some reason gcc wasnt recognizing "THE_ISA == ALPHA_ISA"... wierd but OK --HG-- extra : convert_revision : f847d6c01212e32200a319c16596b8e1c1d15c7d --- src/cpu/o3/dyn_inst.hh | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index 34afa2d1b3..a2cdf2dba4 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -31,6 +31,7 @@ #ifndef __CPU_O3_DYN_INST_HH__ #define __CPU_O3_DYN_INST_HH__ +#include "arch/isa_specific.hh" #if THE_ISA == ALPHA_ISA template From 8c547d80b1a091f41f5516f58ad7368181fe4041 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 6 Jul 2006 13:57:21 -0400 Subject: [PATCH 106/152] Change the return value of drain. 
False means the object wasn't able to drain yet. src/python/m5/config.py: Invert the return value. src/sim/sim_object.cc: Invert the return value of drain. src/sim/sim_object.hh: Change the return value of drain. --HG-- extra : convert_revision : 41bb122c6f29302d8b3815d7bd6a2ea8fba64df9 --- src/python/m5/config.py | 2 +- src/sim/sim_object.cc | 2 +- src/sim/sim_object.hh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/python/m5/config.py b/src/python/m5/config.py index cffe069848..8291e1e1bd 100644 --- a/src/python/m5/config.py +++ b/src/python/m5/config.py @@ -547,7 +547,7 @@ class SimObject(object): count = 0 # ParamContexts don't serialize if isinstance(self, SimObject) and not isinstance(self, ParamContext): - if self._ccObject.drain(drain_event): + if not self._ccObject.drain(drain_event): count = 1 if recursive: for child in self._children.itervalues(): diff --git a/src/sim/sim_object.cc b/src/sim/sim_object.cc index 655bdcf4ec..a0278dba0f 100644 --- a/src/sim/sim_object.cc +++ b/src/sim/sim_object.cc @@ -277,7 +277,7 @@ SimObject::drain(Event *drain_event) "in timing mode!"); } state = DrainedAtomic; - return false; + return true; } void diff --git a/src/sim/sim_object.hh b/src/sim/sim_object.hh index 4833192d69..7ecc009588 100644 --- a/src/sim/sim_object.hh +++ b/src/sim/sim_object.hh @@ -116,7 +116,7 @@ class SimObject : public Serializable, protected StartupCallback // Methods to drain objects in order to take checkpoints // Or switch from timing -> atomic memory model - // Quiesce returns true if the SimObject cannot quiesce immediately. + // Drain returns false if the SimObject cannot drain immediately. virtual bool drain(Event *drain_event); virtual void resume(); virtual void setMemoryMode(State new_mode); From 30c516d51cad44f62a7269a59f067ae5a1be81df Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 6 Jul 2006 13:59:02 -0400 Subject: [PATCH 107/152] Support for draining, and the new method of switching out. 
Now switching out happens after the pipeline has been drained, deferring the three way handshake to the normal drain mechanism. The calls of switchOut() and takeOverFrom() both take action immediately. src/cpu/o3/commit.hh: src/cpu/o3/commit_impl.hh: src/cpu/o3/cpu.cc: src/cpu/o3/cpu.hh: src/cpu/o3/decode.hh: src/cpu/o3/decode_impl.hh: src/cpu/o3/fetch.hh: src/cpu/o3/fetch_impl.hh: src/cpu/o3/iew.hh: src/cpu/o3/iew_impl.hh: src/cpu/o3/rename.hh: src/cpu/o3/rename_impl.hh: Support for draining, new method of switching out. --HG-- extra : convert_revision : 05bf8b271ec85b3e2c675c3bed6c42aeba21f465 --- src/cpu/o3/commit.hh | 13 ++++--- src/cpu/o3/commit_impl.hh | 21 +++++++---- src/cpu/o3/cpu.cc | 75 ++++++++++++++++++++++++++------------- src/cpu/o3/cpu.hh | 25 +++++++++---- src/cpu/o3/decode.hh | 8 ++++- src/cpu/o3/decode_impl.hh | 6 ++-- src/cpu/o3/fetch.hh | 14 +++++--- src/cpu/o3/fetch_impl.hh | 24 +++++++++---- src/cpu/o3/iew.hh | 9 +++-- src/cpu/o3/iew_impl.hh | 14 +++++--- src/cpu/o3/rename.hh | 9 +++-- src/cpu/o3/rename_impl.hh | 6 ++-- 12 files changed, 154 insertions(+), 70 deletions(-) diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 60b555269e..49ff5cdadb 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -187,11 +187,14 @@ class DefaultCommit /** Initializes stage by sending back the number of free entries. */ void initStage(); - /** Initializes the switching out of commit. */ - void switchOut(); + /** Initializes the draining of commit. */ + void drain(); + + /** Resumes execution after draining. */ + void resume(); /** Completes the switch out of commit. */ - void doSwitchOut(); + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -383,8 +386,8 @@ class DefaultCommit /** Number of Active Threads */ unsigned numThreads; - /** Is a switch out pending. */ - bool switchPending; + /** Is a drain pending. */ + bool drainPending; /** Is commit switched out. 
*/ bool switchedOut; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 06b8e8a954..2eb05afac2 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -80,7 +80,7 @@ DefaultCommit::DefaultCommit(Params *params) renameWidth(params->renameWidth), commitWidth(params->commitWidth), numThreads(params->numberOfThreads), - switchPending(false), + drainPending(false), switchedOut(false), trapLatency(params->trapLatency), fetchTrapLatency(params->fetchTrapLatency) @@ -351,20 +351,26 @@ DefaultCommit::initStage() template void -DefaultCommit::switchOut() +DefaultCommit::drain() { - switchPending = true; + drainPending = true; } template void -DefaultCommit::doSwitchOut() +DefaultCommit::switchOut() { switchedOut = true; - switchPending = false; + drainPending = false; rob->switchOut(); } +template +void +DefaultCommit::resume() +{ +} + template void DefaultCommit::takeOverFrom() @@ -557,8 +563,9 @@ DefaultCommit::tick() wroteToTimeBuffer = false; _nextStatus = Inactive; - if (switchPending && rob->isEmpty() && !iewStage->hasStoresToWB()) { - cpu->signalSwitched(); + if (drainPending && rob->isEmpty() && !iewStage->hasStoresToWB()) { + cpu->signalDrained(); + drainPending = false; return; } diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index fb7739db87..5bda57cf8a 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -158,7 +158,7 @@ FullO3CPU::FullO3CPU(Params *params) physmem(system->physmem), #endif // FULL_SYSTEM mem(params->mem), - switchCount(0), + drainCount(0), deferRegistration(params->deferRegistration), numThreads(number_of_threads) { @@ -713,45 +713,72 @@ FullO3CPU::haltContext(int tid) } template -void -FullO3CPU::switchOut() +bool +FullO3CPU::drain(Event *drain_event) { - switchCount = 0; - fetch.switchOut(); - decode.switchOut(); - rename.switchOut(); - iew.switchOut(); - commit.switchOut(); + drainCount = 0; + drainEvent = drain_event; + fetch.drain(); + decode.drain(); + rename.drain(); + iew.drain(); + 
commit.drain(); // Wake the CPU and record activity so everything can drain out if // the CPU is currently idle. wakeCPU(); activityRec.activity(); + + return false; } template void -FullO3CPU::signalSwitched() +FullO3CPU::resume() { - if (++switchCount == NumStages) { - fetch.doSwitchOut(); - rename.doSwitchOut(); - commit.doSwitchOut(); - instList.clear(); - while (!removeList.empty()) { - removeList.pop(); - } + if (_status == SwitchedOut) + return; + fetch.resume(); + decode.resume(); + rename.resume(); + iew.resume(); + commit.resume(); -#if USE_CHECKER - if (checker) - checker->switchOut(); -#endif + if (!tickEvent.scheduled()) + tickEvent.schedule(curTick); + _status = Running; +} +template +void +FullO3CPU::signalDrained() +{ + if (++drainCount == NumStages) { if (tickEvent.scheduled()) tickEvent.squash(); - _status = SwitchedOut; + _status = Drained; + drainEvent->process(); } - assert(switchCount <= 5); + assert(drainCount <= 5); +} + +template +void +FullO3CPU::switchOut() +{ + fetch.switchOut(); + rename.switchOut(); + commit.switchOut(); + instList.clear(); + while (!removeList.empty()) { + removeList.pop(); + } + + _status = SwitchedOut; +#if USE_CHECKER + if (checker) + checker->switchOut(); +#endif } template diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index bd04516019..cf37476014 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -57,6 +57,8 @@ class Checker; class ThreadContext; template class O3ThreadContext; + +class Checkpoint; class MemObject; class Process; @@ -109,6 +111,7 @@ class FullO3CPU : public BaseO3CPU Idle, Halted, Blocked, + Drained, SwitchedOut }; @@ -270,14 +273,21 @@ class FullO3CPU : public BaseO3CPU */ virtual void syscall(int tid) { panic("Unimplemented!"); } - /** Switches out this CPU. */ - void switchOut(); + /** Starts draining the CPU's pipeline of all instructions in + * order to stop all memory accesses. */ + virtual bool drain(Event *drain_event); + + /** Resumes execution after a drain. 
*/ + virtual void resume(); /** Signals to this CPU that a stage has completed switching out. */ - void signalSwitched(); + void signalDrained(); + + /** Switches out this CPU. */ + virtual void switchOut(); /** Takes over from another CPU. */ - void takeOverFrom(BaseCPU *oldCPU); + virtual void takeOverFrom(BaseCPU *oldCPU); /** Get the current instruction sequence number, and increment it. */ InstSeqNum getAndIncrementInstSeq() @@ -550,8 +560,11 @@ class FullO3CPU : public BaseO3CPU /** Pointer to memory. */ MemObject *mem; - /** Counter of how many stages have completed switching out. */ - int switchCount; + /** Event to call process() on once draining has completed. */ + Event *drainEvent; + + /** Counter of how many stages have completed draining. */ + int drainCount; /** Pointers to all of the threads in the CPU. */ std::vector thread; diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh index 1edf3335db..1e96f18848 100644 --- a/src/cpu/o3/decode.hh +++ b/src/cpu/o3/decode.hh @@ -109,8 +109,14 @@ class DefaultDecode /** Sets pointer to list of active threads. */ void setActiveThreads(std::list *at_ptr); + /** Drains the decode stage. */ + void drain(); + + /** Resumes execution after a drain. */ + void resume() { } + /** Switches out the decode stage. */ - void switchOut(); + void switchOut() { } /** Takes over from another CPU's thread. */ void takeOverFrom(); diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 16be017845..71637883bc 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -166,10 +166,10 @@ DefaultDecode::setActiveThreads(list *at_ptr) template void -DefaultDecode::switchOut() +DefaultDecode::drain() { - // Decode can immediately switch out. - cpu->signalSwitched(); + // Decode is done draining at any time. 
+ cpu->signalDrained(); } template diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 848ebf39e0..9611f0455b 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -180,11 +180,14 @@ class DefaultFetch /** Processes cache completion event. */ void processCacheCompletion(PacketPtr pkt); - /** Begins the switch out of the fetch stage. */ - void switchOut(); + /** Begins the drain of the fetch stage. */ + void drain(); - /** Completes the switch out of the fetch stage. */ - void doSwitchOut(); + /** Resumes execution after a drain. */ + void resume(); + + /** Tells fetch stage to prepare to be switched out. */ + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); @@ -421,6 +424,9 @@ class DefaultFetch */ bool interruptPending; + /** Is there a drain pending. */ + bool drainPending; + /** Records if fetch is switched out. */ bool switchedOut; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 60eb76d178..500b5304eb 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -109,6 +109,7 @@ DefaultFetch::DefaultFetch(Params *params) numThreads(params->numberOfThreads), numFetchingThreads(params->smtNumFetchingThreads), interruptPending(false), + drainPending(false), switchedOut(false) { if (numThreads > Impl::MaxThreads) @@ -353,7 +354,8 @@ DefaultFetch::processCacheCompletion(PacketPtr pkt) // to return. if (fetchStatus[tid] != IcacheWaitResponse || pkt->req != memReq[tid] || - isSwitchedOut()) { + isSwitchedOut() || + drainPending) { ++fetchIcacheSquashes; delete pkt->req; delete pkt; @@ -384,17 +386,25 @@ DefaultFetch::processCacheCompletion(PacketPtr pkt) template void -DefaultFetch::switchOut() +DefaultFetch::drain() { - // Fetch is ready to switch out at any time. - switchedOut = true; - cpu->signalSwitched(); + // Fetch is ready to drain at any time. 
+ cpu->signalDrained(); + drainPending = true; } template void -DefaultFetch::doSwitchOut() +DefaultFetch::resume() { + drainPending = false; +} + +template +void +DefaultFetch::switchOut() +{ + switchedOut = true; // Branch predictor needs to have its state cleared. branchPred.switchOut(); } @@ -498,7 +508,7 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid unsigned flags = 0; #endif // FULL_SYSTEM - if (cacheBlocked || (interruptPending && flags == 0) || switchedOut) { + if (cacheBlocked || (interruptPending && flags == 0) || drainPending) { // Hold off fetch from getting new instructions when: // Cache is blocked, or // while an interrupt is pending and we're not in PAL mode, or diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 9627609c22..774b6dcbdc 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -143,11 +143,14 @@ class DefaultIEW /** Sets pointer to the scoreboard. */ void setScoreboard(Scoreboard *sb_ptr); - /** Starts switch out of IEW stage. */ - void switchOut(); + /** Drains IEW stage. */ + void drain(); + + /** Resumes execution after a drain. */ + void resume(); /** Completes switch out of IEW stage. */ - void doSwitchOut(); + void switchOut(); /** Takes over from another CPU's thread. */ void takeOverFrom(); diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 118038b651..c3aa748ae8 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -355,15 +355,21 @@ DefaultIEW::setScoreboard(Scoreboard *sb_ptr) template void -DefaultIEW::switchOut() +DefaultIEW::drain() { - // IEW is ready to switch out at any time. - cpu->signalSwitched(); + // IEW is ready to drain at any time. + cpu->signalDrained(); } template void -DefaultIEW::doSwitchOut() +DefaultIEW::resume() +{ +} + +template +void +DefaultIEW::switchOut() { // Clear any state. 
switchedOut = true; diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index 581fc8f817..538dd9bb43 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -157,12 +157,15 @@ class DefaultRename /** Sets pointer to the scoreboard. */ void setScoreboard(Scoreboard *_scoreboard); + /** Drains the rename stage. */ + void drain(); + + /** Resumes execution after a drain. */ + void resume() { } + /** Switches out the rename stage. */ void switchOut(); - /** Completes the switch out. */ - void doSwitchOut(); - /** Takes over from another CPU's thread. */ void takeOverFrom(); diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index df8b7f9dab..fddbae3dba 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -258,15 +258,15 @@ DefaultRename::setScoreboard(Scoreboard *_scoreboard) template void -DefaultRename::switchOut() +DefaultRename::drain() { // Rename is ready to switch out at any time. - cpu->signalSwitched(); + cpu->signalDrained(); } template void -DefaultRename::doSwitchOut() +DefaultRename::switchOut() { // Clear any state, fix up the rename map. 
for (int i = 0; i < numThreads; i++) { From 93839380e7dc4799d234843d10329c03d38487fa Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Thu, 6 Jul 2006 14:41:01 -0400 Subject: [PATCH 108/152] Add default responder to bus Update configuration for new default responder on bus Update to devices to handle their own pci config space without pciconfigall Remove most of pciconfigall, it now is a dumbdevice which gets its address based on the bus it's supposed to respond for Remove need for pci config space from platform, add registerPciDevice function to prevent more than one device from having same bus:dev:func and interrupt Remove pciconfigspace from pci devices, and py files Add calcConfigAddr that returns address for config space based on bus/dev/function + offset configs/test/fs.py: Update configuration for new default responder on bus src/dev/ide_ctrl.cc: src/dev/ide_ctrl.hh: src/dev/ns_gige.cc: src/dev/ns_gige.hh: src/dev/pcidev.cc: src/dev/pcidev.hh: Update to handle its own pci config space without pciconfigall src/dev/io_device.cc: src/dev/io_device.hh: change naming for pio port break out recvTiming into two functions to reuse code src/dev/pciconfigall.cc: src/dev/pciconfigall.hh: removing most of pciconfigall, it now is a dumbdevice which gets its address based on the bus it's supposed to respond for src/dev/pcireg.h: add a max size for PCI config space (per PCI spec) src/dev/platform.cc: src/dev/platform.hh: remove need for pci config space from platform, add registerPciDevice function to prevent more than one device from having same bus:dev:func and interrupt src/dev/sinic.cc: remove pciconfigspace as it's no longer a needed parameter src/dev/tsunami.cc: src/dev/tsunami.hh: src/dev/tsunami_pchip.cc: src/dev/tsunami_pchip.hh: add calcConfigAddr that returns address for config space based on bus/dev/function + offset (per PCI spec) src/mem/bus.cc: src/mem/bus.hh: src/python/m5/objects/Bus.py: add idea of default responder to bus src/python/m5/objects/Pci.py:
add config port for pci devices add latency, bus and size parameters for pci config all (min is 8MB, max is 256MB see pci spec) --HG-- extra : convert_revision : 99db43b0a3a077f86611d6eaff6664a3885da7c9 --- configs/test/fs.py | 8 +- src/dev/ide_ctrl.cc | 240 +++++++++++------------ src/dev/ide_ctrl.hh | 8 +- src/dev/io_device.cc | 22 ++- src/dev/io_device.hh | 4 +- src/dev/ns_gige.cc | 30 +-- src/dev/ns_gige.hh | 3 +- src/dev/pciconfigall.cc | 131 ++----------- src/dev/pciconfigall.hh | 66 ++----- src/dev/pcidev.cc | 355 ++++++++++++++++++++--------------- src/dev/pcidev.hh | 112 +++++++---- src/dev/pcireg.h | 1 + src/dev/platform.cc | 16 ++ src/dev/platform.hh | 15 +- src/dev/sinic.cc | 4 - src/dev/tsunami.cc | 7 + src/dev/tsunami.hh | 6 + src/dev/tsunami_pchip.cc | 11 ++ src/dev/tsunami_pchip.hh | 5 + src/mem/bus.cc | 116 +++++++++--- src/mem/bus.hh | 12 +- src/python/m5/objects/Bus.py | 1 + src/python/m5/objects/Pci.py | 10 +- 23 files changed, 615 insertions(+), 568 deletions(-) diff --git a/configs/test/fs.py b/configs/test/fs.py index aa530dd55c..e0dd38e415 100644 --- a/configs/test/fs.py +++ b/configs/test/fs.py @@ -1,6 +1,6 @@ import m5 from m5.objects import * -import os +import os,optparse,sys from SysPaths import * parser = optparse.OptionParser(option_list=m5.standardOptions) @@ -98,7 +98,7 @@ class SpecwebFilesetDisk(IdeDisk): class BaseTsunami(Tsunami): cchip = TsunamiCChip(pio_addr=0x801a0000000) pchip = TsunamiPChip(pio_addr=0x80180000000) - pciconfig = PciConfigAll(pio_addr=0x801fe000000) + pciconfig = PciConfigAll() fake_sm_chip = IsaFake(pio_addr=0x801fc000370) fake_uart1 = IsaFake(pio_addr=0x801fc0002f8) @@ -151,16 +151,18 @@ class MyLinuxAlphaSystem(LinuxAlphaSystem): tsunami = LinuxTsunami() tsunami.cchip.pio = magicbus.port tsunami.pchip.pio = magicbus.port - tsunami.pciconfig.pio = magicbus.port + tsunami.pciconfig.pio = magicbus.default tsunami.fake_sm_chip.pio = magicbus.port tsunami.ethernet.pio = magicbus.port tsunami.ethernet.dma = 
magicbus.port + tsunami.ethernet.config = magicbus.port tsunami.fake_uart1.pio = magicbus.port tsunami.fake_uart2.pio = magicbus.port tsunami.fake_uart3.pio = magicbus.port tsunami.fake_uart4.pio = magicbus.port tsunami.ide.pio = magicbus.port tsunami.ide.dma = magicbus.port + tsunami.ide.config = magicbus.port tsunami.fake_ppc.pio = magicbus.port tsunami.fake_OROM.pio = magicbus.port tsunami.fake_pnp_addr.pio = magicbus.port diff --git a/src/dev/ide_ctrl.cc b/src/dev/ide_ctrl.cc index 63435e87c1..5ffc02d349 100644 --- a/src/dev/ide_ctrl.cc +++ b/src/dev/ide_ctrl.cc @@ -227,177 +227,143 @@ IdeController::setDmaComplete(IdeDisk *disk) // Read and write handling //// -void -IdeController::readConfig(int offset, uint8_t *data) +Tick +IdeController::readConfig(Packet *pkt) { - if (offset < PCI_DEVICE_SPECIFIC) { - PciDev::readConfig(offset, data); - } else if (offset >= IDE_CTRL_CONF_START && - (offset + 1) <= IDE_CTRL_CONF_END) { + int offset = pkt->getAddr() & PCI_CONFIG_SIZE; + if (offset < PCI_DEVICE_SPECIFIC) + return PciDev::readConfig(pkt); + assert(offset >= IDE_CTRL_CONF_START && (offset + 1) <= IDE_CTRL_CONF_END); + pkt->allocate(); + + switch (pkt->getSize()) { + case sizeof(uint8_t): switch (offset) { case IDE_CTRL_CONF_DEV_TIMING: - *data = config_regs.sidetim; + pkt->set(config_regs.sidetim); break; case IDE_CTRL_CONF_UDMA_CNTRL: - *data = config_regs.udmactl; + pkt->set(config_regs.udmactl); break; case IDE_CTRL_CONF_PRIM_TIMING+1: - *data = htole(config_regs.idetim0) >> 8; + pkt->set(htole(config_regs.idetim0) >> 8); break; case IDE_CTRL_CONF_SEC_TIMING+1: - *data = htole(config_regs.idetim1) >> 8; + pkt->set(htole(config_regs.idetim1) >> 8); break; case IDE_CTRL_CONF_IDE_CONFIG: - *data = htole(config_regs.ideconfig) & 0xFF; + pkt->set(htole(config_regs.ideconfig) & 0xFF); break; case IDE_CTRL_CONF_IDE_CONFIG+1: - *data = htole(config_regs.ideconfig) >> 8; + pkt->set(htole(config_regs.ideconfig) >> 8); break; default: panic("Invalid PCI configuration 
read for size 1 at offset: %#x!\n", offset); } - - } else { - panic("Read of unimplemented PCI config. register: %x\n", offset); - } - DPRINTF(IdeCtrl, "PCI read offset: %#x size: 1 data: %#x\n", - offset, (uint32_t)*data); -} - -void -IdeController::readConfig(int offset, uint16_t *data) -{ - if (offset < PCI_DEVICE_SPECIFIC) { - PciDev::readConfig(offset, data); - } else if (offset >= IDE_CTRL_CONF_START && - (offset + 2) <= IDE_CTRL_CONF_END) { - + DPRINTF(IdeCtrl, "PCI read offset: %#x size: 1 data: %#x\n", offset, + (uint32_t)pkt->get()); + break; + case sizeof(uint16_t): switch (offset) { case IDE_CTRL_CONF_PRIM_TIMING: - *data = config_regs.idetim0; + pkt->set(config_regs.idetim0); break; case IDE_CTRL_CONF_SEC_TIMING: - *data = config_regs.idetim1; + pkt->set(config_regs.idetim1); break; case IDE_CTRL_CONF_UDMA_TIMING: - *data = config_regs.udmatim; + pkt->set(config_regs.udmatim); break; case IDE_CTRL_CONF_IDE_CONFIG: - *data = config_regs.ideconfig; + pkt->set(config_regs.ideconfig); break; default: panic("Invalid PCI configuration read for size 2 offset: %#x!\n", offset); } - - } else { - panic("Read of unimplemented PCI config. register: %x\n", offset); + DPRINTF(IdeCtrl, "PCI read offset: %#x size: 2 data: %#x\n", offset, + (uint32_t)pkt->get()); + break; + case sizeof(uint32_t): + panic("No 32bit reads implemented for this device."); + DPRINTF(IdeCtrl, "PCI read offset: %#x size: 4 data: %#x\n", offset, + (uint32_t)pkt->get()); + break; + default: + panic("invalid access size(?) for PCI configspace!\n"); } - DPRINTF(IdeCtrl, "PCI read offset: %#x size: 2 data: %#x\n", offset, *data); + pkt->result = Packet::Success; + return configDelay; + } -void -IdeController::readConfig(int offset, uint32_t *data) -{ - if (offset < PCI_DEVICE_SPECIFIC) { - PciDev::readConfig(offset, data); - } else { - panic("Read of unimplemented PCI config. 
register: %x\n", offset); - } - DPRINTF(IdeCtrl, "PCI read offset: %#x size: 4 data: %#x\n", offset, *data); -} -void -IdeController::writeConfig(int offset, const uint8_t data) -{ - if (offset < PCI_DEVICE_SPECIFIC) { - PciDev::writeConfig(offset, data); - } else if (offset >= IDE_CTRL_CONF_START && - (offset + 1) <= IDE_CTRL_CONF_END) { - switch (offset) { - case IDE_CTRL_CONF_DEV_TIMING: - config_regs.sidetim = data; +Tick +IdeController::writeConfig(Packet *pkt) +{ + int offset = pkt->getAddr() & PCI_CONFIG_SIZE; + if (offset < PCI_DEVICE_SPECIFIC) { + PciDev::writeConfig(pkt); + } else { + assert(offset >= IDE_CTRL_CONF_START && (offset + 1) <= IDE_CTRL_CONF_END); + + switch (pkt->getSize()) { + case sizeof(uint8_t): + switch (offset) { + case IDE_CTRL_CONF_DEV_TIMING: + config_regs.sidetim = pkt->get(); + break; + case IDE_CTRL_CONF_UDMA_CNTRL: + config_regs.udmactl = pkt->get(); + break; + case IDE_CTRL_CONF_IDE_CONFIG: + config_regs.ideconfig = (config_regs.ideconfig & 0xFF00) | + (pkt->get()); + break; + case IDE_CTRL_CONF_IDE_CONFIG+1: + config_regs.ideconfig = (config_regs.ideconfig & 0x00FF) | + pkt->get() << 8; + break; + default: + panic("Invalid PCI configuration write for size 1 offset: %#x!\n", + offset); + } + DPRINTF(IdeCtrl, "PCI write offset: %#x size: 1 data: %#x\n", + offset, (uint32_t)pkt->get()); break; - case IDE_CTRL_CONF_UDMA_CNTRL: - config_regs.udmactl = data; + case sizeof(uint16_t): + switch (offset) { + case IDE_CTRL_CONF_PRIM_TIMING: + config_regs.idetim0 = pkt->get(); + break; + case IDE_CTRL_CONF_SEC_TIMING: + config_regs.idetim1 = pkt->get(); + break; + case IDE_CTRL_CONF_UDMA_TIMING: + config_regs.udmatim = pkt->get(); + break; + case IDE_CTRL_CONF_IDE_CONFIG: + config_regs.ideconfig = pkt->get(); + break; + default: + panic("Invalid PCI configuration write for size 2 offset: %#x!\n", + offset); + } + DPRINTF(IdeCtrl, "PCI write offset: %#x size: 2 data: %#x\n", + offset, (uint32_t)pkt->get()); break; - case 
IDE_CTRL_CONF_IDE_CONFIG: - config_regs.ideconfig = (config_regs.ideconfig & 0xFF00) | (data); - break; - case IDE_CTRL_CONF_IDE_CONFIG+1: - config_regs.ideconfig = (config_regs.ideconfig & 0x00FF) | data << 8; + case sizeof(uint32_t): + panic("Write of unimplemented PCI config. register: %x\n", offset); break; default: - panic("Invalid PCI configuration write for size 1 offset: %#x!\n", - offset); + panic("invalid access size(?) for PCI configspace!\n"); } - - } else { - panic("Read of unimplemented PCI config. register: %x\n", offset); - } - DPRINTF(IdeCtrl, "PCI write offset: %#x size: 1 data: %#x\n", - offset, (uint32_t)data); -} - -void -IdeController::writeConfig(int offset, const uint16_t data) -{ - if (offset < PCI_DEVICE_SPECIFIC) { - PciDev::writeConfig(offset, data); - } else if (offset >= IDE_CTRL_CONF_START && - (offset + 2) <= IDE_CTRL_CONF_END) { - - switch (offset) { - case IDE_CTRL_CONF_PRIM_TIMING: - config_regs.idetim0 = data; - break; - case IDE_CTRL_CONF_SEC_TIMING: - config_regs.idetim1 = data; - break; - case IDE_CTRL_CONF_UDMA_TIMING: - config_regs.udmatim = data; - break; - case IDE_CTRL_CONF_IDE_CONFIG: - config_regs.ideconfig = data; - break; - default: - panic("Invalid PCI configuration write for size 2 offset: %#x!\n", - offset); - } - - } else { - panic("Write of unimplemented PCI config. register: %x\n", offset); - } - DPRINTF(IdeCtrl, "PCI write offset: %#x size: 2 data: %#x\n", offset, data); - - /* Trap command register writes and enable IO/BM as appropriate. */ - if (offset == PCI_COMMAND) { - if (letoh(config.command) & PCI_CMD_IOSE) - io_enabled = true; - else - io_enabled = false; - - if (letoh(config.command) & PCI_CMD_BME) - bm_enabled = true; - else - bm_enabled = false; } -} - -void -IdeController::writeConfig(int offset, const uint32_t data) -{ - if (offset < PCI_DEVICE_SPECIFIC) { - PciDev::writeConfig(offset, data); - } else { - panic("Read of unimplemented PCI config. 
register: %x\n", offset); - } - - DPRINTF(IdeCtrl, "PCI write offset: %#x size: 4 data: %#x\n", offset, data); - + /* Trap command register writes and enable IO/BM as appropriate as well as + * BARs. */ switch(offset) { case PCI0_BASE_ADDR0: if (BARAddrs[0] != 0) @@ -423,9 +389,24 @@ IdeController::writeConfig(int offset, const uint32_t data) if (BARAddrs[4] != 0) bmi_addr = BARAddrs[4]; break; + + case PCI_COMMAND: + if (letoh(config.command) & PCI_CMD_IOSE) + io_enabled = true; + else + io_enabled = false; + + if (letoh(config.command) & PCI_CMD_BME) + bm_enabled = true; + else + bm_enabled = false; + break; } + pkt->result = Packet::Success; + return configDelay; } + Tick IdeController::read(Packet *pkt) { @@ -770,7 +751,6 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(IdeController) SimObjectParam system; SimObjectParam platform; - SimObjectParam configspace; SimObjectParam configdata; Param pci_bus; Param pci_dev; @@ -784,7 +764,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(IdeController) INIT_PARAM(system, "System pointer"), INIT_PARAM(platform, "Platform pointer"), - INIT_PARAM(configspace, "PCI Configspace"), INIT_PARAM(configdata, "PCI Config data"), INIT_PARAM(pci_bus, "PCI bus ID"), INIT_PARAM(pci_dev, "PCI device number"), @@ -800,7 +779,6 @@ CREATE_SIM_OBJECT(IdeController) params->name = getInstanceName(); params->platform = platform; params->system = system; - params->configSpace = configspace; params->configData = configdata; params->busNum = pci_bus; params->deviceNum = pci_dev; diff --git a/src/dev/ide_ctrl.hh b/src/dev/ide_ctrl.hh index 1d30c8b31f..5842d322ef 100644 --- a/src/dev/ide_ctrl.hh +++ b/src/dev/ide_ctrl.hh @@ -204,12 +204,8 @@ class IdeController : public PciDev IdeController(Params *p); ~IdeController(); - virtual void writeConfig(int offset, const uint8_t data); - virtual void writeConfig(int offset, const uint16_t data); - virtual void writeConfig(int offset, const uint32_t data); - virtual void readConfig(int offset, uint8_t *data); - virtual void 
readConfig(int offset, uint16_t *data); - virtual void readConfig(int offset, uint32_t *data); + virtual Tick writeConfig(Packet *pkt); + virtual Tick readConfig(Packet *pkt); void setDmaComplete(IdeDisk *disk); diff --git a/src/dev/io_device.cc b/src/dev/io_device.cc index e769ef0372..cb4850108f 100644 --- a/src/dev/io_device.cc +++ b/src/dev/io_device.cc @@ -34,8 +34,8 @@ #include "sim/builder.hh" -PioPort::PioPort(PioDevice *dev, Platform *p) - : Port(dev->name() + "-pioport"), device(dev), platform(p) +PioPort::PioPort(PioDevice *dev, Platform *p, std::string pname) + : Port(dev->name() + pname), device(dev), platform(p) { } @@ -79,19 +79,23 @@ PioPort::SendEvent::process() port->transmitList.push_back(packet); } +void +PioPort::resendNacked(Packet *pkt) { + pkt->reinitNacked(); + if (transmitList.size()) { + transmitList.push_front(pkt); + } else { + if (!Port::sendTiming(pkt)) + transmitList.push_front(pkt); + } +}; bool PioPort::recvTiming(Packet *pkt) { if (pkt->result == Packet::Nacked) { - pkt->reinitNacked(); - if (transmitList.size()) { - transmitList.push_front(pkt); - } else { - if (!Port::sendTiming(pkt)) - transmitList.push_front(pkt); - } + resendNacked(pkt); } else { Tick latency = device->recvAtomic(pkt); // turn packet around to go back to requester diff --git a/src/dev/io_device.hh b/src/dev/io_device.hh index a2b61c7f43..40edf68757 100644 --- a/src/dev/io_device.hh +++ b/src/dev/io_device.hh @@ -82,6 +82,8 @@ class PioPort : public Port virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop); + void resendNacked(Packet *pkt); + /** * This class is used to implemented sendTiming() with a delay. When a delay * is requested a new event is created. 
When the event time expires it @@ -113,7 +115,7 @@ class PioPort : public Port virtual void recvRetry(); public: - PioPort(PioDevice *dev, Platform *p); + PioPort(PioDevice *dev, Platform *p, std::string pname = "-pioport"); friend class PioPort::SendEvent; }; diff --git a/src/dev/ns_gige.cc b/src/dev/ns_gige.cc index 360fe8c9be..179a2c62dd 100644 --- a/src/dev/ns_gige.cc +++ b/src/dev/ns_gige.cc @@ -465,11 +465,12 @@ NSGigE::regStats() /** * This is to write to the PCI general configuration registers */ -void -NSGigE::writeConfig(int offset, const uint16_t data) +Tick +NSGigE::writeConfig(Packet *pkt) { + int offset = pkt->getAddr() & PCI_CONFIG_SIZE; if (offset < PCI_DEVICE_SPECIFIC) - PciDev::writeConfig(offset, data); + PciDev::writeConfig(pkt); else panic("Device specific PCI config space not implemented!\n"); @@ -484,6 +485,8 @@ NSGigE::writeConfig(int offset, const uint16_t data) ioEnable = false; break; } + pkt->result = Packet::Success; + return configDelay; } /** @@ -508,14 +511,7 @@ NSGigE::read(Packet *pkt) if (daddr > LAST && daddr <= RESERVED) { panic("Accessing reserved register"); } else if (daddr > RESERVED && daddr <= 0x3FC) { - if (pkt->getSize() == sizeof(uint8_t)) - readConfig(daddr & 0xff, pkt->getPtr()); - if (pkt->getSize() == sizeof(uint16_t)) - readConfig(daddr & 0xff, pkt->getPtr()); - if (pkt->getSize() == sizeof(uint32_t)) - readConfig(daddr & 0xff, pkt->getPtr()); - pkt->result = Packet::Success; - return pioDelay; + return readConfig(pkt); } else if (daddr >= MIB_START && daddr <= MIB_END) { // don't implement all the MIB's. 
hopefully the kernel // doesn't actually DEPEND upon their values @@ -733,14 +729,7 @@ NSGigE::write(Packet *pkt) if (daddr > LAST && daddr <= RESERVED) { panic("Accessing reserved register"); } else if (daddr > RESERVED && daddr <= 0x3FC) { - if (pkt->getSize() == sizeof(uint8_t)) - writeConfig(daddr & 0xff, pkt->get()); - if (pkt->getSize() == sizeof(uint16_t)) - writeConfig(daddr & 0xff, pkt->get()); - if (pkt->getSize() == sizeof(uint32_t)) - writeConfig(daddr & 0xff, pkt->get()); - pkt->result = Packet::Success; - return pioDelay; + return writeConfig(pkt); } else if (daddr > 0x3FC) panic("Something is messed up!\n"); @@ -2807,7 +2796,6 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(NSGigE) SimObjectParam system; SimObjectParam platform; - SimObjectParam configspace; SimObjectParam configdata; Param pci_bus; Param pci_dev; @@ -2841,7 +2829,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(NSGigE) INIT_PARAM(system, "System pointer"), INIT_PARAM(platform, "Platform pointer"), - INIT_PARAM(configspace, "PCI Configspace"), INIT_PARAM(configdata, "PCI Config data"), INIT_PARAM(pci_bus, "PCI bus ID"), INIT_PARAM(pci_dev, "PCI device number"), @@ -2879,7 +2866,6 @@ CREATE_SIM_OBJECT(NSGigE) params->name = getInstanceName(); params->platform = platform; params->system = system; - params->configSpace = configspace; params->configData = configdata; params->busNum = pci_bus; params->deviceNum = pci_dev; diff --git a/src/dev/ns_gige.hh b/src/dev/ns_gige.hh index 2f47026f38..ea72437770 100644 --- a/src/dev/ns_gige.hh +++ b/src/dev/ns_gige.hh @@ -114,7 +114,6 @@ struct dp_rom { class NSGigEInt; class Packet; -class PciConfigAll; /** * NS DP83820 Ethernet device model @@ -376,7 +375,7 @@ class NSGigE : public PciDev ~NSGigE(); const Params *params() const { return (const Params *)_params; } - virtual void writeConfig(int offset, const uint16_t data); + virtual Tick writeConfig(Packet *pkt); virtual Tick read(Packet *pkt); virtual Tick write(Packet *pkt); diff --git a/src/dev/pciconfigall.cc 
b/src/dev/pciconfigall.cc index 785774ff45..68013eab89 100644 --- a/src/dev/pciconfigall.cc +++ b/src/dev/pciconfigall.cc @@ -33,14 +33,8 @@ * PCI Configspace implementation */ -#include -#include -#include -#include - #include "base/trace.hh" #include "dev/pciconfigall.hh" -#include "dev/pcidev.hh" #include "dev/pcireg.h" #include "dev/platform.hh" #include "mem/packet.hh" @@ -50,151 +44,61 @@ using namespace std; PciConfigAll::PciConfigAll(Params *p) - : BasicPioDevice(p) + : PioDevice(p) { - pioSize = 0xffffff; - - // Set backpointer for pci config. Really the config stuff should be able to - // automagically do this - p->platform->pciconfig = this; - - // Make all the pointers to devices null - for(int x=0; x < MAX_PCI_DEV; x++) - for(int y=0; y < MAX_PCI_FUNC; y++) - devices[x][y] = NULL; + pioAddr = p->platform->calcConfigAddr(params()->bus,0,0); } -// If two interrupts share the same line largely bad things will happen. -// Since we don't track how many times an interrupt was set and correspondingly -// cleared two devices on the same interrupt line and assert and deassert each -// others interrupt "line". Interrupts will not work correctly. 
-void -PciConfigAll::startup() -{ - bitset<256> intLines; - PciDev *tempDev; - uint8_t intline; - - for (int x = 0; x < MAX_PCI_DEV; x++) { - for (int y = 0; y < MAX_PCI_FUNC; y++) { - if (devices[x][y] != NULL) { - tempDev = devices[x][y]; - intline = tempDev->interruptLine(); - if (intLines.test(intline)) - warn("Interrupt line %#X is used multiple times" - "(You probably want to fix this).\n", (uint32_t)intline); - else - intLines.set(intline); - } // devices != NULL - } // PCI_FUNC - } // PCI_DEV - -} Tick PciConfigAll::read(Packet *pkt) { assert(pkt->result == Packet::Unknown); - assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize); - - Addr daddr = pkt->getAddr() - pioAddr; - int device = (daddr >> 11) & 0x1F; - int func = (daddr >> 8) & 0x7; - int reg = daddr & 0xFF; pkt->allocate(); - DPRINTF(PciConfigAll, "read va=%#x da=%#x size=%d\n", pkt->getAddr(), daddr, + DPRINTF(PciConfigAll, "read va=%#x size=%d\n", pkt->getAddr(), pkt->getSize()); switch (pkt->getSize()) { case sizeof(uint32_t): - if (devices[device][func] == NULL) - pkt->set(0xFFFFFFFF); - else - devices[device][func]->readConfig(reg, pkt->getPtr()); + pkt->set(0xFFFFFFFF); break; case sizeof(uint16_t): - if (devices[device][func] == NULL) - pkt->set(0xFFFF); - else - devices[device][func]->readConfig(reg, pkt->getPtr()); + pkt->set(0xFFFF); break; case sizeof(uint8_t): - if (devices[device][func] == NULL) - pkt->set(0xFF); - else - devices[device][func]->readConfig(reg, pkt->getPtr()); + pkt->set(0xFF); break; default: panic("invalid access size(?) 
for PCI configspace!\n"); } pkt->result = Packet::Success; - return pioDelay; + return params()->pio_delay; } Tick PciConfigAll::write(Packet *pkt) { assert(pkt->result == Packet::Unknown); - assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize); - assert(pkt->getSize() == sizeof(uint8_t) || pkt->getSize() == sizeof(uint16_t) || - pkt->getSize() == sizeof(uint32_t)); - Addr daddr = pkt->getAddr() - pioAddr; - - int device = (daddr >> 11) & 0x1F; - int func = (daddr >> 8) & 0x7; - int reg = daddr & 0xFF; - - if (devices[device][func] == NULL) - panic("Attempting to write to config space on non-existant device\n"); - - DPRINTF(PciConfigAll, "write - va=%#x size=%d data=%#x\n", - pkt->getAddr(), pkt->getSize(), pkt->get()); - - switch (pkt->getSize()) { - case sizeof(uint8_t): - devices[device][func]->writeConfig(reg, pkt->get()); - break; - case sizeof(uint16_t): - devices[device][func]->writeConfig(reg, pkt->get()); - break; - case sizeof(uint32_t): - devices[device][func]->writeConfig(reg, pkt->get()); - break; - default: - panic("invalid pci config write size\n"); - } - pkt->result = Packet::Success; - return pioDelay; + panic("Attempting to write to config space on non-existant device\n"); } void -PciConfigAll::serialize(std::ostream &os) +PciConfigAll::addressRanges(AddrRangeList &range_list) { - /* - * There is no state associated with this object that requires - * serialization. The only real state are the device pointers - * which are all setup by the constructor of the PciDev class - */ + range_list.clear(); + range_list.push_back(RangeSize(pioAddr, params()->size)); } -void -PciConfigAll::unserialize(Checkpoint *cp, const std::string §ion) -{ - /* - * There is no state associated with this object that requires - * serialization. 
The only real state are the device pointers - * which are all setup by the constructor of the PciDev class - */ -} #ifndef DOXYGEN_SHOULD_SKIP_THIS BEGIN_DECLARE_SIM_OBJECT_PARAMS(PciConfigAll) - Param pio_addr; Param pio_latency; + Param bus; + Param size; SimObjectParam platform; SimObjectParam system; @@ -202,8 +106,9 @@ END_DECLARE_SIM_OBJECT_PARAMS(PciConfigAll) BEGIN_INIT_SIM_OBJECT_PARAMS(PciConfigAll) - INIT_PARAM(pio_addr, "Device Address"), INIT_PARAM(pio_latency, "Programmed IO latency"), + INIT_PARAM(bus, "Bus that this object handles config space for"), + INIT_PARAM(size, "The size of config space"), INIT_PARAM(platform, "platform"), INIT_PARAM(system, "system object") @@ -211,11 +116,13 @@ END_INIT_SIM_OBJECT_PARAMS(PciConfigAll) CREATE_SIM_OBJECT(PciConfigAll) { - BasicPioDevice::Params *p = new BasicPioDevice::Params; - p->pio_addr = pio_addr; + PciConfigAll::Params *p = new PciConfigAll::Params; p->pio_delay = pio_latency; p->platform = platform; p->system = system; + p->bus = bus; + p->size = size; + return new PciConfigAll(p); } diff --git a/src/dev/pciconfigall.hh b/src/dev/pciconfigall.hh index e60fd949b6..07eaf81125 100644 --- a/src/dev/pciconfigall.hh +++ b/src/dev/pciconfigall.hh @@ -42,11 +42,6 @@ #include "dev/io_device.hh" -static const uint32_t MAX_PCI_DEV = 32; -static const uint32_t MAX_PCI_FUNC = 8; - -class PciDev; - /** * PCI Config Space * All of PCI config space needs to return -1 on Tsunami, except @@ -54,45 +49,28 @@ class PciDev; * space and passes the requests on to TsunamiPCIDev devices as * appropriate. */ -class PciConfigAll : public BasicPioDevice +class PciConfigAll : public PioDevice { - private: - /** - * Pointers to all the devices that are registered with this - * particular config space. 
- */ - PciDev* devices[MAX_PCI_DEV][MAX_PCI_FUNC]; - public: + struct Params : public PioDevice::Params + { + Tick pio_delay; + Addr size; + int bus; + }; + const Params *params() const { return (const Params *)_params; } + /** * Constructor for PCIConfigAll * @param p parameters structure */ PciConfigAll(Params *p); - /** - * Check if a device exists. - * @param pcidev PCI device to check - * @param pcifunc PCI function to check - * @return true if device exists, false otherwise - */ - bool deviceExists(uint32_t pcidev, uint32_t pcifunc) - { return devices[pcidev][pcifunc] != NULL ? true : false; } - - /** - * Registers a device with the config space object. - * @param pcidev PCI device to register - * @param pcifunc PCI function to register - * @param device device to register - */ - void registerDevice(uint8_t pcidev, uint8_t pcifunc, PciDev *device) - { devices[pcidev][pcifunc] = device; } - /** * Read something in PCI config space. If the device does not exist * -1 is returned, if the device does exist its PciDev::ReadConfig (or the * virtual function that overrides) it is called. - * @param pkt Contains the address of the field to read. + * @param pkt Contains information about the read operation * @return Amount of time to do the read */ virtual Tick read(Packet *pkt); @@ -101,31 +79,17 @@ class PciConfigAll : public BasicPioDevice * Write to PCI config spcae. If the device does not exit the simulator * panics. If it does it is passed on the PciDev::WriteConfig (or the virtual * function that overrides it). - * @param req Contains the address to write to. - * @param data The data to write. - * @return The fault condition of the access. 
+ * @param pkt Contains information about the write operation + * @return Amount of time to do the read */ virtual Tick write(Packet *pkt); - /** - * Start up function to check if more than one person is using an interrupt line - * and print a warning if such a case exists - */ - virtual void startup(); + void addressRanges(AddrRangeList &range_list); - /** - * Serialize this object to the given output stream. - * @param os The stream to serialize to. - */ - virtual void serialize(std::ostream &os); + private: + Addr pioAddr; - /** - * Reconstruct the state of this object from a checkpoint. - * @param cp The checkpoint use. - * @param section The section name of this object - */ - virtual void unserialize(Checkpoint *cp, const std::string §ion); }; #endif // __PCICONFIGALL_HH__ diff --git a/src/dev/pcidev.cc b/src/dev/pcidev.cc index f8db2efbcb..62a7324ad4 100644 --- a/src/dev/pcidev.cc +++ b/src/dev/pcidev.cc @@ -53,9 +53,63 @@ using namespace std; + +PciDev::PciConfigPort::PciConfigPort(PciDev *dev, int busid, int devid, + int funcid, Platform *p) + : PioPort(dev,p,"-pciconf"), device(dev), busId(busid), deviceId(devid), + functionId(funcid) +{ + configAddr = platform->calcConfigAddr(busId, deviceId, functionId); +} + + +Tick +PciDev::PciConfigPort::recvAtomic(Packet *pkt) +{ + assert(pkt->result == Packet::Unknown); + assert(pkt->getAddr() >= configAddr && pkt->getAddr() < configAddr + + PCI_CONFIG_SIZE); + return device->recvConfig(pkt); +} + +void +PciDev::PciConfigPort::recvFunctional(Packet *pkt) +{ + assert(pkt->result == Packet::Unknown); + assert(pkt->getAddr() >= configAddr && pkt->getAddr() < configAddr + + PCI_CONFIG_SIZE); + device->recvConfig(pkt); +} + +void +PciDev::PciConfigPort::getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) +{ + snoop.clear(); + resp.push_back(RangeSize(configAddr, PCI_CONFIG_SIZE+1)); +} + + +bool +PciDev::PciConfigPort::recvTiming(Packet *pkt) +{ + if (pkt->result == Packet::Nacked) { + resendNacked(pkt); + 
} else { + assert(pkt->result == Packet::Unknown); + assert(pkt->getAddr() >= configAddr && pkt->getAddr() < configAddr + + PCI_CONFIG_SIZE); + Tick latency = device->recvConfig(pkt); + // turn packet around to go back to requester + pkt->makeTimingResponse(); + sendTiming(pkt, latency); + } + return true; +} + PciDev::PciDev(Params *p) : DmaDevice(p), plat(p->platform), configData(p->configData), - pioDelay(p->pio_delay) + pioDelay(p->pio_delay), configDelay(p->config_delay), + configPort(NULL) { // copy the config data from the PciConfigData object if (configData) { @@ -65,25 +119,56 @@ PciDev::PciDev(Params *p) } else panic("NULL pointer to configuration data"); - // Setup pointer in config space to point to this entry - if (p->configSpace->deviceExists(p->deviceNum, p->functionNum)) - panic("Two PCI devices occuping same dev: %#x func: %#x", - p->deviceNum, p->functionNum); - else - p->configSpace->registerDevice(p->deviceNum, p->functionNum, this); + plat->registerPciDevice(0, p->deviceNum, p->functionNum, + letoh(configData->config.interruptLine)); } void -PciDev::readConfig(int offset, uint8_t *data) +PciDev::init() { + if (!configPort) + panic("pci config port not connected to anything!"); + configPort->sendStatusChange(Port::RangeChange); + PioDevice::init(); +} + +Tick +PciDev::readConfig(Packet *pkt) +{ + int offset = pkt->getAddr() & PCI_CONFIG_SIZE; if (offset >= PCI_DEVICE_SPECIFIC) panic("Device specific PCI config space not implemented!\n"); - *data = config.data[offset]; + pkt->allocate(); - DPRINTF(PCIDEV, + switch (pkt->getSize()) { + case sizeof(uint8_t): + pkt->set(config.data[offset]); + DPRINTF(PCIDEV, "read device: %#x function: %#x register: %#x 1 bytes: data: %#x\n", - params()->deviceNum, params()->functionNum, offset, *data); + params()->deviceNum, params()->functionNum, offset, + (uint32_t)pkt->get()); + break; + case sizeof(uint16_t): + pkt->set(*(uint16_t*)&config.data[offset]); + DPRINTF(PCIDEV, + "read device: %#x function: %#x 
register: %#x 2 bytes: data: %#x\n", + params()->deviceNum, params()->functionNum, offset, + (uint32_t)pkt->get()); + break; + case sizeof(uint32_t): + pkt->set(*(uint32_t*)&config.data[offset]); + DPRINTF(PCIDEV, + "read device: %#x function: %#x register: %#x 4 bytes: data: %#x\n", + params()->deviceNum, params()->functionNum, offset, + (uint32_t)pkt->get()); + break; + default: + panic("invalid access size(?) for PCI configspace!\n"); + } + pkt->result = Packet::Success; + return configDelay; + } void @@ -96,158 +181,128 @@ PciDev::addressRanges(AddrRangeList &range_list) range_list.push_back(RangeSize(BARAddrs[x],BARSize[x])); } -void -PciDev::readConfig(int offset, uint16_t *data) +Tick +PciDev::writeConfig(Packet *pkt) { + int offset = pkt->getAddr() & PCI_CONFIG_SIZE; if (offset >= PCI_DEVICE_SPECIFIC) panic("Device specific PCI config space not implemented!\n"); - *data = *(uint16_t*)&config.data[offset]; - - DPRINTF(PCIDEV, - "read device: %#x function: %#x register: %#x 2 bytes: data: %#x\n", - params()->deviceNum, params()->functionNum, offset, *data); -} - -void -PciDev::readConfig(int offset, uint32_t *data) -{ - if (offset >= PCI_DEVICE_SPECIFIC) - panic("Device specific PCI config space not implemented!\n"); - - *data = *(uint32_t*)&config.data[offset]; - - DPRINTF(PCIDEV, - "read device: %#x function: %#x register: %#x 4 bytes: data: %#x\n", - params()->deviceNum, params()->functionNum, offset, *data); -} - - -void -PciDev::writeConfig(int offset, const uint8_t data) -{ - if (offset >= PCI_DEVICE_SPECIFIC) - panic("Device specific PCI config space not implemented!\n"); - - DPRINTF(PCIDEV, - "write device: %#x function: %#x reg: %#x size: 1 data: %#x\n", - params()->deviceNum, params()->functionNum, offset, data); - - switch (offset) { - case PCI0_INTERRUPT_LINE: - config.interruptLine = data; - case PCI_CACHE_LINE_SIZE: - config.cacheLineSize = data; - case PCI_LATENCY_TIMER: - config.latencyTimer = data; - break; - /* Do nothing for these read-only 
registers */ - case PCI0_INTERRUPT_PIN: - case PCI0_MINIMUM_GRANT: - case PCI0_MAXIMUM_LATENCY: - case PCI_CLASS_CODE: - case PCI_REVISION_ID: - break; - default: - panic("writing to a read only register"); - } -} - -void -PciDev::writeConfig(int offset, const uint16_t data) -{ - if (offset >= PCI_DEVICE_SPECIFIC) - panic("Device specific PCI config space not implemented!\n"); - - DPRINTF(PCIDEV, - "write device: %#x function: %#x reg: %#x size: 2 data: %#x\n", - params()->deviceNum, params()->functionNum, offset, data); - - switch (offset) { - case PCI_COMMAND: - config.command = data; - case PCI_STATUS: - config.status = data; - case PCI_CACHE_LINE_SIZE: - config.cacheLineSize = data; - break; - default: - panic("writing to a read only register"); - } -} - - -void -PciDev::writeConfig(int offset, const uint32_t data) -{ - if (offset >= PCI_DEVICE_SPECIFIC) - panic("Device specific PCI config space not implemented!\n"); - - DPRINTF(PCIDEV, - "write device: %#x function: %#x reg: %#x size: 4 data: %#x\n", - params()->deviceNum, params()->functionNum, offset, data); - - switch (offset) { - case PCI0_BASE_ADDR0: - case PCI0_BASE_ADDR1: - case PCI0_BASE_ADDR2: - case PCI0_BASE_ADDR3: - case PCI0_BASE_ADDR4: - case PCI0_BASE_ADDR5: - - uint32_t barnum, bar_mask; - Addr base_addr, base_size, space_base; - - barnum = BAR_NUMBER(offset); - - if (BAR_IO_SPACE(letoh(config.baseAddr[barnum]))) { - bar_mask = BAR_IO_MASK; - space_base = TSUNAMI_PCI0_IO; - } else { - bar_mask = BAR_MEM_MASK; - space_base = TSUNAMI_PCI0_MEMORY; + switch (pkt->getSize()) { + case sizeof(uint8_t): + switch (offset) { + case PCI0_INTERRUPT_LINE: + config.interruptLine = pkt->get(); + case PCI_CACHE_LINE_SIZE: + config.cacheLineSize = pkt->get(); + case PCI_LATENCY_TIMER: + config.latencyTimer = pkt->get(); + break; + /* Do nothing for these read-only registers */ + case PCI0_INTERRUPT_PIN: + case PCI0_MINIMUM_GRANT: + case PCI0_MAXIMUM_LATENCY: + case PCI_CLASS_CODE: + case PCI_REVISION_ID: + 
break; + default: + panic("writing to a read only register"); } + DPRINTF(PCIDEV, + "write device: %#x function: %#x register: %#x 1 bytes: data: %#x\n", + params()->deviceNum, params()->functionNum, offset, + (uint32_t)pkt->get()); + break; + case sizeof(uint16_t): + switch (offset) { + case PCI_COMMAND: + config.command = pkt->get(); + case PCI_STATUS: + config.status = pkt->get(); + case PCI_CACHE_LINE_SIZE: + config.cacheLineSize = pkt->get(); + break; + default: + panic("writing to a read only register"); + } + DPRINTF(PCIDEV, + "write device: %#x function: %#x register: %#x 2 bytes: data: %#x\n", + params()->deviceNum, params()->functionNum, offset, + (uint32_t)pkt->get()); + break; + case sizeof(uint32_t): + switch (offset) { + case PCI0_BASE_ADDR0: + case PCI0_BASE_ADDR1: + case PCI0_BASE_ADDR2: + case PCI0_BASE_ADDR3: + case PCI0_BASE_ADDR4: + case PCI0_BASE_ADDR5: - // Writing 0xffffffff to a BAR tells the card to set the - // value of the bar to size of memory it needs - if (letoh(data) == 0xffffffff) { - // This is I/O Space, bottom two bits are read only + uint32_t barnum, bar_mask; + Addr base_addr, base_size, space_base; - config.baseAddr[barnum] = letoh( - (~(BARSize[barnum] - 1) & ~bar_mask) | - (letoh(config.baseAddr[barnum]) & bar_mask)); - } else { - config.baseAddr[barnum] = letoh( - (letoh(data) & ~bar_mask) | - (letoh(config.baseAddr[barnum]) & bar_mask)); + barnum = BAR_NUMBER(offset); - if (letoh(config.baseAddr[barnum]) & ~bar_mask) { - base_addr = (letoh(data) & ~bar_mask) + space_base; - base_size = BARSize[barnum]; - BARAddrs[barnum] = base_addr; - - pioPort->sendStatusChange(Port::RangeChange); + if (BAR_IO_SPACE(letoh(config.baseAddr[barnum]))) { + bar_mask = BAR_IO_MASK; + space_base = TSUNAMI_PCI0_IO; + } else { + bar_mask = BAR_MEM_MASK; + space_base = TSUNAMI_PCI0_MEMORY; } + + // Writing 0xffffffff to a BAR tells the card to set the + // value of the bar to size of memory it needs + if (letoh(pkt->get()) == 0xffffffff) { + // 
This is I/O Space, bottom two bits are read only + + config.baseAddr[barnum] = letoh( + (~(BARSize[barnum] - 1) & ~bar_mask) | + (letoh(config.baseAddr[barnum]) & bar_mask)); + } else { + config.baseAddr[barnum] = letoh( + (letoh(pkt->get()) & ~bar_mask) | + (letoh(config.baseAddr[barnum]) & bar_mask)); + + if (letoh(config.baseAddr[barnum]) & ~bar_mask) { + base_addr = (letoh(pkt->get()) & ~bar_mask) + space_base; + base_size = BARSize[barnum]; + BARAddrs[barnum] = base_addr; + + pioPort->sendStatusChange(Port::RangeChange); + } + } + break; + + case PCI0_ROM_BASE_ADDR: + if (letoh(pkt->get()) == 0xfffffffe) + config.expansionROM = htole((uint32_t)0xffffffff); + else + config.expansionROM = pkt->get(); + break; + + case PCI_COMMAND: + // This could also clear some of the error bits in the Status + // register. However they should never get set, so lets ignore + // it for now + config.command = pkt->get(); + break; + + default: + DPRINTF(PCIDEV, "Writing to a read only register"); } + DPRINTF(PCIDEV, + "write device: %#x function: %#x register: %#x 4 bytes: data: %#x\n", + params()->deviceNum, params()->functionNum, offset, + (uint32_t)pkt->get()); break; - - case PCI0_ROM_BASE_ADDR: - if (letoh(data) == 0xfffffffe) - config.expansionROM = htole((uint32_t)0xffffffff); - else - config.expansionROM = data; - break; - - case PCI_COMMAND: - // This could also clear some of the error bits in the Status - // register. However they should never get set, so lets ignore - // it for now - config.command = data; - break; - default: - DPRINTF(PCIDEV, "Writing to a read only register"); + panic("invalid access size(?) 
for PCI configspace!\n"); } + pkt->result = Packet::Success; + return configDelay; + } void diff --git a/src/dev/pcidev.hh b/src/dev/pcidev.hh index 92786427b3..20ab9364a8 100644 --- a/src/dev/pcidev.hh +++ b/src/dev/pcidev.hh @@ -47,8 +47,6 @@ #define BAR_IO_SPACE(x) ((x) & BAR_IO_SPACE_BIT) #define BAR_NUMBER(x) (((x) - PCI0_BASE_ADDR0) >> 0x2); -class PciConfigAll; - /** * This class encapulates the first 64 bytes of a singles PCI @@ -78,24 +76,41 @@ class PciConfigData : public SimObject Addr BARAddrs[6]; }; + /** * PCI device, base implemnation is only config space. - * Each device is connected to a PCIConfigSpace device - * which returns -1 for everything but the pcidevs that - * register with it. This object registers with the PCIConfig space - * object. */ class PciDev : public DmaDevice { - public: - struct Params : public ::PioDevice::Params + class PciConfigPort : public PioPort { - /** - * A pointer to the configspace all object that calls us when - * a read comes to this particular device/function. - */ - PciConfigAll *configSpace; + protected: + PciDev *device; + virtual bool recvTiming(Packet *pkt); + + virtual Tick recvAtomic(Packet *pkt); + + virtual void recvFunctional(Packet *pkt) ; + + virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop); + + int busId; + int deviceId; + int functionId; + + Addr configAddr; + + public: + PciConfigPort(PciDev *dev, int busid, int devid, int funcid, + Platform *p); + + friend class PioPort::SendEvent; + }; + + public: + struct Params : public PioDevice::Params + { /** * A pointer to the object that contains the first 64 bytes of * config space @@ -113,6 +128,9 @@ class PciDev : public DmaDevice /** The latency for pio accesses. */ Tick pio_delay; + + /** The latency for a config access. 
*/ + Tick config_delay; }; public: @@ -164,6 +182,25 @@ class PciDev : public DmaDevice Platform *plat; PciConfigData *configData; Tick pioDelay; + Tick configDelay; + PciConfigPort *configPort; + + /** + * Write to the PCI config space data that is stored locally. This may be + * overridden by the device but at some point it will eventually call this + * for normal operations that it does not need to override. + * @param pkt packet containing the write the offset into config space + */ + virtual Tick writeConfig(Packet *pkt); + + + /** + * Read from the PCI config space data that is stored locally. This may be + * overridden by the device but at some point it will eventually call this + * for normal operations that it does not need to override. + * @param pkt packet containing the write the offset into config space + */ + virtual Tick readConfig(Packet *pkt); public: Addr pciToDma(Addr pciAddr) const @@ -171,21 +208,25 @@ class PciDev : public DmaDevice void intrPost() - { plat->postPciInt(configData->config.interruptLine); } + { plat->postPciInt(letoh(configData->config.interruptLine)); } void intrClear() - { plat->clearPciInt(configData->config.interruptLine); } + { plat->clearPciInt(letoh(configData->config.interruptLine)); } uint8_t interruptLine() - { return configData->config.interruptLine; } + { return letoh(configData->config.interruptLine); } /** return the address ranges that this device responds to. * @params range_list range list to populate with ranges */ void addressRanges(AddrRangeList &range_list); + /** Do a PCI Configspace memory access. */ + Tick recvConfig(Packet *pkt) + { return pkt->isRead() ? readConfig(pkt) : writeConfig(pkt); } + /** * Constructor for PCI Dev. This function copies data from the * config file object PCIConfigData and registers the device with @@ -193,30 +234,7 @@ class PciDev : public DmaDevice */ PciDev(Params *params); - /** - * Write to the PCI config space data that is stored locally. 
This may be - * overridden by the device but at some point it will eventually call this - * for normal operations that it does not need to override. - * @param offset the offset into config space - * @param size the size of the write - * @param data the data to write - */ - virtual void writeConfig(int offset, const uint8_t data); - virtual void writeConfig(int offset, const uint16_t data); - virtual void writeConfig(int offset, const uint32_t data); - - - /** - * Read from the PCI config space data that is stored locally. This may be - * overridden by the device but at some point it will eventually call this - * for normal operations that it does not need to override. - * @param offset the offset into config space - * @param size the size of the read - * @param data pointer to the location where the read value should be stored - */ - virtual void readConfig(int offset, uint8_t *data); - virtual void readConfig(int offset, uint16_t *data); - virtual void readConfig(int offset, uint32_t *data); + virtual void init(); /** * Serialize this object to the given output stream. 
@@ -230,5 +248,19 @@ class PciDev : public DmaDevice * @param section The section name of this object */ virtual void unserialize(Checkpoint *cp, const std::string §ion); + + virtual Port *getPort(const std::string &if_name, int idx = -1) + { + if (if_name == "config") { + if (configPort != NULL) + panic("pciconfig port already connected to."); + configPort = new PciConfigPort(this, params()->busNum, + params()->deviceNum, params()->functionNum, + params()->platform); + return configPort; + } + return DmaDevice::getPort(if_name, idx); + } + }; #endif // __DEV_PCIDEV_HH__ diff --git a/src/dev/pcireg.h b/src/dev/pcireg.h index 0aa4ba8ef1..a48abd4fa5 100644 --- a/src/dev/pcireg.h +++ b/src/dev/pcireg.h @@ -142,6 +142,7 @@ union PCIConfig { // Device specific offsets #define PCI_DEVICE_SPECIFIC 0x40 // 192 bytes +#define PCI_CONFIG_SIZE 0xFF // Some Vendor IDs #define PCI_VENDOR_DEC 0x1011 diff --git a/src/dev/platform.cc b/src/dev/platform.cc index ed021e3b67..8546b7805f 100644 --- a/src/dev/platform.cc +++ b/src/dev/platform.cc @@ -63,5 +63,21 @@ Platform::pciToDma(Addr pciAddr) const panic("No PCI dma support in platform."); } +void +Platform::registerPciDevice(uint8_t bus, uint8_t dev, uint8_t func, uint8_t intr) +{ + uint32_t bdf = bus << 16 | dev << 8 | func << 0; + if (pciDevices.find(bdf) != pciDevices.end()) + fatal("Two PCI devices have same bus:device:function\n"); + + if (intLines.test(intr)) + fatal("Two PCI devices have same interrupt line: %d\n", intr); + + pciDevices.insert(bdf); + + intLines.set(intr); +} + + DEFINE_SIM_OBJECT_CLASS_NAME("Platform", Platform) diff --git a/src/dev/platform.hh b/src/dev/platform.hh index 0e6f4ba4a8..1940dcad61 100644 --- a/src/dev/platform.hh +++ b/src/dev/platform.hh @@ -37,6 +37,9 @@ #ifndef __DEV_PLATFORM_HH__ #define __DEV_PLATFORM_HH__ +#include +#include + #include "sim/sim_object.hh" #include "arch/isa_traits.hh" @@ -52,9 +55,6 @@ class Platform : public SimObject /** Pointer to the interrupt controller */ 
IntrControl *intrctrl; - /** Pointer to the PCI configuration space */ - PciConfigAll *pciconfig; - /** Pointer to the UART, set by the uart */ Uart *uart; @@ -64,13 +64,20 @@ class Platform : public SimObject public: Platform(const std::string &name, IntrControl *intctrl); virtual ~Platform(); - virtual void init() { if (pciconfig == NULL) panic("PCI Config not set"); } virtual void postConsoleInt() = 0; virtual void clearConsoleInt() = 0; virtual Tick intrFrequency() = 0; virtual void postPciInt(int line); virtual void clearPciInt(int line); virtual Addr pciToDma(Addr pciAddr) const; + virtual Addr calcConfigAddr(int bus, int dev, int func) = 0; + virtual void registerPciDevice(uint8_t bus, uint8_t dev, uint8_t func, + uint8_t intr); + + private: + std::bitset<256> intLines; + std::set pciDevices; + }; #endif // __DEV_PLATFORM_HH__ diff --git a/src/dev/sinic.cc b/src/dev/sinic.cc index a0223733bb..dddda1f1cb 100644 --- a/src/dev/sinic.cc +++ b/src/dev/sinic.cc @@ -37,7 +37,6 @@ #include "cpu/intr_control.hh" #include "dev/etherlink.hh" #include "dev/sinic.hh" -#include "dev/pciconfigall.hh" #include "mem/packet.hh" #include "sim/builder.hh" #include "sim/debug.hh" @@ -1623,7 +1622,6 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(Device) SimObjectParam system; SimObjectParam platform; - SimObjectParam configspace; SimObjectParam configdata; Param pci_bus; Param pci_dev; @@ -1666,7 +1664,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(Device) INIT_PARAM(system, "System pointer"), INIT_PARAM(platform, "Platform pointer"), - INIT_PARAM(configspace, "PCI Configspace"), INIT_PARAM(configdata, "PCI Config data"), INIT_PARAM(pci_bus, "PCI bus ID"), INIT_PARAM(pci_dev, "PCI device number"), @@ -1711,7 +1708,6 @@ CREATE_SIM_OBJECT(Device) params->name = getInstanceName(); params->platform = platform; params->system = system; - params->configSpace = configspace; params->configData = configdata; params->busNum = pci_bus; params->deviceNum = pci_dev; diff --git a/src/dev/tsunami.cc 
b/src/dev/tsunami.cc index c9e15581d6..8e740a72fc 100644 --- a/src/dev/tsunami.cc +++ b/src/dev/tsunami.cc @@ -95,6 +95,13 @@ Tsunami::pciToDma(Addr pciAddr) const return pchip->translatePciToDma(pciAddr); } + +Addr +Tsunami::calcConfigAddr(int bus, int dev, int func) +{ + return pchip->calcConfigAddr(bus, dev, func); +} + void Tsunami::serialize(std::ostream &os) { diff --git a/src/dev/tsunami.hh b/src/dev/tsunami.hh index 13fc4417c0..8bb66e9143 100644 --- a/src/dev/tsunami.hh +++ b/src/dev/tsunami.hh @@ -113,8 +113,14 @@ class Tsunami : public Platform */ virtual void clearPciInt(int line); + virtual Addr pciToDma(Addr pciAddr) const; + /** + * Calculate the configuration address given a bus/dev/func. + */ + virtual Addr calcConfigAddr(int bus, int dev, int func); + /** * Serialize this object to the given output stream. * @param os The stream to serialize to. diff --git a/src/dev/tsunami_pchip.cc b/src/dev/tsunami_pchip.cc index a376b908dd..8a542b9b07 100644 --- a/src/dev/tsunami_pchip.cc +++ b/src/dev/tsunami_pchip.cc @@ -302,6 +302,17 @@ TsunamiPChip::translatePciToDma(Addr busAddr) // if no match was found, then return the original address return busAddr; } +Addr +TsunamiPChip::calcConfigAddr(int bus, int dev, int func) +{ + assert(func < 8); + assert(dev < 32); + assert(bus == 0); + + return TsunamiPciBus0Config | (func << 8) | (dev << 11); +} + + void TsunamiPChip::serialize(std::ostream &os) diff --git a/src/dev/tsunami_pchip.hh b/src/dev/tsunami_pchip.hh index 9f80f7d688..b9e9005265 100644 --- a/src/dev/tsunami_pchip.hh +++ b/src/dev/tsunami_pchip.hh @@ -45,6 +45,9 @@ class TsunamiPChip : public BasicPioDevice { protected: + + static const Addr TsunamiPciBus0Config = 0x801fe000000; + /** Pchip control register */ uint64_t pctl; @@ -80,6 +83,8 @@ class TsunamiPChip : public BasicPioDevice */ Addr translatePciToDma(Addr busAddr); + Addr calcConfigAddr(int bus, int dev, int func); + virtual Tick read(Packet *pkt); virtual Tick write(Packet *pkt); diff --git 
a/src/mem/bus.cc b/src/mem/bus.cc index 19a3dc9e40..31271106bb 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -33,6 +33,7 @@ */ +#include "base/misc.hh" #include "base/trace.hh" #include "mem/bus.hh" #include "sim/builder.hh" @@ -40,6 +41,14 @@ Port * Bus::getPort(const std::string &if_name, int idx) { + if (if_name == "default") + if (defaultPort == NULL) { + defaultPort = new BusPort(csprintf("%s-default",name()), this, + defaultId); + return defaultPort; + } else + fatal("Default port already set\n"); + // if_name ignored? forced to be empty? int id = interfaces.size(); BusPort *bp = new BusPort(csprintf("%s-p%d", name(), id), this, id); @@ -47,11 +56,12 @@ Bus::getPort(const std::string &if_name, int idx) return bp; } -/** Get the ranges of anyone that we are connected to. */ +/** Get the ranges of anyone other buses that we are connected to. */ void Bus::init() { std::vector::iterator intIter; + for (intIter = interfaces.begin(); intIter != interfaces.end(); intIter++) (*intIter)->sendStatusChange(Port::RangeChange); } @@ -110,6 +120,7 @@ Bus::findPort(Addr addr, int id) int dest_id = -1; int i = 0; bool found = false; + AddrRangeIter iter; while (i < portList.size() && !found) { @@ -120,8 +131,18 @@ Bus::findPort(Addr addr, int id) } i++; } - if (dest_id == -1) + + // Check if this matches the default range + if (dest_id == -1) { + for (iter = defaultRange.begin(); iter != defaultRange.end(); iter++) { + if (*iter == addr) { + DPRINTF(Bus, " found addr 0x%llx on default\n", addr); + return defaultPort; + } + } panic("Unable to find destination for addr: %llx", addr); + } + // we shouldn't be sending this back to where it came from assert(dest_id != id); @@ -155,39 +176,52 @@ Bus::recvFunctional(Packet *pkt) void Bus::recvStatusChange(Port::Status status, int id) { + AddrRangeList ranges; + AddrRangeList snoops; + int x; + AddrRangeIter iter; + assert(status == Port::RangeChange && "The other statuses need to be implemented."); DPRINTF(BusAddrRanges, 
"received RangeChange from device id %d\n", id); - assert(id < interfaces.size() && id >= 0); - int x; - Port *port = interfaces[id]; - AddrRangeList ranges; - AddrRangeList snoops; - AddrRangeIter iter; - std::vector::iterator portIter; + if (id == defaultId) { + defaultRange.clear(); + defaultPort->getPeerAddressRanges(ranges, snoops); + assert(snoops.size() == 0); + for(iter = ranges.begin(); iter != ranges.end(); iter++) { + defaultRange.push_back(*iter); + DPRINTF(BusAddrRanges, "Adding range %llx - %llx for default\n", + iter->start, iter->end); + } + } else { - // Clean out any previously existent ids - for (portIter = portList.begin(); portIter != portList.end(); ) { - if (portIter->portId == id) - portIter = portList.erase(portIter); - else - portIter++; - } + assert((id < interfaces.size() && id >= 0) || id == -1); + Port *port = interfaces[id]; + std::vector::iterator portIter; - port->getPeerAddressRanges(ranges, snoops); + // Clean out any previously existent ids + for (portIter = portList.begin(); portIter != portList.end(); ) { + if (portIter->portId == id) + portIter = portList.erase(portIter); + else + portIter++; + } - // not dealing with snooping yet either - assert(snoops.size() == 0); - for(iter = ranges.begin(); iter != ranges.end(); iter++) { - DevMap dm; - dm.portId = id; - dm.range = *iter; + port->getPeerAddressRanges(ranges, snoops); - DPRINTF(BusAddrRanges, "Adding range %llx - %llx for id %d\n", - dm.range.start, dm.range.end, id); - portList.push_back(dm); + // not dealing with snooping yet either + assert(snoops.size() == 0); + for(iter = ranges.begin(); iter != ranges.end(); iter++) { + DevMap dm; + dm.portId = id; + dm.range = *iter; + + DPRINTF(BusAddrRanges, "Adding range %llx - %llx for id %d\n", + dm.range.start, dm.range.end, id); + portList.push_back(dm); + } } DPRINTF(MMU, "port list has %d entries\n", portList.size()); @@ -196,19 +230,47 @@ Bus::recvStatusChange(Port::Status status, int id) for (x = 0; x < interfaces.size(); 
x++) if (x != id) interfaces[x]->sendStatusChange(Port::RangeChange); + + if (id != defaultId && defaultPort) + defaultPort->sendStatusChange(Port::RangeChange); } void Bus::addressRanges(AddrRangeList &resp, AddrRangeList &snoop, int id) { std::vector::iterator portIter; + AddrRangeIter dflt_iter; + bool subset; resp.clear(); snoop.clear(); DPRINTF(BusAddrRanges, "received address range request, returning:\n"); + + for (dflt_iter = defaultRange.begin(); dflt_iter != defaultRange.end(); + dflt_iter++) { + resp.push_back(*dflt_iter); + DPRINTF(BusAddrRanges, " -- %#llX : %#llX\n",dflt_iter->start, + dflt_iter->end); + } for (portIter = portList.begin(); portIter != portList.end(); portIter++) { - if (portIter->portId != id) { + subset = false; + for (dflt_iter = defaultRange.begin(); dflt_iter != defaultRange.end(); + dflt_iter++) { + if ((portIter->range.start < dflt_iter->start && + portIter->range.end >= dflt_iter->start) || + (portIter->range.start < dflt_iter->end && + portIter->range.end >= dflt_iter->end)) + fatal("Devices can not set ranges that itersect the default set\ + but are not a subset of the default set.\n"); + if (portIter->range.start >= dflt_iter->start && + portIter->range.end <= dflt_iter->end) { + subset = true; + DPRINTF(BusAddrRanges, " -- %#llX : %#llX is a SUBSET\n", + portIter->range.start, portIter->range.end); + } + } + if (portIter->portId != id && !subset) { resp.push_back(portIter->range); DPRINTF(BusAddrRanges, " -- %#llX : %#llX\n", portIter->range.start, portIter->range.end); diff --git a/src/mem/bus.hh b/src/mem/bus.hh index 9c7054b949..3a28968860 100644 --- a/src/mem/bus.hh +++ b/src/mem/bus.hh @@ -51,19 +51,22 @@ class Bus : public MemObject /** a globally unique id for this bus. 
*/ int busId; + static const int defaultId = -1; + struct DevMap { int portId; Range range; }; std::vector portList; + AddrRangeList defaultRange; /** Function called by the port when the bus is recieving a Timing - transaction.*/ + transaction.*/ bool recvTiming(Packet *pkt); /** Function called by the port when the bus is recieving a Atomic - transaction.*/ + transaction.*/ Tick recvAtomic(Packet *pkt); /** Function called by the port when the bus is recieving a Functional @@ -159,6 +162,9 @@ class Bus : public MemObject * original send failed for whatever reason.*/ std::list retryList; + /** Port that handles requests that don't match any of the interfaces.*/ + Port *defaultPort; + public: /** A function used to return the port associated with this bus object. */ @@ -167,7 +173,7 @@ class Bus : public MemObject virtual void init(); Bus(const std::string &n, int bus_id) - : MemObject(n), busId(bus_id) {} + : MemObject(n), busId(bus_id), defaultPort(NULL) {} }; diff --git a/src/python/m5/objects/Bus.py b/src/python/m5/objects/Bus.py index 019e150346..e0278e6c33 100644 --- a/src/python/m5/objects/Bus.py +++ b/src/python/m5/objects/Bus.py @@ -4,4 +4,5 @@ from MemObject import MemObject class Bus(MemObject): type = 'Bus' port = VectorPort("vector port for connecting devices") + default = Port("Default port for requests that aren't handeled by a device.") bus_id = Param.Int(0, "blah") diff --git a/src/python/m5/objects/Pci.py b/src/python/m5/objects/Pci.py index 9e1e91b130..29014bb370 100644 --- a/src/python/m5/objects/Pci.py +++ b/src/python/m5/objects/Pci.py @@ -1,5 +1,5 @@ from m5.config import * -from Device import BasicPioDevice, DmaDevice +from Device import BasicPioDevice, DmaDevice, PioDevice class PciConfigData(SimObject): type = 'PciConfigData' @@ -38,18 +38,22 @@ class PciConfigData(SimObject): MaximumLatency = Param.UInt8(0x00, "Maximum Latency") MinimumGrant = Param.UInt8(0x00, "Minimum Grant") -class PciConfigAll(BasicPioDevice): +class 
PciConfigAll(PioDevice): type = 'PciConfigAll' + pio_latency = Param.Tick(1, "Programmed IO latency in simticks") + bus = Param.UInt8(0x00, "PCI bus to act as config space for") + size = Param.MemorySize32('16MB', "Size of config space") + class PciDevice(DmaDevice): type = 'PciDevice' abstract = True + config = Port("PCI configuration space port") pci_bus = Param.Int("PCI bus") pci_dev = Param.Int("PCI device number") pci_func = Param.Int("PCI function code") pio_latency = Param.Tick(1, "Programmed IO latency in simticks") configdata = Param.PciConfigData(Parent.any, "PCI Config data") - configspace = Param.PciConfigAll(Parent.any, "PCI Configspace") class PciFake(PciDevice): type = 'PciFake' From 329e32f8c63a5982b29c2d620e7d08708ec62fbd Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Thu, 6 Jul 2006 15:15:37 -0400 Subject: [PATCH 109/152] Now timing reads work in single level of cache with simple cpu src/mem/cache/base_cache.cc: src/mem/cache/base_cache.hh: src/mem/cache/cache.hh: Changes to handle timing reads in Simple CPU (blocking buffers) --HG-- extra : convert_revision : a2e7d4287d7cdfd1bbf9c929ecbeafde499a5b9f --- src/mem/cache/base_cache.cc | 31 +++++++++++++++++++++++++++++++ src/mem/cache/base_cache.hh | 32 ++++++++++++++++++++++---------- src/mem/cache/cache.hh | 13 +------------ 3 files changed, 54 insertions(+), 22 deletions(-) diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index aaaf1bdefb..15a21efa1b 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -98,6 +98,37 @@ BaseCache::CachePort::clearBlocked() blocked = false; } +BaseCache::CacheEvent::CacheEvent(CachePort *_cachePort) + : Event(&mainEventQueue, CPU_Tick_Pri), cachePort(_cachePort) +{ + this->setFlags(AutoDelete); + pkt = NULL; +} + +BaseCache::CacheEvent::CacheEvent(CachePort *_cachePort, Packet *_pkt) + : Event(&mainEventQueue, CPU_Tick_Pri), cachePort(_cachePort), pkt(_pkt) +{ + this->setFlags(AutoDelete); +} + +void 
+BaseCache::CacheEvent::process() +{ + if (!pkt) + { + if (!cachePort->isCpuSide) + pkt = cachePort->cache->getPacket(); + //Else get coherence req + } + cachePort->sendTiming(pkt); +} + +const char * +BaseCache::CacheEvent::description() +{ + return "timing event\n"; +} + Port* BaseCache::getPort(const std::string &if_name, int idx) { diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index 2754fab5a6..5370a73c8f 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -79,9 +79,9 @@ class BaseCache : public MemObject { class CachePort : public Port { + public: BaseCache *cache; - public: CachePort(const std::string &_name, BaseCache *_cache, bool _isCpuSide); protected: @@ -110,10 +110,11 @@ class BaseCache : public MemObject struct CacheEvent : public Event { - Packet *pkt; CachePort *cachePort; + Packet *pkt; - CacheEvent(Packet *pkt, CachePort *cachePort); + CacheEvent(CachePort *_cachePort); + CacheEvent(CachePort *_cachePort, Packet *_pkt); void process(); const char *description(); }; @@ -147,6 +148,11 @@ class BaseCache : public MemObject fatal("No implementation"); } + virtual Packet *getPacket() + { + fatal("No implementation"); + } + /** * Bit vector of the blocking reasons for the access path. 
* @sa #BlockedCause @@ -388,7 +394,6 @@ class BaseCache : public MemObject if (!isBlockedForSnoop()) { memSidePort->clearBlocked(); } - } /** @@ -407,10 +412,13 @@ class BaseCache : public MemObject */ void setMasterRequest(RequestCause cause, Tick time) { + if (!doMasterRequest()) + { + BaseCache::CacheEvent * reqCpu = new BaseCache::CacheEvent(memSidePort); + reqCpu->schedule(time); + } uint8_t flag = 1<pktuest(time); } /** @@ -462,8 +470,10 @@ class BaseCache : public MemObject */ void respond(Packet *pkt, Tick time) { - assert("Implement\n" && 0); -// si->respond(pkt,time); + pkt->makeTimingResponse(); + pkt->result = Packet::Success; + CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt); + reqCpu->schedule(time); } /** @@ -476,8 +486,10 @@ class BaseCache : public MemObject if (!pkt->req->isUncacheable()) { missLatency[pkt->cmdToIndex()][pkt->req->getThreadNum()] += time - pkt->time; } - assert("Implement\n" && 0); -// si->respond(pkt,time); + pkt->makeTimingResponse(); + pkt->result = Packet::Success; + CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt); + reqCpu->schedule(time); } /** diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 1243c9d9ec..2e77444a08 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -168,7 +168,7 @@ class Cache : public BaseCache * Selects a request to send on the bus. * @return The memory request to service. */ - Packet * getPacket(); + virtual Packet * getPacket(); /** * Was the request was sent successfully? @@ -241,17 +241,6 @@ class Cache : public BaseCache return missQueue->getMisses(); } - /** - * Send a response to the slave interface. - * @param req The request being responded to. - * @param time The time the response is ready. - */ - void respond(Packet * &pkt, Tick time) - { - //si->respond(pkt,time); - cpuSidePort->sendAtomic(pkt); - } - /** * Perform the access specified in the request and return the estimated * time of completion. 
This function can either update the hierarchy state From 8ae4f45bc4782b4ab1dc95dbca183e2cd926fc5b Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 6 Jul 2006 16:06:00 -0400 Subject: [PATCH 110/152] Fixes for draining. src/cpu/simple/timing.cc: Update for changed return values. src/python/m5/__init__.py: Loop in order to make sure all objects are really drained. Objects may become undrained as other objects become drained (e.g. a bus-bridge has a packet, while a bus is empty, and the first drain() will cause the bus-bridge to give the packet to the bus). The only case we know every object is actually drained is if they all return immediately that they are drained. --HG-- extra : convert_revision : 80057a1d6d30381bd0b67b23549bd202f447c5cb --- src/cpu/simple/timing.cc | 4 ++-- src/python/m5/__init__.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index ad04c8d3bd..523d81d0bb 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -118,11 +118,11 @@ TimingSimpleCPU::drain(Event *drain_event) // an access to complete. if (status() == Idle || status() == Running || status() == SwitchedOut) { changeState(SimObject::DrainedTiming); - return false; + return true; } else { changeState(SimObject::Draining); drainEvent = drain_event; - return true; + return false; } } diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py index 579785a469..7d35ee8b8a 100644 --- a/src/python/m5/__init__.py +++ b/src/python/m5/__init__.py @@ -213,14 +213,28 @@ atexit.register(cc_main.doExitCleanup) # matter since most scripts will probably 'from m5.objects import *'. import objects +# This loops until all objects have been fully drained. def doDrain(root): + all_drained = drain(root) + while (not all_drained): + all_drained = drain(root) + +# Tries to drain all objects. Draining might not be completed unless +# all objects return that they are drained on the first call. 
This is +# because as objects drain they may cause other objects to no longer +# be drained. +def drain(root): + all_drained = False drain_event = cc_main.createCountedDrain() unready_objects = root.startDrain(drain_event, True) # If we've got some objects that can't drain immediately, then simulate if unready_objects > 0: drain_event.setCount(unready_objects) simulate() + else: + all_drained = True cc_main.cleanupCountedDrain(drain_event) + return all_drained def resume(root): root.resume() From 8bf9709d912849a33c44cf3cd004a288d2106176 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 6 Jul 2006 16:26:44 -0400 Subject: [PATCH 111/152] Two minor FS compile fixes. src/dev/tsunami_pchip.hh: Need ULL() for 32-bit hosts. src/sim/pseudo_inst.cc: Forgot to remove sampler include from here. --HG-- extra : convert_revision : 6ab6bdc721290167b4c2b78da3d28a4992eb24d5 --- src/dev/tsunami_pchip.hh | 2 +- src/sim/pseudo_inst.cc | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/dev/tsunami_pchip.hh b/src/dev/tsunami_pchip.hh index b9e9005265..2c97a1feac 100644 --- a/src/dev/tsunami_pchip.hh +++ b/src/dev/tsunami_pchip.hh @@ -46,7 +46,7 @@ class TsunamiPChip : public BasicPioDevice { protected: - static const Addr TsunamiPciBus0Config = 0x801fe000000; + static const Addr TsunamiPciBus0Config = ULL(0x801fe000000); /** Pchip control register */ uint64_t pctl; diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc index 869805f5c7..fcf0b957a7 100644 --- a/src/sim/pseudo_inst.cc +++ b/src/sim/pseudo_inst.cc @@ -37,7 +37,6 @@ #include "sim/pseudo_inst.hh" #include "arch/vtophys.hh" #include "cpu/base.hh" -#include "cpu/sampler/sampler.hh" #include "cpu/thread_context.hh" #include "cpu/quiesce_event.hh" #include "kern/kernel_stats.hh" From 1ccfdb442ff34f9f2b38ee7716b7baee99a397c2 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Thu, 6 Jul 2006 16:52:05 -0400 Subject: [PATCH 112/152] Timing cache works for hello world test. 
Still need 1) detailed CPU (blocking ability in cache) 1a) Multiple outstanding requests (need to keep track of times for events) 2)Multi-level support 3)MP coherence support 4)LL/SC support 5)Functional path needs to be correctly implemented (temporarily works without multiple outstanding requests (simple cpu)) src/cpu/simple/timing.cc: Temp hack because timing cpu doesn't export ports properly so single I/D cache communicates only through the Icache port. src/mem/cache/base_cache.cc: Handle marking MSHR's in service Add support for getting CSHR's src/mem/cache/base_cache.hh: Make these functions visible at the base cache level src/mem/cache/cache.hh: make the functions virtual src/mem/cache/cache_impl.hh: Rename the function to make sense src/mem/packet.hh: Accidentally clearing the needsResponse field when sending a response back. --HG-- extra : convert_revision : 2325d4e0b77e470fa9da91490317dc8ed88b17e2 --- src/cpu/simple/timing.cc | 7 ++++++- src/mem/cache/base_cache.cc | 10 ++++++++-- src/mem/cache/base_cache.hh | 11 +++++++++++ src/mem/cache/cache.hh | 4 ++-- src/mem/cache/cache_impl.hh | 2 +- src/mem/packet.hh | 19 ++++++++++--------- 6 files changed, 38 insertions(+), 15 deletions(-) diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index ad04c8d3bd..036037ba9a 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -451,7 +451,12 @@ TimingSimpleCPU::completeIfetch(Packet *pkt) bool TimingSimpleCPU::IcachePort::recvTiming(Packet *pkt) { - cpu->completeIfetch(pkt); + if (cpu->_status == DcacheWaitResponse) + cpu->completeDataAccess(pkt); + else if (cpu->_status == IcacheWaitResponse) + cpu->completeIfetch(pkt); + else + assert("OOPS" && 0); return true; } diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index 15a21efa1b..4fbda40745 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -118,9 +118,15 @@ BaseCache::CacheEvent::process() { if (!cachePort->isCpuSide) pkt = 
cachePort->cache->getPacket(); - //Else get coherence req + else + pkt = cachePort->cache->getCoherencePacket(); + bool success = cachePort->sendTiming(pkt); + cachePort->cache->sendResult(pkt, success); + return; } - cachePort->sendTiming(pkt); + //Know the packet to send, no need to mark in service (must succed) + bool success = cachePort->sendTiming(pkt); + assert(success); } const char * diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index 5370a73c8f..f832735dbe 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -153,6 +153,17 @@ class BaseCache : public MemObject fatal("No implementation"); } + virtual Packet *getCoherencePacket() + { + fatal("No implementation"); + } + + virtual void sendResult(Packet* &pkt, bool success) + { + + fatal("No implementation"); + } + /** * Bit vector of the blocking reasons for the access path. * @sa #BlockedCause diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 2e77444a08..ec5b800a8e 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -175,7 +175,7 @@ class Cache : public BaseCache * @param req The request. * @param success True if the request was sent successfully. */ - void sendResult(Packet * &pkt, bool success); + virtual void sendResult(Packet * &pkt, bool success); /** * Handles a response (cache line fill/write ack) from the bus. @@ -202,7 +202,7 @@ class Cache : public BaseCache * Selects a coherence message to forward to lower levels of the hierarchy. * @return The coherence message to forward. */ - Packet * getCoherenceReq(); + virtual Packet * getCoherencePacket(); /** * Snoops bus transactions to maintain coherence. 
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index aae5cbf018..a447ae3d58 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -350,7 +350,7 @@ Cache::pseudoFill(MSHR *mshr) template Packet * -Cache::getCoherenceReq() +Cache::getCoherencePacket() { return coherence->getPacket(); } diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 2b97ab0c1d..1325dfc5b8 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -183,19 +183,19 @@ class Packet ReadReq = IsRead | IsRequest | NeedsResponse, WriteReq = IsWrite | IsRequest | NeedsResponse, WriteReqNoAck = IsWrite | IsRequest, - ReadResp = IsRead | IsResponse, - WriteResp = IsWrite | IsResponse, + ReadResp = IsRead | IsResponse | NeedsResponse, + WriteResp = IsWrite | IsResponse | NeedsResponse, Writeback = IsWrite | IsRequest, SoftPFReq = IsRead | IsRequest | IsSWPrefetch | NeedsResponse, HardPFReq = IsRead | IsRequest | IsHWPrefetch | NeedsResponse, - SoftPFResp = IsRead | IsRequest | IsSWPrefetch | IsResponse, - HardPFResp = IsRead | IsRequest | IsHWPrefetch | IsResponse, + SoftPFResp = IsRead | IsResponse | IsSWPrefetch | NeedsResponse, + HardPFResp = IsRead | IsResponse | IsHWPrefetch | NeedsResponse, InvalidateReq = IsInvalidate | IsRequest, WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest, - UpgradeReq = IsInvalidate | NeedsResponse, - UpgradeResp = IsInvalidate | IsResponse, - ReadExReq = IsRead | IsInvalidate | NeedsResponse, - ReadExResp = IsRead | IsInvalidate | IsResponse + UpgradeReq = IsInvalidate | IsRequest | NeedsResponse, + UpgradeResp = IsInvalidate | IsResponse | NeedsResponse, + ReadExReq = IsRead | IsInvalidate | IsRequest | NeedsResponse, + ReadExResp = IsRead | IsInvalidate | IsResponse | NeedsResponse }; /** Return the string name of the cmd field (for debugging and @@ -311,8 +311,9 @@ class Packet * should not be called. 
*/ void makeTimingResponse() { assert(needsResponse()); + assert(isRequest()); int icmd = (int)cmd; - icmd &= ~(IsRequest | NeedsResponse); + icmd &= ~(IsRequest); icmd |= IsResponse; cmd = (Command)icmd; dest = src; From e7ccc94ea3cdc6130e66899fd905ca11da958727 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 6 Jul 2006 17:53:26 -0400 Subject: [PATCH 113/152] Various serialization changes to make it possible for the O3CPU to checkpoint. src/arch/alpha/regfile.hh: Define serialize/unserialize functions on MiscRegFile itself. src/cpu/o3/regfile.hh: Remove old commented code. src/cpu/simple_thread.cc: src/cpu/simple_thread.hh: Push common serialization code to ThreadState level. Also allow the SimpleThread to be used for checkpointing by other models. src/cpu/thread_state.cc: src/cpu/thread_state.hh: Move common serialization code into ThreadState. --HG-- extra : convert_revision : ef64ef515355437439af967eda2e610e8c1b658b --- src/arch/alpha/regfile.hh | 4 ++ src/cpu/o3/regfile.hh | 4 -- src/cpu/simple_thread.cc | 78 ++++++++++++++++++++------------------- src/cpu/simple_thread.hh | 8 ++-- src/cpu/thread_state.cc | 43 +++++++++++++++++++++ src/cpu/thread_state.hh | 6 +++ 6 files changed, 98 insertions(+), 45 deletions(-) diff --git a/src/arch/alpha/regfile.hh b/src/arch/alpha/regfile.hh index 1025412cd3..9ecad6f429 100644 --- a/src/arch/alpha/regfile.hh +++ b/src/arch/alpha/regfile.hh @@ -112,6 +112,10 @@ namespace AlphaISA lock_flag = 0; lock_addr = 0; } + + void serialize(std::ostream &os); + + void unserialize(Checkpoint *cp, const std::string &section); #if FULL_SYSTEM protected: typedef uint64_t InternalProcReg; diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index 6972f055f1..b6677b4b10 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -86,10 +86,6 @@ class PhysRegFile //The duplication is unfortunate but it's better than having //different ways to access certain registers.
- //Add these in later when everything else is in place -// void serialize(std::ostream &os); -// void unserialize(Checkpoint *cp, const std::string &section); - /** Reads an integer register. */ uint64_t readIntReg(PhysRegIndex reg_idx) { diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc index db28b23e85..6255b33579 100644 --- a/src/cpu/simple_thread.cc +++ b/src/cpu/simple_thread.cc @@ -123,15 +123,32 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, tc = new ProxyThreadContext(this); } -SimpleThread::SimpleThread(RegFile *regFile) - : ThreadState(-1, -1, NULL, -1, NULL), cpu(NULL) -{ - regs = *regFile; - tc = new ProxyThreadContext(this); -} - #endif +SimpleThread::SimpleThread(ThreadContext *oldContext) +#if FULL_SYSTEM + : ThreadState(-1, -1) +#else + : ThreadState(-1, -1, NULL, -1, NULL) +#endif +{ + tc = new ProxyThreadContext(this); + regs.clear(); + + copyState(oldContext); + +#if FULL_SYSTEM + EndQuiesceEvent *quiesce = oldContext->getQuiesceEvent(); + if (quiesce) { + quiesceEvent = quiesce; + } + Kernel::Statistics *stats = oldContext->getKernelStats(); + if (stats) { + kernelStats = stats; + } +#endif +} + SimpleThread::~SimpleThread() { delete tc; @@ -147,13 +164,8 @@ SimpleThread::takeOverFrom(ThreadContext *oldContext) assert(process == oldContext->getProcessPtr()); #endif - // copy over functional state - _status = oldContext->status(); - copyArchRegs(oldContext); - cpuId = oldContext->readCpuId(); -#if !FULL_SYSTEM - funcExeInst = oldContext->readFuncExeInst(); -#else + copyState(oldContext); +#if FULL_SYSTEM EndQuiesceEvent *quiesce = oldContext->getQuiesceEvent(); if (quiesce) { // Point the quiesce event's TC at this TC so that it wakes up @@ -170,43 +182,33 @@ SimpleThread::takeOverFrom(ThreadContext *oldContext) oldContext->setStatus(ThreadContext::Unallocated); } +void +SimpleThread::copyState(ThreadContext *oldContext) +{ + // copy over functional state + _status = oldContext->status(); + copyArchRegs(oldContext); + 
cpuId = oldContext->readCpuId(); +#if !FULL_SYSTEM + funcExeInst = oldContext->readFuncExeInst(); +#endif +} + void SimpleThread::serialize(ostream &os) { - SERIALIZE_ENUM(_status); + ThreadState::serialize(os); regs.serialize(os); // thread_num and cpu_id are deterministic from the config - SERIALIZE_SCALAR(funcExeInst); - SERIALIZE_SCALAR(inst); - -#if FULL_SYSTEM - Tick quiesceEndTick = 0; - if (quiesceEvent->scheduled()) - quiesceEndTick = quiesceEvent->when(); - SERIALIZE_SCALAR(quiesceEndTick); - if (kernelStats) - kernelStats->serialize(os); -#endif } void SimpleThread::unserialize(Checkpoint *cp, const std::string &section) { - UNSERIALIZE_ENUM(_status); + ThreadState::unserialize(cp, section); regs.unserialize(cp, section); // thread_num and cpu_id are deterministic from the config - UNSERIALIZE_SCALAR(funcExeInst); - UNSERIALIZE_SCALAR(inst); - -#if FULL_SYSTEM - Tick quiesceEndTick; - UNSERIALIZE_SCALAR(quiesceEndTick); - if (quiesceEndTick) - quiesceEvent->schedule(quiesceEndTick); - if (kernelStats) - kernelStats->unserialize(cp, section); -#endif } #if FULL_SYSTEM diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh index de65e9891e..ff2639e10f 100644 --- a/src/cpu/simple_thread.hh +++ b/src/cpu/simple_thread.hh @@ -119,16 +119,18 @@ class SimpleThread : public ThreadState #else SimpleThread(BaseCPU *_cpu, int _thread_num, Process *_process, int _asid, MemObject *memobj); - // Constructor to use SimpleThread to pass reg file around. Not - // used for anything else.
- SimpleThread(RegFile *regFile); #endif + + SimpleThread(ThreadContext *oldContext); + virtual ~SimpleThread(); virtual void takeOverFrom(ThreadContext *oldContext); void regStats(const std::string &name); + void copyState(ThreadContext *oldContext); + void serialize(std::ostream &os); void unserialize(Checkpoint *cp, const std::string &section); diff --git a/src/cpu/thread_state.cc b/src/cpu/thread_state.cc index 872678a41c..6a96560f19 100644 --- a/src/cpu/thread_state.cc +++ b/src/cpu/thread_state.cc @@ -31,6 +31,12 @@ #include "base/output.hh" #include "cpu/profile.hh" #include "cpu/thread_state.hh" +#include "sim/serialize.hh" + +#if FULL_SYSTEM +#include "cpu/quiesce_event.hh" +#include "kern/kernel_stats.hh" +#endif #if FULL_SYSTEM ThreadState::ThreadState(int _cpuId, int _tid) @@ -49,6 +55,43 @@ ThreadState::ThreadState(int _cpuId, int _tid, Process *_process, numLoad = 0; } +void +ThreadState::serialize(std::ostream &os) +{ + SERIALIZE_ENUM(_status); + // thread_num and cpu_id are deterministic from the config + SERIALIZE_SCALAR(funcExeInst); + SERIALIZE_SCALAR(inst); + +#if FULL_SYSTEM + Tick quiesceEndTick = 0; + if (quiesceEvent->scheduled()) + quiesceEndTick = quiesceEvent->when(); + SERIALIZE_SCALAR(quiesceEndTick); + if (kernelStats) + kernelStats->serialize(os); +#endif +} + +void +ThreadState::unserialize(Checkpoint *cp, const std::string &section) +{ + + UNSERIALIZE_ENUM(_status); + // thread_num and cpu_id are deterministic from the config + UNSERIALIZE_SCALAR(funcExeInst); + UNSERIALIZE_SCALAR(inst); + +#if FULL_SYSTEM + Tick quiesceEndTick; + UNSERIALIZE_SCALAR(quiesceEndTick); + if (quiesceEndTick) + quiesceEvent->schedule(quiesceEndTick); + if (kernelStats) + kernelStats->unserialize(cp, section); +#endif +} + #if FULL_SYSTEM void diff --git a/src/cpu/thread_state.hh b/src/cpu/thread_state.hh index cb1449ac5c..b03a2e2bb8 100644 --- a/src/cpu/thread_state.hh +++ b/src/cpu/thread_state.hh @@ -49,6 +49,8 @@ namespace Kernel { }; #endif 
+class 
Checkpoint; + /** * Struct for holding general thread state that is needed across CPU * models. This includes things such as pointers to the process, @@ -65,6 +67,10 @@ struct ThreadState { short _asid, MemObject *mem); #endif + void serialize(std::ostream &os); + + void unserialize(Checkpoint *cp, const std::string &section); + void setCpuId(int id) { cpuId = id; } int readCpuId() { return cpuId; } From fbe3e22474184e537fe74f4e86277056026f0514 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 6 Jul 2006 17:57:20 -0400 Subject: [PATCH 114/152] Fix the O3CPU to support the multi-pass method for checking if the system has fully drained. src/cpu/o3/commit.hh: src/cpu/o3/commit_impl.hh: src/cpu/o3/decode.hh: src/cpu/o3/decode_impl.hh: src/cpu/o3/fetch.hh: src/cpu/o3/fetch_impl.hh: src/cpu/o3/iew.hh: src/cpu/o3/iew_impl.hh: src/cpu/o3/rename.hh: src/cpu/o3/rename_impl.hh: Return a value so that the CPU can instantly return from draining if the pipeline is already drained. src/cpu/o3/cpu.cc: Use values returned from pipeline stages so that the CPU can instantly return from draining if the pipeline is already drained. --HG-- extra : convert_revision : d8ef6b811644ea67c8b40c4719273fa224105811 --- src/cpu/o3/commit.hh | 2 +- src/cpu/o3/commit_impl.hh | 10 +++++++++- src/cpu/o3/cpu.cc | 25 +++++++++++++++++++------ src/cpu/o3/decode.hh | 2 +- src/cpu/o3/decode_impl.hh | 3 ++- src/cpu/o3/fetch.hh | 2 +- src/cpu/o3/fetch_impl.hh | 3 ++- src/cpu/o3/iew.hh | 2 +- src/cpu/o3/iew_impl.hh | 3 ++- src/cpu/o3/rename.hh | 2 +- src/cpu/o3/rename_impl.hh | 3 ++- 11 files changed, 41 insertions(+), 16 deletions(-) diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 49ff5cdadb..c39bc10f98 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -188,7 +188,7 @@ class DefaultCommit void initStage(); /** Initializes the draining of commit. */ - void drain(); + bool drain(); /** Resumes execution after draining.
*/ void resume(); diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 2eb05afac2..b50c9a898f 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -350,10 +350,18 @@ DefaultCommit::initStage() } template -void +bool DefaultCommit::drain() { drainPending = true; + + // If it's already drained, return true. + if (rob->isEmpty() && !iewStage->hasStoresToWB()) { + cpu->signalDrained(); + return true; + } + + return false; } template diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index b182d5ca78..3a52fe4c2a 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -712,19 +712,27 @@ bool FullO3CPU::drain(Event *drain_event) { drainCount = 0; - drainEvent = drain_event; fetch.drain(); decode.drain(); rename.drain(); iew.drain(); commit.drain(); + // A bit of a hack...set the drainEvent after all the drain() + // calls have been made, that way if all of the stages drain + // immediately, the signalDrained() function knows not to call + // process on the drain event. + drainEvent = drain_event; // Wake the CPU and record activity so everything can drain out if - // the CPU is currently idle. - wakeCPU(); - activityRec.activity(); + // the CPU was not able to immediately drain. + if (_status != Drained) { + wakeCPU(); + activityRec.activity(); - return false; + return false; + } else { + return true; + } } template @@ -751,8 +759,13 @@ FullO3CPU::signalDrained() if (++drainCount == NumStages) { if (tickEvent.scheduled()) tickEvent.squash(); + _status = Drained; - drainEvent->process(); + + if (drainEvent) { + drainEvent->process(); + drainEvent = NULL; + } } assert(drainCount <= 5); } diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh index 1e96f18848..7f5ecbc269 100644 --- a/src/cpu/o3/decode.hh +++ b/src/cpu/o3/decode.hh @@ -110,7 +110,7 @@ class DefaultDecode void setActiveThreads(std::list *at_ptr); /** Drains the decode stage. */ - void drain(); + bool drain(); /** Resumes execution after a drain. 
*/ void resume() { } diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 71637883bc..8b851c0328 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -165,11 +165,12 @@ DefaultDecode::setActiveThreads(list *at_ptr) } template -void +bool DefaultDecode::drain() { // Decode is done draining at any time. cpu->signalDrained(); + return true; } template diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 9611f0455b..a793c73612 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -181,7 +181,7 @@ class DefaultFetch void processCacheCompletion(PacketPtr pkt); /** Begins the drain of the fetch stage. */ - void drain(); + bool drain(); /** Resumes execution after a drain. */ void resume(); diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 500b5304eb..c0cc189f27 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -385,12 +385,13 @@ DefaultFetch::processCacheCompletion(PacketPtr pkt) } template -void +bool DefaultFetch::drain() { // Fetch is ready to drain at any time. cpu->signalDrained(); drainPending = true; + return true; } template diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 774b6dcbdc..4908a66495 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -144,7 +144,7 @@ class DefaultIEW void setScoreboard(Scoreboard *sb_ptr); /** Drains IEW stage. */ - void drain(); + bool drain(); /** Resumes execution after a drain. */ void resume(); diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index c3aa748ae8..0d82645e33 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -354,11 +354,12 @@ DefaultIEW::setScoreboard(Scoreboard *sb_ptr) } template -void +bool DefaultIEW::drain() { // IEW is ready to drain at any time. 
cpu->signalDrained(); + return true; } template diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index 538dd9bb43..034087febb 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -158,7 +158,7 @@ class DefaultRename void setScoreboard(Scoreboard *_scoreboard); /** Drains the rename stage. */ - void drain(); + bool drain(); /** Resumes execution after a drain. */ void resume() { } diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index fddbae3dba..805a72808a 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -257,11 +257,12 @@ DefaultRename::setScoreboard(Scoreboard *_scoreboard) } template -void +bool DefaultRename::drain() { // Rename is ready to switch out at any time. cpu->signalDrained(); + return true; } template From fff75316771331ec3247cbd6e424a93b252a1e29 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 6 Jul 2006 23:13:38 -0400 Subject: [PATCH 115/152] Support serializing and unserializing in the O3 CPU. Also a few small fixes for draining/switching CPUs. src/cpu/o3/commit_impl.hh: Fix to clear drainPending variable on call to resume. src/cpu/o3/cpu.cc: src/cpu/o3/cpu.hh: Support serializing and unserializing in the O3 CPU. src/cpu/o3/lsq_impl.hh: Be sure to say we have no stores to write back if the active thread list is empty. src/cpu/simple_thread.cc: src/cpu/simple_thread.hh: Slightly change how SimpleThread is used to copy from other ThreadContexts. 
--HG-- extra : convert_revision : 92a5109b3783a989d5b451036061ef82c56d3121 --- src/cpu/o3/commit_impl.hh | 1 + src/cpu/o3/cpu.cc | 61 +++++++++++++++++++++++++++++++++------ src/cpu/o3/cpu.hh | 8 ++++- src/cpu/o3/lsq_impl.hh | 3 ++ src/cpu/simple_thread.cc | 32 +++++++++++--------- src/cpu/simple_thread.hh | 4 ++- 6 files changed, 84 insertions(+), 25 deletions(-) diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index b50c9a898f..39e1cf3fe4 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -377,6 +377,7 @@ template void DefaultCommit::resume() { + drainPending = false; } template diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 3a52fe4c2a..f345fe82dc 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -707,6 +707,47 @@ FullO3CPU::haltContext(int tid) */ } +template +void +FullO3CPU::serialize(std::ostream &os) +{ + SERIALIZE_ENUM(_status); + BaseCPU::serialize(os); + nameOut(os, csprintf("%s.tickEvent", name())); + tickEvent.serialize(os); + + // Use SimpleThread's ability to checkpoint to make it easier to + // write out the registers. Also make this static so it doesn't + // get instantiated multiple times (causes a panic in statistics). + static SimpleThread temp; + + for (int i = 0; i < thread.size(); i++) { + nameOut(os, csprintf("%s.xc.%i", name(), i)); + temp.copyTC(thread[i]->getTC()); + temp.serialize(os); + } +} + +template +void +FullO3CPU::unserialize(Checkpoint *cp, const std::string &section) +{ + UNSERIALIZE_ENUM(_status); + BaseCPU::unserialize(cp, section); + tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); + + // Use SimpleThread's ability to checkpoint to make it easier to + // read in the registers. Also make this static so it doesn't + // get instantiated multiple times (causes a panic in statistics).
+ static SimpleThread temp; + + for (int i = 0; i < thread.size(); i++) { + temp.copyTC(thread[i]->getTC()); + temp.unserialize(cp, csprintf("%s.xc.%i", section, i)); + thread[i]->getTC()->copyArchRegs(temp.getTC()); + } +} + template bool FullO3CPU::drain(Event *drain_event) @@ -717,15 +758,16 @@ FullO3CPU::drain(Event *drain_event) rename.drain(); iew.drain(); commit.drain(); - // A bit of a hack...set the drainEvent after all the drain() - // calls have been made, that way if all of the stages drain - // immediately, the signalDrained() function knows not to call - // process on the drain event. - drainEvent = drain_event; // Wake the CPU and record activity so everything can drain out if // the CPU was not able to immediately drain. - if (_status != Drained) { + if (getState() != SimObject::DrainedTiming) { + // A bit of a hack...set the drainEvent after all the drain() + // calls have been made, that way if all of the stages drain + // immediately, the signalDrained() function knows not to call + // process on the drain event. + drainEvent = drain_event; + wakeCPU(); activityRec.activity(); @@ -739,14 +781,15 @@ template void FullO3CPU::resume() { - if (_status == SwitchedOut) - return; fetch.resume(); decode.resume(); rename.resume(); iew.resume(); commit.resume(); + if (_status == SwitchedOut || _status == Idle) + return; + if (!tickEvent.scheduled()) tickEvent.schedule(curTick); _status = Running; @@ -760,7 +803,7 @@ FullO3CPU::signalDrained() if (tickEvent.scheduled()) tickEvent.squash(); - _status = Drained; + changeState(SimObject::DrainedTiming); if (drainEvent) { drainEvent->process(); diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index cf37476014..5b881e5580 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -111,7 +111,6 @@ class FullO3CPU : public BaseO3CPU Idle, Halted, Blocked, - Drained, SwitchedOut }; @@ -266,6 +265,13 @@ class FullO3CPU : public BaseO3CPU /** Update The Order In Which We Process Threads. 
*/ void updateThreadPriority(); + /** Serialize state. */ + virtual void serialize(std::ostream &os); + + /** Unserialize from a checkpoint. */ + virtual void unserialize(Checkpoint *cp, const std::string &section); + + public: /** Executes a syscall on this cycle. * --------------------------------------- * Note: this is a virtual function. CPU-Specific diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 5173f8be11..89fd1a71da 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -502,6 +502,9 @@ LSQ::hasStoresToWB() { list::iterator active_threads = (*activeThreads).begin(); + if ((*activeThreads).empty()) + return false; + while (active_threads != (*activeThreads).end()) { unsigned tid = *active_threads++; if (!hasStoresToWB(tid)) diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc index 6255b33579..af1db2ff27 100644 --- a/src/cpu/simple_thread.cc +++ b/src/cpu/simple_thread.cc @@ -125,7 +125,7 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, #endif -SimpleThread::SimpleThread(ThreadContext *oldContext) +SimpleThread::SimpleThread() #if FULL_SYSTEM : ThreadState(-1, -1) #else @@ -134,19 +134,6 @@ SimpleThread::SimpleThread(ThreadContext *oldContext) { tc = new ProxyThreadContext(this); regs.clear(); - - copyState(oldContext); - -#if FULL_SYSTEM - EndQuiesceEvent *quiesce = oldContext->getQuiesceEvent(); - if (quiesce) { - quiesceEvent = quiesce; - } - Kernel::Statistics *stats = oldContext->getKernelStats(); - if (stats) { - kernelStats = stats; - } -#endif } SimpleThread::~SimpleThread() @@ -182,6 +169,23 @@ SimpleThread::takeOverFrom(ThreadContext *oldContext) oldContext->setStatus(ThreadContext::Unallocated); } +void +SimpleThread::copyTC(ThreadContext *context) +{ + copyState(context); + +#if FULL_SYSTEM + EndQuiesceEvent *quiesce = context->getQuiesceEvent(); + if (quiesce) { + quiesceEvent = quiesce; + } + Kernel::Statistics *stats = context->getKernelStats(); + if (stats) { + kernelStats = stats; + } 
+#endif +} + void SimpleThread::copyState(ThreadContext *oldContext) { diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh index ff2639e10f..d36853db4c 100644 --- a/src/cpu/simple_thread.hh +++ b/src/cpu/simple_thread.hh @@ -121,7 +121,7 @@ class SimpleThread : public ThreadState MemObject *memobj); #endif - SimpleThread(ThreadContext *oldContext); + SimpleThread(); virtual ~SimpleThread(); @@ -129,6 +129,8 @@ class SimpleThread : public ThreadState void regStats(const std::string &name); + void copyTC(ThreadContext *context); + void copyState(ThreadContext *oldContext); void serialize(std::ostream &os); From 6872b99c29cd4263062bb8b3ef15aa5a9f2532d4 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 6 Jul 2006 23:16:22 -0400 Subject: [PATCH 116/152] Be sure to call resume after restoring from a checkpoint. --HG-- extra : convert_revision : 4d672917038779a23f4ce7eb5d4e3039c1f5d726 --- src/python/m5/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py index 7d35ee8b8a..dc3af70003 100644 --- a/src/python/m5/__init__.py +++ b/src/python/m5/__init__.py @@ -250,6 +250,7 @@ def checkpoint(root): def restoreCheckpoint(root): print "Restoring from checkpoint" cc_main.unserializeAll() + resume(root) def changeToAtomic(system): if not isinstance(system, objects.Root) and not isinstance(system, System): From c355df5bfea757604113104c99998fb232539a5d Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 7 Jul 2006 04:06:26 -0400 Subject: [PATCH 117/152] Fix so that O3CPU doesnt segfault on exit. Major thing was to not execute commit if there are no active threads in CPU. src/cpu/o3/alpha/thread_context.hh: call deallocate instead of deallocateContext src/cpu/o3/commit_impl.hh: dont run commit stage if there are no instructions src/cpu/o3/cpu.cc: add deallocate event, deactivateThread function, and edit deallocateContext. 
src/cpu/o3/cpu.hh: add deallocate event and add optional delay to deallocateContext src/cpu/o3/thread_context.hh: optional delay for deallocate src/cpu/o3/thread_context_impl.hh: edit DPRINTFs to say Thread Context instead of Alpha TC src/cpu/thread_context.hh: optional delay src/sim/syscall_emul.hh: name stuff --HG-- extra : convert_revision : f4033e1f66b3043d30ad98dcc70d8b193dea70b6 --- src/cpu/o3/alpha/thread_context.hh | 15 +- src/cpu/o3/commit_impl.hh | 3 + src/cpu/o3/cpu.cc | 249 +++++++++++++++++------------ src/cpu/o3/cpu.hh | 50 +++++- src/cpu/o3/thread_context.hh | 2 +- src/cpu/o3/thread_context_impl.hh | 16 +- src/cpu/thread_context.hh | 4 +- src/sim/syscall_emul.hh | 1 - 8 files changed, 218 insertions(+), 122 deletions(-) diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh index 78b0ee7882..ad52b0d2e8 100644 --- a/src/cpu/o3/alpha/thread_context.hh +++ b/src/cpu/o3/alpha/thread_context.hh @@ -70,18 +70,19 @@ class AlphaTC : public O3ThreadContext { panic("Not supported on Alpha!"); } - // This function exits the thread context in the CPU and returns - // 1 if the CPU has no more active threads (meaning it's OK to exit); - // Used in syscall-emulation mode when a thread executes the 'exit' - // syscall. + /** This function exits the thread context in the CPU and returns + * 1 if the CPU has no more active threads (meaning it's OK to exit); + * Used in syscall-emulation mode when a thread executes the 'exit' + * syscall. 
+ */ virtual int exit() { - this->cpu->deallocateContext(this->thread->readTid()); + this->deallocate(); // If there are still threads executing in the system if (this->cpu->numActiveThreads()) - return 0; + return 0; // don't exit simulation else - return 1; + return 1; // exit simulation } }; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 06b8e8a954..53d247e978 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -562,6 +562,9 @@ DefaultCommit::tick() return; } + if ((*activeThreads).size() <= 0) + return; + list::iterator threads = (*activeThreads).begin(); // Check if any of the threads are done squashing. Change the diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index c88146fa6e..0a564169ae 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -114,6 +114,36 @@ FullO3CPU::ActivateThreadEvent::description() return "FullO3CPU \"Activate Thread\" event"; } +template +FullO3CPU::DeallocateContextEvent::DeallocateContextEvent() + : Event(&mainEventQueue, CPU_Tick_Pri) +{ +} + +template +void +FullO3CPU::DeallocateContextEvent::init(int thread_num, + FullO3CPU *thread_cpu) +{ + tid = thread_num; + cpu = thread_cpu; +} + +template +void +FullO3CPU::DeallocateContextEvent::process() +{ + cpu->deactivateThread(tid); + cpu->removeThread(tid); +} + +template +const char * +FullO3CPU::DeallocateContextEvent::description() +{ + return "FullO3CPU \"Deallocate Context\" event"; +} + template FullO3CPU::FullO3CPU(Params *params) : BaseO3CPU(params), @@ -459,6 +489,118 @@ FullO3CPU::init() commit.setThreads(thread); } +template +void +FullO3CPU::activateThread(unsigned tid) +{ + list::iterator isActive = find( + activeThreads.begin(), activeThreads.end(), tid); + + if (isActive == activeThreads.end()) { + DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n", + tid); + + activeThreads.push_back(tid); + } +} + +template +void +FullO3CPU::deactivateThread(unsigned tid) +{ + //Remove From Active List, if Active + 
list::iterator thread_it = + find(activeThreads.begin(), activeThreads.end(), tid); + + if (thread_it != activeThreads.end()) { + DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", + tid); + activeThreads.erase(thread_it); + } +} + +template +void +FullO3CPU::activateContext(int tid, int delay) +{ + // Needs to set each stage to running as well. + if (delay){ + DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to activate " + "on cycle %d\n", tid, curTick + cycles(delay)); + scheduleActivateThreadEvent(tid, delay); + } else { + activateThread(tid); + } + + if(lastActivatedCycle < curTick) { + scheduleTickEvent(delay); + + // Be sure to signal that there's some activity so the CPU doesn't + // deschedule itself. + activityRec.activity(); + fetch.wakeFromQuiesce(); + + lastActivatedCycle = curTick; + + _status = Running; + } +} + +template +void +FullO3CPU::deallocateContext(int tid, int delay) +{ + // Schedule removal of thread data from CPU + if (delay){ + DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to deallocate " + "on cycle %d\n", tid, curTick + cycles(delay)); + scheduleDeallocateContextEvent(tid, delay); + } else { + deactivateThread(tid); + removeThread(tid); + } +} + +template +void +FullO3CPU::suspendContext(int tid) +{ + DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid); + unscheduleTickEvent(); + _status = Idle; +/* + //Remove From Active List, if Active + list::iterator isActive = find( + activeThreads.begin(), activeThreads.end(), tid); + + if (isActive != activeThreads.end()) { + DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", + tid); + activeThreads.erase(isActive); + } +*/ +} + +template +void +FullO3CPU::haltContext(int tid) +{ + DPRINTF(O3CPU,"[tid:%i]: Halting Thread Context", tid); +/* + //Remove From Active List, if Active + list::iterator isActive = find( + activeThreads.begin(), activeThreads.end(), tid); + + if (isActive != activeThreads.end()) { + DPRINTF(O3CPU,"[tid:%i]: Removing from active 
threads list\n", + tid); + activeThreads.erase(isActive); + + removeThread(tid); + } +*/ +} + template void FullO3CPU::insertThread(unsigned tid) @@ -511,7 +653,7 @@ template void FullO3CPU::removeThread(unsigned tid) { - DPRINTF(O3CPU,"[tid:%i] Removing thread from CPU."); + DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU."); // Copy Thread Data From RegFile // If thread is suspended, it might be re-allocated @@ -537,6 +679,8 @@ FullO3CPU::removeThread(unsigned tid) fetch.squash(0,tid); decode.squash(tid); rename.squash(tid); + iew.squash(tid); + commit.rob->squash(commit.rob->readHeadInst(tid)->seqNum, tid); assert(iew.ldstQueue.getCount(tid) == 0); @@ -600,113 +744,12 @@ FullO3CPU::activateWhenReady(int tid) //blocks fetch contextSwitch = true; + //@todo: dont always add to waitlist //do waitlist cpuWaitList.push_back(tid); } } -template -void -FullO3CPU::activateThread(unsigned int tid) -{ - list::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); - - if (isActive == activeThreads.end()) { - DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n", - tid); - - activeThreads.push_back(tid); - } -} - - -template -void -FullO3CPU::activateContext(int tid, int delay) -{ - // Needs to set each stage to running as well. - if (delay){ - DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to activate " - "on cycle %d\n", tid, curTick + cycles(delay)); - scheduleActivateThreadEvent(tid, delay); - } else { - activateThread(tid); - } - - if(lastActivatedCycle < curTick) { - scheduleTickEvent(delay); - - // Be sure to signal that there's some activity so the CPU doesn't - // deschedule itself. 
- activityRec.activity(); - fetch.wakeFromQuiesce(); - - lastActivatedCycle = curTick; - - _status = Running; - } -} - -template -void -FullO3CPU::suspendContext(int tid) -{ - DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid); - unscheduleTickEvent(); - _status = Idle; -/* - //Remove From Active List, if Active - list::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); - - if (isActive != activeThreads.end()) { - DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); - } -*/ -} - -template -void -FullO3CPU::deallocateContext(int tid) -{ - DPRINTF(O3CPU,"[tid:%i]: Deallocating Thread Context", tid); - - //Remove From Active List, if Active - list::iterator thread_it = - find(activeThreads.begin(), activeThreads.end(), tid); - - if (thread_it != activeThreads.end()) { - DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(thread_it); - - removeThread(tid); - } -} - -template -void -FullO3CPU::haltContext(int tid) -{ - DPRINTF(O3CPU,"[tid:%i]: Halting Thread Context", tid); -/* - //Remove From Active List, if Active - list::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); - - if (isActive != activeThreads.end()) { - DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); - - removeThread(tid); - } -*/ -} - template void FullO3CPU::switchOut() diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index bd04516019..476b5ffb33 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -197,6 +197,49 @@ class FullO3CPU : public BaseO3CPU /** The tick event used for scheduling CPU ticks. */ ActivateThreadEvent activateThreadEvent[Impl::MaxThreads]; + class DeallocateContextEvent : public Event + { + private: + /** Number of Thread to Activate */ + int tid; + + /** Pointer to the CPU. */ + FullO3CPU *cpu; + + public: + /** Constructs the event. 
*/ + DeallocateContextEvent(); + + /** Initialize Event */ + void init(int thread_num, FullO3CPU *thread_cpu); + + /** Processes the event, deactivating and removing the thread. */ + void process(); + + /** Returns the description of the event. */ + const char *description(); + }; + + /** Schedule cpu to deallocate thread context.*/ + void scheduleDeallocateContextEvent(int tid, int delay) + { + // Schedule thread to deallocate, regardless of its current state. + if (deallocateContextEvent[tid].squashed()) + deallocateContextEvent[tid].reschedule(curTick + cycles(delay)); + else if (!deallocateContextEvent[tid].scheduled()) + deallocateContextEvent[tid].schedule(curTick + cycles(delay)); + } + + /** Unschedule thread deallocation in CPU */ + void unscheduleDeallocateContextEvent(int tid) + { + if (deallocateContextEvent[tid].scheduled()) + deallocateContextEvent[tid].squash(); + } + + /** The events used for scheduling thread context deallocation. */ + DeallocateContextEvent deallocateContextEvent[Impl::MaxThreads]; + public: /** Constructs a CPU with the given parameters. */ FullO3CPU(Params *params); @@ -219,7 +262,10 @@ class FullO3CPU : public BaseO3CPU { return activeThreads.size(); } /** Add Thread to Active Threads List */ - void activateThread(unsigned int tid); + void activateThread(unsigned tid); + + /** Remove Thread from Active Threads List */ + void deactivateThread(unsigned tid); /** Setup CPU to insert a thread's context */ void insertThread(unsigned tid); @@ -247,7 +293,7 @@ class FullO3CPU : public BaseO3CPU /** Remove Thread from Active Threads List && * Remove Thread Context from CPU. */ - void deallocateContext(int tid); + void deallocateContext(int tid, int delay = 1); /** Remove Thread from Active Threads List && * Remove Thread Context from CPU.
diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh index d097ee63eb..df8d1a6d8b 100755 --- a/src/cpu/o3/thread_context.hh +++ b/src/cpu/o3/thread_context.hh @@ -112,7 +112,7 @@ class O3ThreadContext : public ThreadContext virtual void suspend(); /** Set the status to Unallocated. */ - virtual void deallocate(); + virtual void deallocate(int delay = 0); /** Set the status to Halted. */ virtual void halt(); diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index cfb71f6238..bf8cbf850d 100755 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -115,7 +115,8 @@ template void O3ThreadContext::activate(int delay) { - DPRINTF(O3CPU, "Calling activate on AlphaTC\n"); + DPRINTF(O3CPU, "Calling activate on Thread Context %d\n", + getThreadNum()); if (thread->status() == ThreadContext::Active) return; @@ -139,7 +140,8 @@ template void O3ThreadContext::suspend() { - DPRINTF(O3CPU, "Calling suspend on AlphaTC\n"); + DPRINTF(O3CPU, "Calling suspend on Thread Context %d\n", + getThreadNum()); if (thread->status() == ThreadContext::Suspended) return; @@ -163,22 +165,24 @@ O3ThreadContext::suspend() template void -O3ThreadContext::deallocate() +O3ThreadContext::deallocate(int delay) { - DPRINTF(O3CPU, "Calling deallocate on AlphaTC\n"); + DPRINTF(O3CPU, "Calling deallocate on Thread Context %d\n", + getThreadNum()); if (thread->status() == ThreadContext::Unallocated) return; thread->setStatus(ThreadContext::Unallocated); - cpu->deallocateContext(thread->readTid()); + cpu->deallocateContext(thread->readTid(), delay); } template void O3ThreadContext::halt() { - DPRINTF(O3CPU, "Calling halt on AlphaTC\n"); + DPRINTF(O3CPU, "Calling halt on Thread Context %d\n", + getThreadNum()); if (thread->status() == ThreadContext::Halted) return; diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh index 70d7051441..e019e22bc3 100644 --- a/src/cpu/thread_context.hh +++ b/src/cpu/thread_context.hh @@ 
-143,7 +143,7 @@ class ThreadContext virtual void suspend() = 0; /// Set the status to Unallocated. - virtual void deallocate() = 0; + virtual void deallocate(int delay = 0) = 0; /// Set the status to Halted. virtual void halt() = 0; @@ -318,7 +318,7 @@ class ProxyThreadContext : public ThreadContext void suspend() { actualTC->suspend(); } /// Set the status to Unallocated. - void deallocate() { actualTC->deallocate(); } + void deallocate(int delay = 0) { actualTC->deallocate(); } /// Set the status to Halted. void halt() { actualTC->halt(); } diff --git a/src/sim/syscall_emul.hh b/src/sim/syscall_emul.hh index a3990e2fd1..a3ff006efb 100644 --- a/src/sim/syscall_emul.hh +++ b/src/sim/syscall_emul.hh @@ -27,7 +27,6 @@ * * Authors: Steve Reinhardt * Kevin Lim - * Korey Sewell */ #ifndef __SIM_SYSCALL_EMUL_HH__ From ea11c7bdbefc8eb640f875cdf91a6d6bed398ec4 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Fri, 7 Jul 2006 15:15:11 -0400 Subject: [PATCH 118/152] Update cpus to use the getPort function to use a connector object to connect the I/D cache ports to memory configs/test/test.py: Update to use new cpu getPort functionality src/cpu/base.cc: Make cpu's a memObject to expose getPort interface src/cpu/base.hh: Make cpu's a memObject to export getPort interface src/cpu/simple/atomic.cc: src/cpu/simple/atomic.hh: src/cpu/simple/timing.cc: src/cpu/simple/timing.hh: Now use the connector via getPort interface src/mem/cache/base_cache.cc: Make sure the cache recognizes all port names --HG-- extra : convert_revision : dbfefa978ec755bc8aa6f962ae158acf32dafe61 --- configs/test/test.py | 2 ++ src/cpu/base.cc | 2 +- src/cpu/base.hh | 5 +++-- src/cpu/simple/atomic.cc | 22 ++++++++++++++++------ src/cpu/simple/atomic.hh | 2 ++ src/cpu/simple/timing.cc | 19 ++++++++++--------- src/cpu/simple/timing.hh | 2 ++ src/mem/cache/base_cache.cc | 8 +++++++- 8 files changed, 43 insertions(+), 19 deletions(-) diff --git a/configs/test/test.py b/configs/test/test.py index 
625304a08a..e7b0971ef3 100644 --- a/configs/test/test.py +++ b/configs/test/test.py @@ -75,6 +75,8 @@ else: cpu = AtomicSimpleCPU() cpu.workload = process cpu.mem = magicbus +cpu.icache_port=magicbus.port +cpu.dcache_port=magicbus.port system = System(physmem = mem, cpu = cpu) mem.port = magicbus.port diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 0b9c80591b..548f012dfc 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -63,7 +63,7 @@ BaseCPU::BaseCPU(Params *p) params(p), number_of_threads(p->numberOfThreads), system(p->system) #else BaseCPU::BaseCPU(Params *p) - : SimObject(p->name), clock(p->clock), params(p), + : MemObject(p->name), clock(p->clock), params(p), number_of_threads(p->numberOfThreads), system(p->system) #endif { diff --git a/src/cpu/base.hh b/src/cpu/base.hh index 5256a411f4..2be6e4e81b 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -37,15 +37,16 @@ #include "base/statistics.hh" #include "config/full_system.hh" #include "sim/eventq.hh" -#include "sim/sim_object.hh" +#include "mem/mem_object.hh" #include "arch/isa_traits.hh" class BranchPred; class CheckerCPU; class ThreadContext; class System; +class Port; -class BaseCPU : public SimObject +class BaseCPU : public MemObject { protected: // CPU's clock period in terms of the number of ticks of curTime. 
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index b7202cbbb7..12bfdeb9bf 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -55,18 +55,28 @@ AtomicSimpleCPU::TickEvent::description() return "AtomicSimpleCPU tick event"; } +Port * +AtomicSimpleCPU::getPort(const std::string &if_name, int idx) +{ + if (if_name == "dcache_port") + return &dcachePort; + else if (if_name == "icache_port") + return &icachePort; + else + panic("No Such Port\n"); +} void AtomicSimpleCPU::init() { //Create Memory Ports (conect them up) - Port *mem_dport = mem->getPort(""); - dcachePort.setPeer(mem_dport); - mem_dport->setPeer(&dcachePort); +// Port *mem_dport = mem->getPort(""); +// dcachePort.setPeer(mem_dport); +// mem_dport->setPeer(&dcachePort); - Port *mem_iport = mem->getPort(""); - icachePort.setPeer(mem_iport); - mem_iport->setPeer(&icachePort); +// Port *mem_iport = mem->getPort(""); +// icachePort.setPeer(mem_iport); +// mem_iport->setPeer(&icachePort); BaseCPU::init(); #if FULL_SYSTEM diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh index 951a8da063..179b4a7211 100644 --- a/src/cpu/simple/atomic.hh +++ b/src/cpu/simple/atomic.hh @@ -122,6 +122,8 @@ class AtomicSimpleCPU : public BaseSimpleCPU public: + virtual Port *getPort(const std::string &if_name, int idx = -1); + virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 036037ba9a..170c78d3a2 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -37,19 +37,20 @@ using namespace std; using namespace TheISA; +Port * +TimingSimpleCPU::getPort(const std::string &if_name, int idx) +{ + if (if_name == "dcache_port") + return &dcachePort; + else if (if_name == "icache_port") + return &icachePort; + else + panic("No Such Port\n"); +} void TimingSimpleCPU::init() { - //Create Memory Ports (conect them up) - Port *mem_dport = 
mem->getPort(""); - dcachePort.setPeer(mem_dport); - mem_dport->setPeer(&dcachePort); - - Port *mem_iport = mem->getPort(""); - icachePort.setPeer(mem_iport); - mem_iport->setPeer(&icachePort); - BaseCPU::init(); #if FULL_SYSTEM for (int i = 0; i < threadContexts.size(); ++i) { diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index c360e553e4..d5b5d6648d 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -130,6 +130,8 @@ class TimingSimpleCPU : public BaseSimpleCPU public: + virtual Port *getPort(const std::string &if_name, int idx = -1); + virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string §ion); diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index 4fbda40745..b2caca765c 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -144,7 +144,13 @@ BaseCache::getPort(const std::string &if_name, int idx) cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true); return cpuSidePort; } - if (if_name == "functional") + else if (if_name == "functional") + { + if(cpuSidePort == NULL) + cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true); + return cpuSidePort; + } + else if (if_name == "cpu_side") { if(cpuSidePort == NULL) cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true); From 76c110d924d2adfa209ad4302af791dfe6f67946 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Fri, 7 Jul 2006 15:16:41 -0400 Subject: [PATCH 119/152] Remove hack now that ports work properly --HG-- extra : convert_revision : 43c22294867d7cbbc67ae66ec41a1d1c89f5a59d --- src/cpu/simple/timing.cc | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 170c78d3a2..12a47fb3e7 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -452,12 +452,7 @@ TimingSimpleCPU::completeIfetch(Packet *pkt) bool TimingSimpleCPU::IcachePort::recvTiming(Packet *pkt) 
{ - if (cpu->_status == DcacheWaitResponse) - cpu->completeDataAccess(pkt); - else if (cpu->_status == IcacheWaitResponse) - cpu->completeIfetch(pkt); - else - assert("OOPS" && 0); + cpu->completeIfetch(pkt); return true; } From 018ba50f2c05e07c7bd1c951db8ba33402c323dc Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 7 Jul 2006 15:38:15 -0400 Subject: [PATCH 120/152] Switch out fixes for CPUs. src/cpu/o3/cpu.cc: Fix up keeping proper state when switched out and drained. src/cpu/simple/timing.cc: src/cpu/simple/timing.hh: Keep track of the event we use to schedule fetch initially and upon resume. We may have to cancel the event if the CPU is switched out. --HG-- extra : convert_revision : 60a2a1bd2cdc67bd53ca4a67aa77166c826a4c8c --- src/cpu/o3/cpu.cc | 4 +++- src/cpu/simple/timing.cc | 24 ++++++++++++++++++------ src/cpu/simple/timing.hh | 2 ++ 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index f345fe82dc..ceba74ef3c 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -400,7 +400,8 @@ FullO3CPU::tick() } if (!tickEvent.scheduled()) { - if (_status == SwitchedOut) { + if (_status == SwitchedOut || + getState() == SimObject::DrainedTiming) { // increment stat lastRunningCycle = curTick; } else if (!activityRec.active()) { @@ -793,6 +794,7 @@ FullO3CPU::resume() if (!tickEvent.scheduled()) tickEvent.schedule(curTick); _status = Running; + changeState(SimObject::Timing); } template diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 6774d79a97..eb58959496 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -89,6 +89,7 @@ TimingSimpleCPU::TimingSimpleCPU(Params *p) _status = Idle; ifetch_pkt = dcache_pkt = NULL; drainEvent = NULL; + fetchEvent = NULL; state = SimObject::Timing; } @@ -130,9 +131,15 @@ void TimingSimpleCPU::resume() { if (_status != SwitchedOut && _status != Idle) { - Event *e = - new EventWrapper(this, true); - e->schedule(curTick); + // Delete the old 
event if it existed. + if (fetchEvent) { + assert(!fetchEvent->scheduled()); + delete fetchEvent; + } + + fetchEvent = + new EventWrapper(this, false); + fetchEvent->schedule(curTick); } } @@ -147,6 +154,11 @@ TimingSimpleCPU::switchOut() { assert(status() == Running || status() == Idle); _status = SwitchedOut; + + // If we've been scheduled to resume but are then told to switch out, + // we'll need to cancel it. + if (fetchEvent && fetchEvent->scheduled()) + fetchEvent->deschedule(); } @@ -178,9 +190,9 @@ TimingSimpleCPU::activateContext(int thread_num, int delay) notIdleFraction++; _status = Running; // kick things off by initiating the fetch of the next instruction - Event *e = - new EventWrapper(this, true); - e->schedule(curTick + cycles(delay)); + fetchEvent = + new EventWrapper(this, false); + fetchEvent->schedule(curTick + cycles(delay)); } diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index c360e553e4..f9bc0f352c 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -66,6 +66,8 @@ class TimingSimpleCPU : public BaseSimpleCPU Event *drainEvent; + Event *fetchEvent; + private: class CpuPort : public Port From 74d4d671386252d0d5f570b69fc63cb332757580 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 7 Jul 2006 15:58:03 -0400 Subject: [PATCH 121/152] Minor fix for SMT Hello Worlds to finish correctly. Still, there is a problem with the LSQ and indexing out of range in the buffer. I haven't nailed down the fix yet, but it's coming ... src/cpu/o3/commit_impl.hh: add space to DPRINT src/cpu/o3/cpu.cc: add newline to DPRINT src/cpu/o3/rob.hh: src/cpu/o3/rob_impl.hh: Each thread needs its own squashedSeqNum for the case where they are both squashing at the same time and they don't write over each other's squash number.
--HG-- extra : convert_revision : 2155421a8b5b20e4544eea3d3c53d3e715465fa6 --- src/cpu/o3/commit_impl.hh | 2 +- src/cpu/o3/cpu.cc | 2 +- src/cpu/o3/rob.hh | 2 +- src/cpu/o3/rob_impl.hh | 14 ++++++++------ 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index dc2c8cbbbb..e1f8e1f1e4 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -585,7 +585,7 @@ DefaultCommit::tick() commitStatus[tid] = Running; } else { DPRINTF(Commit,"[tid:%u]: Still Squashing, cannot commit any" - "insts this cycle.\n", tid); + " insts this cycle.\n", tid); rob->doSquash(tid); toIEW->commitInfo[tid].robSquashing = true; wroteToTimeBuffer = true; diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index ec02a39295..c46276d5a3 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -653,7 +653,7 @@ template void FullO3CPU::removeThread(unsigned tid) { - DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU."); + DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU.\n", tid); // Copy Thread Data From RegFile // If thread is suspended, it might be re-allocated diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh index 6f8080ef44..7cd5a51433 100644 --- a/src/cpu/o3/rob.hh +++ b/src/cpu/o3/rob.hh @@ -308,7 +308,7 @@ class ROB private: /** The sequence number of the squashed instruction. */ - InstSeqNum squashedSeqNum; + InstSeqNum squashedSeqNum[Impl::MaxThreads]; /** Is the ROB done squashing. 
*/ bool doneSquashing[Impl::MaxThreads]; diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh index d9978b17f7..1b9f666b82 100644 --- a/src/cpu/o3/rob_impl.hh +++ b/src/cpu/o3/rob_impl.hh @@ -41,10 +41,10 @@ ROB::ROB(unsigned _numEntries, unsigned _squashWidth, : numEntries(_numEntries), squashWidth(_squashWidth), numInstsInROB(0), - squashedSeqNum(0), numThreads(_numThreads) { for (int tid=0; tid < numThreads; tid++) { + squashedSeqNum[tid] = 0; doneSquashing[tid] = true; threadEntries[tid] = 0; } @@ -352,11 +352,11 @@ void ROB::doSquash(unsigned tid) { DPRINTF(ROB, "[tid:%u]: Squashing instructions until [sn:%i].\n", - tid, squashedSeqNum); + tid, squashedSeqNum[tid]); assert(squashIt[tid] != instList[tid].end()); - if ((*squashIt[tid])->seqNum < squashedSeqNum) { + if ((*squashIt[tid])->seqNum < squashedSeqNum[tid]) { DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n", tid); @@ -371,7 +371,7 @@ ROB::doSquash(unsigned tid) for (int numSquashed = 0; numSquashed < squashWidth && squashIt[tid] != instList[tid].end() && - (*squashIt[tid])->seqNum > squashedSeqNum; + (*squashIt[tid])->seqNum > squashedSeqNum[tid]; ++numSquashed) { DPRINTF(ROB, "[tid:%u]: Squashing instruction PC %#x, seq num %i.\n", @@ -408,7 +408,7 @@ ROB::doSquash(unsigned tid) // Check if ROB is done squashing. 
- if ((*squashIt[tid])->seqNum <= squashedSeqNum) { + if ((*squashIt[tid])->seqNum <= squashedSeqNum[tid]) { DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n", tid); @@ -520,7 +520,7 @@ ROB::squash(InstSeqNum squash_num,unsigned tid) doneSquashing[tid] = false; - squashedSeqNum = squash_num; + squashedSeqNum[tid] = squash_num; if (!instList[tid].empty()) { InstIt tail_thread = instList[tid].end(); @@ -544,6 +544,7 @@ ROB::readHeadInst() } } */ + template typename Impl::DynInstPtr ROB::readHeadInst(unsigned tid) @@ -558,6 +559,7 @@ ROB::readHeadInst(unsigned tid) return dummyInst; } } + /* template uint64_t From 7811500eefc57d8f9f00845b9187d9a1a6ef6655 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Fri, 7 Jul 2006 16:02:22 -0400 Subject: [PATCH 122/152] Fix address range calculation. Still need bus to handle snoop ranges. On the way towards multi-level caches (L2) src/mem/cache/base_cache.cc: src/mem/cache/base_cache.hh: Fix address range calculation. Still need bus to handle snoop ranges. 
--HG-- extra : convert_revision : 800078d88aab5e563f4a9bb599f91cd44f36e625 --- src/mem/cache/base_cache.cc | 10 +++++++++- src/mem/cache/base_cache.hh | 29 +++++++++++++++++++++++++---- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index b2caca765c..be9769fdc1 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -59,7 +59,7 @@ void BaseCache::CachePort::getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) { - cache->getAddressRanges(resp, snoop); + cache->getAddressRanges(resp, snoop, isCpuSide); } int @@ -166,6 +166,14 @@ BaseCache::getPort(const std::string &if_name, int idx) else panic("Port name %s unrecognized\n", if_name); } +void +BaseCache::init() +{ + if (!cpuSidePort || !memSidePort) + panic("Cache not hooked up on both sides\n"); + cpuSidePort->sendStatusChange(Port::RangeChange); +} + void BaseCache::regStats() { diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index f832735dbe..0d1bfdfdbd 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -143,9 +143,19 @@ class BaseCache : public MemObject fatal("No implementation"); } - virtual void recvStatusChange(Port::Status status, bool isCpuSide) + void recvStatusChange(Port::Status status, bool isCpuSide) { - fatal("No implementation"); + if (status == Port::RangeChange) + { + if (!isCpuSide) + { + cpuSidePort->sendStatusChange(Port::RangeChange); + } + else + { + memSidePort->sendStatusChange(Port::RangeChange); + } + } } virtual Packet *getPacket() @@ -320,6 +330,8 @@ class BaseCache : public MemObject memSidePort = NULL; } + virtual void init(); + /** * Query block size of a cache. 
* @return The block size @@ -519,9 +531,18 @@ class BaseCache : public MemObject */ void rangeChange() {} - void getAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) + void getAddressRanges(AddrRangeList &resp, AddrRangeList &snoop, bool isCpuSide) { - panic("Unimplimented\n"); + if (isCpuSide) + { + AddrRangeList dummy; + memSidePort->getPeerAddressRanges(resp, dummy); + } + else + { + //This is where snoops get updated + return; + } } }; From 1faada9bd98a6425624a97813d4c8cdc5b78aa1f Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 7 Jul 2006 16:46:08 -0400 Subject: [PATCH 123/152] Take the name of the checkpoint directory in when calling checkpoint() or restoreCheckpoint(). src/sim/main.cc: src/sim/serialize.cc: src/sim/serialize.hh: Take in the directory name when checkpointing. --HG-- extra : convert_revision : 040e828622480f1051e2156f4439e24864c38d45 --- src/python/m5/__init__.py | 10 +++++----- src/sim/main.cc | 8 ++++---- src/sim/serialize.cc | 10 ++++++---- src/sim/serialize.hh | 6 +++--- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py index dc3af70003..f4f5be2d1a 100644 --- a/src/python/m5/__init__.py +++ b/src/python/m5/__init__.py @@ -34,7 +34,7 @@ import cc_main # import a few SWIG-wrapped items (those that are likely to be used # directly by user scripts) completely into this module for # convenience -from cc_main import simulate, SimLoopExitEvent, setCheckpointDir +from cc_main import simulate, SimLoopExitEvent # import the m5 compile options import defines @@ -239,17 +239,17 @@ def drain(root): def resume(root): root.resume() -def checkpoint(root): +def checkpoint(root, dir): if not isinstance(root, objects.Root): raise TypeError, "Object is not a root object. Checkpoint must be called on a root object." 
doDrain(root) print "Writing checkpoint" - cc_main.serializeAll() + cc_main.serializeAll(dir) resume(root) -def restoreCheckpoint(root): +def restoreCheckpoint(root, dir): print "Restoring from checkpoint" - cc_main.unserializeAll() + cc_main.unserializeAll(dir) resume(root) def changeToAtomic(system): diff --git a/src/sim/main.cc b/src/sim/main.cc index e96a449307..5f34f6520f 100644 --- a/src/sim/main.cc +++ b/src/sim/main.cc @@ -542,15 +542,15 @@ cleanupCountedDrain(Event *counted_drain) } void -serializeAll() +serializeAll(const std::string &cpt_dir) { - Serializable::serializeAll(); + Serializable::serializeAll(cpt_dir); } void -unserializeAll() +unserializeAll(const std::string &cpt_dir) { - Serializable::unserializeAll(); + Serializable::unserializeAll(cpt_dir); } /** diff --git a/src/sim/serialize.cc b/src/sim/serialize.cc index 7450d7b7e0..6a1d084b7b 100644 --- a/src/sim/serialize.cc +++ b/src/sim/serialize.cc @@ -231,8 +231,9 @@ Globals::unserialize(Checkpoint *cp) } void -Serializable::serializeAll() +Serializable::serializeAll(const std::string &cpt_dir) { + setCheckpointDir(cpt_dir); string dir = Checkpoint::dir(); if (mkdir(dir.c_str(), 0775) == -1 && errno != EEXIST) fatal("couldn't mkdir %s\n", dir); @@ -247,8 +248,9 @@ Serializable::serializeAll() } void -Serializable::unserializeAll() +Serializable::unserializeAll(const std::string &cpt_dir) { + setCheckpointDir(cpt_dir); string dir = Checkpoint::dir(); string cpt_file = dir + Checkpoint::baseFilename; string section = ""; @@ -289,9 +291,9 @@ Checkpoint::dir() } void -debug_serialize() +debug_serialize(const std::string &cpt_dir) { - Serializable::serializeAll(); + Serializable::serializeAll(cpt_dir); } diff --git a/src/sim/serialize.hh b/src/sim/serialize.hh index a80dc99e4c..880fb07858 100644 --- a/src/sim/serialize.hh +++ b/src/sim/serialize.hh @@ -126,8 +126,8 @@ class Serializable static int ckptCount; static int ckptMaxCount; static int ckptPrevCount; - static void serializeAll(); - static 
void unserializeAll(); + static void serializeAll(const std::string &cpt_dir); + static void unserializeAll(const std::string &cpt_dir); static void unserializeGlobals(Checkpoint *cp); }; @@ -206,7 +206,7 @@ SerializableClass the##OBJ_CLASS##Class(CLASS_NAME, \ OBJ_CLASS::createForUnserialize); void -setCheckpointName(const std::string &name); +setCheckpointDir(const std::string &name); class Checkpoint { From 744e0055b704f4c202e765f46e4fd4f56e1b847c Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 7 Jul 2006 16:48:44 -0400 Subject: [PATCH 124/152] Fix for bug when draining and a memory access is outstanding. --HG-- extra : convert_revision : 1af782cf023ae74c2a3ff9f7aefcea880bc87936 --- src/cpu/o3/fetch_impl.hh | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index c0cc189f27..03836f47cb 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -354,22 +354,23 @@ DefaultFetch::processCacheCompletion(PacketPtr pkt) // to return. if (fetchStatus[tid] != IcacheWaitResponse || pkt->req != memReq[tid] || - isSwitchedOut() || - drainPending) { + isSwitchedOut()) { ++fetchIcacheSquashes; delete pkt->req; delete pkt; return; } - // Wake up the CPU (if it went to sleep and was waiting on this completion - // event). - cpu->wakeCPU(); + if (!drainPending) { + // Wake up the CPU (if it went to sleep and was waiting on + // this completion event). + cpu->wakeCPU(); - DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", - tid); + DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", + tid); - switchToActive(); + switchToActive(); + } // Only switch to IcacheAccessComplete if we're not stalled as well. 
if (checkStall(tid)) { @@ -509,7 +510,7 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid unsigned flags = 0; #endif // FULL_SYSTEM - if (cacheBlocked || (interruptPending && flags == 0) || drainPending) { + if (cacheBlocked || (interruptPending && flags == 0)) { // Hold off fetch from getting new instructions when: // Cache is blocked, or // while an interrupt is pending and we're not in PAL mode, or @@ -909,7 +910,7 @@ DefaultFetch::fetch(bool &status_change) ////////////////////////////////////////// int tid = getFetchingThread(fetchPolicy); - if (tid == -1) { + if (tid == -1 || drainPending) { DPRINTF(Fetch,"There are no more threads available to fetch from.\n"); // Breaks looping condition in tick() From 8ade33d324218737c815935120307153975eeadc Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 7 Jul 2006 17:33:24 -0400 Subject: [PATCH 125/152] Support Ron's changes for hooking up ports. src/cpu/checker/cpu.hh: Now that BaseCPU is a MemObject, the checker must define this function. src/cpu/o3/cpu.cc: src/cpu/o3/cpu.hh: src/cpu/o3/fetch.hh: src/cpu/o3/iew.hh: src/cpu/o3/lsq.hh: src/cpu/o3/lsq_unit.hh: Implement getPort function so the connector can connect the ports properly. src/cpu/o3/fetch_impl.hh: src/cpu/o3/lsq_unit_impl.hh: The connector handles connecting the ports now. src/python/m5/objects/O3CPU.py: Add ports to the parameters. 
--HG-- extra : convert_revision : 0b1a216b9a5d0574e62165d7c6c242498104d918 --- src/cpu/checker/cpu.hh | 6 ++++++ src/cpu/o3/cpu.cc | 12 ++++++++++++ src/cpu/o3/cpu.hh | 3 +++ src/cpu/o3/fetch.hh | 3 +++ src/cpu/o3/fetch_impl.hh | 4 ---- src/cpu/o3/iew.hh | 3 +++ src/cpu/o3/lsq.hh | 7 +++++++ src/cpu/o3/lsq_unit.hh | 5 +++++ src/cpu/o3/lsq_unit_impl.hh | 4 ---- src/python/m5/objects/O3CPU.py | 2 ++ 10 files changed, 41 insertions(+), 8 deletions(-) diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh index b520e1be0d..a508c56ba2 100644 --- a/src/cpu/checker/cpu.hh +++ b/src/cpu/checker/cpu.hh @@ -127,6 +127,12 @@ class CheckerCPU : public BaseCPU Port *dcachePort; + virtual Port *getPort(const std::string &name, int idx) + { + panic("Not supported on checker!"); + return NULL; + } + public: // Primary thread being run. SimpleThread *thread; diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index ceba74ef3c..a9a1a7c9b2 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -360,6 +360,18 @@ FullO3CPU::fullCPURegStats() } +template +Port * +FullO3CPU::getPort(const std::string &if_name, int idx) +{ + if (if_name == "dcache_port") + return iew.getDcachePort(); + else if (if_name == "icache_port") + return fetch.getIcachePort(); + else + panic("No Such Port\n"); +} + template void FullO3CPU::tick() diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 5b881e5580..f85de64e54 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -208,6 +208,9 @@ class FullO3CPU : public BaseO3CPU /** Registers statistics. */ void fullCPURegStats(); + /** Returns a specific port. */ + Port *getPort(const std::string &if_name, int idx); + /** Ticks CPU, calling tick() on each stage, and checking the overall * activity to see if the CPU should deschedule itself. */ diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index a793c73612..85654cebc7 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -162,6 +162,9 @@ class DefaultFetch /** Registers statistics. 
*/ void regStats(); + /** Returns the icache port. */ + Port *getIcachePort() { return icachePort; } + /** Sets CPU pointer. */ void setCPU(O3CPU *cpu_ptr); diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 03836f47cb..de883b5ba2 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -280,10 +280,6 @@ DefaultFetch::setCPU(O3CPU *cpu_ptr) // Name is finally available, so create the port. icachePort = new IcachePort(this); - Port *mem_dport = mem->getPort(""); - icachePort->setPeer(mem_dport); - mem_dport->setPeer(icachePort); - #if USE_CHECKER if (cpu->checker) { cpu->checker->setIcachePort(icachePort); diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 4908a66495..fb9afde54a 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -125,6 +125,9 @@ class DefaultIEW /** Initializes stage; sends back the number of free IQ and LSQ entries. */ void initStage(); + /** Returns the dcache port. */ + Port *getDcachePort() { return ldstQueue.getDcachePort(); } + /** Sets CPU pointer for IEW, IQ, and LSQ. */ void setCPU(O3CPU *cpu_ptr); diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index 89791fec95..d5890950f0 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -65,6 +65,13 @@ class LSQ { /** Registers statistics of each LSQ unit. */ void regStats(); + /** Returns dcache port. + * @todo: Dcache port needs to be moved up to this level for SMT + * to work. For now it just returns the port from one of the + * threads. + */ + Port *getDcachePort() { return thread[0].getDcachePort(); } + /** Sets the pointer to the list of active threads. */ void setActiveThreads(std::list *at_ptr); /** Sets the CPU pointer. */ diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 74b8fe5bbb..4d7a8350b6 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -77,6 +77,11 @@ class LSQUnit { /** Returns the name of the LSQ unit. */ std::string name() const; + /** Returns the dcache port. 
+ * @todo: Remove this once the port moves up to the LSQ level. + */ + Port *getDcachePort() { return dcachePort; } + /** Registers statistics. */ void regStats(); diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index bb3da7eec1..8e951534f2 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -182,10 +182,6 @@ LSQUnit::setCPU(O3CPU *cpu_ptr) cpu = cpu_ptr; dcachePort = new DcachePort(cpu, this); - Port *mem_dport = mem->getPort(""); - dcachePort->setPeer(mem_dport); - mem_dport->setPeer(dcachePort); - #if USE_CHECKER if (cpu->checker) { cpu->checker->setDcachePort(dcachePort); diff --git a/src/python/m5/objects/O3CPU.py b/src/python/m5/objects/O3CPU.py index 9ccbdcf53b..6ba62b47e9 100644 --- a/src/python/m5/objects/O3CPU.py +++ b/src/python/m5/objects/O3CPU.py @@ -10,6 +10,8 @@ class DerivO3CPU(BaseCPU): checker = Param.BaseCPU(NULL, "checker") cachePorts = Param.Unsigned("Cache Ports") + icache_port = Port("Instruction Port") + dcache_port = Port("Data Port") decodeToFetchDelay = Param.Unsigned("Decode to fetch delay") renameToFetchDelay = Param.Unsigned("Rename to fetch delay") From 43245d9c2f3986430c1fbc4a09ee90096f6d3f30 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Fri, 7 Jul 2006 18:24:13 -0400 Subject: [PATCH 126/152] Support for recent port changes. src/cpu/ozone/cpu.hh: src/cpu/ozone/cpu_impl.hh: src/cpu/ozone/front_end.hh: src/cpu/ozone/front_end_impl.hh: src/cpu/ozone/lw_back_end.hh: src/cpu/ozone/lw_lsq.hh: src/cpu/ozone/lw_lsq_impl.hh: src/python/m5/objects/OzoneCPU.py: Support Ron's recent port changes. src/cpu/ozone/lw_back_end_impl.hh: Support Ron's recent port changes. Also support handling faults in SE. 
--HG-- extra : convert_revision : aa1ba5111b70199c052da3e13bae605525a69891 --- src/cpu/ozone/cpu.hh | 2 ++ src/cpu/ozone/cpu_impl.hh | 12 ++++++++++++ src/cpu/ozone/front_end.hh | 2 ++ src/cpu/ozone/front_end_impl.hh | 5 +---- src/cpu/ozone/lw_back_end.hh | 2 ++ src/cpu/ozone/lw_back_end_impl.hh | 12 ++++-------- src/cpu/ozone/lw_lsq.hh | 18 ++++++------------ src/cpu/ozone/lw_lsq_impl.hh | 15 ++++++--------- src/python/m5/objects/OzoneCPU.py | 3 +++ 9 files changed, 38 insertions(+), 33 deletions(-) diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh index 8993781ea0..1ec8b70e64 100644 --- a/src/cpu/ozone/cpu.hh +++ b/src/cpu/ozone/cpu.hh @@ -372,6 +372,8 @@ class OzoneCPU : public BaseCPU PhysicalMemory *physmem; #endif + virtual Port *getPort(const std::string &name, int idx); + MemObject *mem; FrontEnd *frontEnd; diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh index ccb1c8418d..50ed943121 100644 --- a/src/cpu/ozone/cpu_impl.hh +++ b/src/cpu/ozone/cpu_impl.hh @@ -418,6 +418,18 @@ OzoneCPU::init() thread.inSyscall = false; } +template +Port * +OzoneCPU::getPort(const std::string &if_name, int idx) +{ + if (if_name == "dcache_port") + return backEnd->getDcachePort(); + else if (if_name == "icache_port") + return frontEnd->getIcachePort(); + else + panic("No Such Port\n"); +} + template void OzoneCPU::serialize(std::ostream &os) diff --git a/src/cpu/ozone/front_end.hh b/src/cpu/ozone/front_end.hh index 181609098c..3ed3c4d18d 100644 --- a/src/cpu/ozone/front_end.hh +++ b/src/cpu/ozone/front_end.hh @@ -119,6 +119,8 @@ class FrontEnd void regStats(); + Port *getIcachePort() { return &icachePort; } + void tick(); Fault fetchCacheLine(); void processInst(DynInstPtr &inst); diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh index 40042489da..9da9373203 100644 --- a/src/cpu/ozone/front_end_impl.hh +++ b/src/cpu/ozone/front_end_impl.hh @@ -36,6 +36,7 @@ #include "cpu/thread_context.hh" #include "cpu/exetrace.hh" #include 
"cpu/ozone/front_end.hh" +#include "mem/mem_object.hh" #include "mem/packet.hh" #include "mem/request.hh" @@ -138,10 +139,6 @@ FrontEnd::setCPU(CPUType *cpu_ptr) icachePort.setName(this->name() + "-iport"); - Port *mem_dport = mem->getPort(""); - icachePort.setPeer(mem_dport); - mem_dport->setPeer(&icachePort); - #if USE_CHECKER if (cpu->checker) { cpu->checker->setIcachePort(&icachePort); diff --git a/src/cpu/ozone/lw_back_end.hh b/src/cpu/ozone/lw_back_end.hh index bb3ef3a72a..cc36c611ee 100644 --- a/src/cpu/ozone/lw_back_end.hh +++ b/src/cpu/ozone/lw_back_end.hh @@ -114,6 +114,8 @@ class LWBackEnd void setCommBuffer(TimeBuffer *_comm); + Port *getDcachePort() { return LSQ.getDcachePort(); } + void tick(); void squash(); void generateTCEvent() { tcSquash = true; } diff --git a/src/cpu/ozone/lw_back_end_impl.hh b/src/cpu/ozone/lw_back_end_impl.hh index a73d3ee6ee..a4f1d805eb 100644 --- a/src/cpu/ozone/lw_back_end_impl.hh +++ b/src/cpu/ozone/lw_back_end_impl.hh @@ -142,7 +142,7 @@ LWBackEnd::replayMemInst(DynInstPtr &inst) template LWBackEnd::LWBackEnd(Params *params) : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5), - trapSquash(false), tcSquash(false), LSQ(params), + trapSquash(false), tcSquash(false), width(params->backEndWidth), exactFullStall(true) { numROBEntries = params->numROBEntries; @@ -557,6 +557,7 @@ LWBackEnd::checkInterrupts() } } } +#endif template void @@ -580,7 +581,6 @@ LWBackEnd::handleFault(Fault &fault, Tick latency) // Generate trap squash event. 
generateTrapEvent(latency); } -#endif template void @@ -602,6 +602,7 @@ LWBackEnd::tick() #if FULL_SYSTEM checkInterrupts(); +#endif if (trapSquash) { assert(!tcSquash); @@ -609,7 +610,6 @@ LWBackEnd::tick() } else if (tcSquash) { squashFromTC(); } -#endif if (dispatchStatus != Blocked) { dispatchInsts(); @@ -1137,13 +1137,9 @@ LWBackEnd::commitInst(int inst_num) thread->setInst( static_cast(inst->staticInst->machInst)); -#if FULL_SYSTEM + handleFault(inst_fault); return false; -#else // !FULL_SYSTEM - panic("fault (%d) detected @ PC %08p", inst_fault, - inst->PC); -#endif // FULL_SYSTEM } int freed_regs = 0; diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh index c749e3aeea..2eb09d01ae 100644 --- a/src/cpu/ozone/lw_lsq.hh +++ b/src/cpu/ozone/lw_lsq.hh @@ -91,8 +91,7 @@ class OzoneLWLSQ { void setBE(BackEnd *be_ptr) { be = be_ptr; } - /** Sets the page table pointer. */ -// void setPageTable(PageTable *pt_ptr); + Port *getDcachePort() { return &dcachePort; } /** Ticks the LSQ unit, which in this case only resets the number of * used cache ports. @@ -241,13 +240,11 @@ class OzoneLWLSQ { class DcachePort : public Port { protected: - OzoneCPU *cpu; - OzoneLWLSQ *lsq; public: - DcachePort(OzoneCPU *_cpu, OzoneLWLSQ *_lsq) - : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq) + DcachePort(OzoneLWLSQ *_lsq) + : lsq(_lsq) { } protected: @@ -266,11 +263,8 @@ class OzoneLWLSQ { virtual void recvRetry(); }; - /** Pointer to the D-cache. */ - DcachePort *dcachePort; - - /** Pointer to the page table. */ -// PageTable *pTable; + /** D-cache port. */ + DcachePort dcachePort; public: struct SQEntry { @@ -639,7 +633,7 @@ OzoneLWLSQ::read(RequestPtr req, T &data, int load_idx) data_pkt->senderState = state; // if we have a cache, do cache access too - if (!dcachePort->sendTiming(data_pkt)) { + if (!dcachePort.sendTiming(data_pkt)) { // There's an older load that's already going to squash. 
if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum) return NoFault; diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh index a65a2a4d37..88e9c218f7 100644 --- a/src/cpu/ozone/lw_lsq_impl.hh +++ b/src/cpu/ozone/lw_lsq_impl.hh @@ -131,8 +131,9 @@ OzoneLWLSQ::completeDataAccess(PacketPtr pkt) template OzoneLWLSQ::OzoneLWLSQ() - : switchedOut(false), loads(0), stores(0), storesToWB(0), stalled(false), - isStoreBlocked(false), isLoadBlocked(false), loadBlockedHandled(false) + : switchedOut(false), dcachePort(this), loads(0), stores(0), + storesToWB(0), stalled(false), isStoreBlocked(false), + isLoadBlocked(false), loadBlockedHandled(false) { } @@ -175,15 +176,11 @@ void OzoneLWLSQ::setCPU(OzoneCPU *cpu_ptr) { cpu = cpu_ptr; - dcachePort = new DcachePort(cpu, this); - - Port *mem_dport = mem->getPort(""); - dcachePort->setPeer(mem_dport); - mem_dport->setPeer(dcachePort); + dcachePort.setName(this->name() + "-dport"); #if USE_CHECKER if (cpu->checker) { - cpu->checker->setDcachePort(dcachePort); + cpu->checker->setDcachePort(&dcachePort); } #endif } @@ -614,7 +611,7 @@ OzoneLWLSQ::writebackStores() state->noWB = true; } - if (!dcachePort->sendTiming(data_pkt)) { + if (!dcachePort.sendTiming(data_pkt)) { // Need to handle becoming blocked on a store. 
isStoreBlocked = true; assert(retryPkt == NULL); diff --git a/src/python/m5/objects/OzoneCPU.py b/src/python/m5/objects/OzoneCPU.py index 8aff89203f..88fb63c748 100644 --- a/src/python/m5/objects/OzoneCPU.py +++ b/src/python/m5/objects/OzoneCPU.py @@ -9,6 +9,9 @@ class DerivOzoneCPU(BaseCPU): checker = Param.BaseCPU("Checker CPU") + icache_port = Port("Instruction Port") + dcache_port = Port("Data Port") + width = Param.Unsigned("Width") frontEndWidth = Param.Unsigned("Front end width") backEndWidth = Param.Unsigned("Back end width") From 6c9bde608b9a58ba876e5c243a896d25e50bc500 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Mon, 10 Jul 2006 12:03:13 -0400 Subject: [PATCH 127/152] Fix cpu in full system to match SE. --HG-- extra : convert_revision : 95e422221ff5bab6104925d50a8882d31729b0f5 --- src/cpu/base.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 548f012dfc..ce440aeffc 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -59,7 +59,7 @@ int maxThreadsPerCPU = 1; #if FULL_SYSTEM BaseCPU::BaseCPU(Params *p) - : SimObject(p->name), clock(p->clock), checkInterrupts(true), + : MemObject(p->name), clock(p->clock), checkInterrupts(true), params(p), number_of_threads(p->numberOfThreads), system(p->system) #else BaseCPU::BaseCPU(Params *p) From aef232a942935e01c26522c20cc7fb3609075a7d Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Mon, 10 Jul 2006 12:07:21 -0400 Subject: [PATCH 128/152] Update FS configs to use cpu connectors for ports --HG-- extra : convert_revision : 1e2e503401f92c1f30e2e487d7aeed1c7c5b7ee4 --- configs/test/fs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configs/test/fs.py b/configs/test/fs.py index e0dd38e415..cd894ab730 100644 --- a/configs/test/fs.py +++ b/configs/test/fs.py @@ -190,6 +190,8 @@ class MyLinuxAlphaSystem(LinuxAlphaSystem): else: cpu = AtomicSimpleCPU() cpu.mem = magicbus2 + cpu.icache_port = magicbus2.port + cpu.dcache_port = magicbus2.port cpu.itb = 
AlphaITB() cpu.dtb = AlphaDTB() sim_console = SimConsole(listener=ConsoleListener(port=3456)) From 39ffd24b648d45cf4e2b130ea5465ae9f57ac125 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Mon, 10 Jul 2006 12:35:18 -0400 Subject: [PATCH 129/152] Fix offset calculation. Now L2's work with timing&atomic. src/mem/packet.hh: Offset is based on packet, not request. --HG-- extra : convert_revision : d85af5838370541328ca35072c612d8198020625 --- src/mem/packet.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 1325dfc5b8..534db00779 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -247,7 +247,7 @@ class Packet Addr getAddr() const { assert(addrSizeValid); return addr; } int getSize() const { assert(addrSizeValid); return size; } - Addr getOffset(int blkSize) const { return req->getPaddr() & (Addr)(blkSize - 1); } + Addr getOffset(int blkSize) const { return addr & (Addr)(blkSize - 1); } void addrOverride(Addr newAddr) { assert(addrSizeValid); addr = newAddr; } void cmdOverride(Command newCmd) { cmd = newCmd; } From 5584e2b26eccb5d2bf445b8b0b2040449d0b0a77 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Mon, 10 Jul 2006 12:42:35 -0400 Subject: [PATCH 130/152] Update config for a system with an L2 --HG-- extra : convert_revision : c73a532ad6ad8d5115bda81fa778a4b97fbab713 From fcaafdc48cc624825760cb3ba7bbc28e5db6acfa Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Mon, 10 Jul 2006 15:40:28 -0400 Subject: [PATCH 131/152] Add parameters for backwards and forwards sizes for time buffers. src/base/timebuf.hh: Add a function to return the size of the time buffer. 
--HG-- extra : convert_revision : 8ffacd8b9013eb76264df065244e00dc1460efd4 --- src/base/timebuf.hh | 5 +++++ src/cpu/o3/alpha/cpu_builder.cc | 12 +++++++++--- src/cpu/o3/cpu.cc | 20 +++++++++----------- src/cpu/o3/iew_impl.hh | 5 ++--- src/cpu/o3/params.hh | 6 ++++++ src/python/m5/objects/O3CPU.py | 3 +++ 6 files changed, 34 insertions(+), 17 deletions(-) diff --git a/src/base/timebuf.hh b/src/base/timebuf.hh index 160a97034d..a484a3179a 100644 --- a/src/base/timebuf.hh +++ b/src/base/timebuf.hh @@ -215,6 +215,11 @@ class TimeBuffer { return wire(this, 0); } + + int getSize() + { + return size; + } }; #endif // __BASE_TIMEBUF_HH__ diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc index d61eee4b12..5e767655d5 100644 --- a/src/cpu/o3/alpha/cpu_builder.cc +++ b/src/cpu/o3/alpha/cpu_builder.cc @@ -102,7 +102,9 @@ Param renameToROBDelay; Param commitWidth; Param squashWidth; Param trapLatency; -Param fetchTrapLatency; + +Param backComSize; +Param forwardComSize; Param predType; Param localPredictorSize; @@ -222,7 +224,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) INIT_PARAM(commitWidth, "Commit width"), INIT_PARAM(squashWidth, "Squash width"), INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6), - INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12), + + INIT_PARAM(backComSize, "Time buffer size for backwards communication"), + INIT_PARAM(forwardComSize, "Time buffer size for forward communication"), INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"), INIT_PARAM(localPredictorSize, "Size of local predictor"), @@ -350,7 +354,9 @@ CREATE_SIM_OBJECT(DerivO3CPU) params->commitWidth = commitWidth; params->squashWidth = squashWidth; params->trapLatency = trapLatency; - params->fetchTrapLatency = fetchTrapLatency; + + params->backComSize = backComSize; + params->forwardComSize = forwardComSize; params->predType = predType; params->localPredictorSize = 
localPredictorSize; diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index a9a1a7c9b2..d53859b8b4 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -141,15 +141,14 @@ FullO3CPU::FullO3CPU(Params *params) TheISA::NumMiscRegs * number_of_threads, TheISA::ZeroReg), - // For now just have these time buffers be pretty big. - // @todo: Make these time buffer sizes parameters or derived - // from latencies - timeBuffer(5, 5), - fetchQueue(5, 5), - decodeQueue(5, 5), - renameQueue(5, 5), - iewQueue(5, 5), - activityRec(NumStages, 10, params->activity), + timeBuffer(params->backComSize, params->forwardComSize), + fetchQueue(params->backComSize, params->forwardComSize), + decodeQueue(params->backComSize, params->forwardComSize), + renameQueue(params->backComSize, params->forwardComSize), + iewQueue(params->backComSize, params->forwardComSize), + activityRec(NumStages, + params->backComSize + params->forwardComSize, + params->activity), globalSeqNum(1), @@ -214,7 +213,6 @@ FullO3CPU::FullO3CPU(Params *params) commit.setIEWQueue(&iewQueue); commit.setRenameQueue(&renameQueue); - commit.setFetchStage(&fetch); commit.setIEWStage(&iew); rename.setIEWStage(&iew); rename.setCommitStage(&commit); @@ -851,7 +849,7 @@ void FullO3CPU::takeOverFrom(BaseCPU *oldCPU) { // Flush out any old data from the time buffers. - for (int i = 0; i < 10; ++i) { + for (int i = 0; i < timeBuffer.getSize(); ++i) { timeBuffer.advance(); fetchQueue.advance(); decodeQueue.advance(); diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 0d82645e33..684ae22951 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -42,8 +42,7 @@ using namespace std; template DefaultIEW::DefaultIEW(Params *params) - : // @todo: Make this into a parameter. 
- issueToExecQueue(5, 5), + : issueToExecQueue(params->backComSize, params->forwardComSize), instQueue(params), ldstQueue(params), fuPool(params->fuPool), @@ -413,7 +412,7 @@ DefaultIEW::takeOverFrom() updateLSQNextCycle = false; // @todo: Fix hardcoded number - for (int i = 0; i < 6; ++i) { + for (int i = 0; i < issueToExecQueue.getSize(); ++i) { issueToExecQueue.advance(); } } diff --git a/src/cpu/o3/params.hh b/src/cpu/o3/params.hh index ed53fa97ae..1c234bcd76 100755 --- a/src/cpu/o3/params.hh +++ b/src/cpu/o3/params.hh @@ -114,6 +114,12 @@ class O3Params : public BaseO3CPU::Params Tick trapLatency; Tick fetchTrapLatency; + // + // Timebuffer sizes + // + unsigned backComSize; + unsigned forwardComSize; + // // Branch predictor (BP, BTB, RAS) // diff --git a/src/python/m5/objects/O3CPU.py b/src/python/m5/objects/O3CPU.py index 6ba62b47e9..d6bc454ad7 100644 --- a/src/python/m5/objects/O3CPU.py +++ b/src/python/m5/objects/O3CPU.py @@ -53,6 +53,9 @@ class DerivO3CPU(BaseCPU): trapLatency = Param.Tick("Trap latency") fetchTrapLatency = Param.Tick("Fetch trap latency") + backComSize = Param.Unsigned("Time buffer size for backwards communication") + forwardComSize = Param.Unsigned("Time buffer size for forward communication") + predType = Param.String("Branch predictor type ('local', 'tournament')") localPredictorSize = Param.Unsigned("Size of local predictor") localCtrBits = Param.Unsigned("Bits per counter") From f60d8217e3c9fd7c4ea75ab0c89dbbd63db75ffd Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Mon, 10 Jul 2006 15:41:28 -0400 Subject: [PATCH 132/152] Some minor cleanups. src/cpu/SConscript: Change the error message to be slightly nicer. src/cpu/o3/commit.hh: Remove old code. src/cpu/o3/commit_impl.hh: Remove old unused code. 
--HG-- extra : convert_revision : 48aa430e1f3554007dd5e4f3d9e89b5e4f124390 --- src/cpu/SConscript | 7 ++++--- src/cpu/o3/commit.hh | 12 ------------ src/cpu/o3/commit_impl.hh | 14 +------------- 3 files changed, 5 insertions(+), 28 deletions(-) diff --git a/src/cpu/SConscript b/src/cpu/SConscript index 1d0a015b0f..bc4ec7923d 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -188,9 +188,10 @@ if env['USE_CHECKER']: if i in env['CPU_MODELS']: checker_supports = True if not checker_supports: - print "Checker only supports CPU models %s, please " \ - "set USE_CHECKER=False or use one of those CPU models" \ - % CheckerSupportedCPUList + print "Checker only supports CPU models", + for i in CheckerSupportedCPUList: + print i, + print ", please set USE_CHECKER=False or use one of those CPU models" Exit(1) diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index c39bc10f98..956b6ec3e6 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -162,10 +162,6 @@ class DefaultCommit /** Sets the pointer to the queue coming from IEW. */ void setIEWQueue(TimeBuffer *iq_ptr); - void setFetchStage(Fetch *fetch_stage); - - Fetch *fetchStage; - /** Sets the pointer to the IEW stage. */ void setIEWStage(IEW *iew_stage); @@ -335,10 +331,6 @@ class DefaultCommit /** Vector of all of the threads. */ std::vector thread; - Fault fetchFault; - - int fetchTrapWait; - /** Records that commit has written to the time buffer this cycle. Used for * the CPU to determine if it can deschedule itself if there is no activity. */ @@ -397,10 +389,6 @@ class DefaultCommit */ Tick trapLatency; - Tick fetchTrapLatency; - - Tick fetchFaultTick; - /** The commit PC of each thread. Refers to the instruction that * is currently being processed/committed. 
*/ diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 39e1cf3fe4..c3b4fa7f66 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -82,8 +82,7 @@ DefaultCommit::DefaultCommit(Params *params) numThreads(params->numberOfThreads), drainPending(false), switchedOut(false), - trapLatency(params->trapLatency), - fetchTrapLatency(params->fetchTrapLatency) + trapLatency(params->trapLatency) { _status = Active; _nextStatus = Inactive; @@ -123,9 +122,6 @@ DefaultCommit::DefaultCommit(Params *params) tcSquash[i] = false; PC[i] = nextPC[i] = 0; } - - fetchFaultTick = 0; - fetchTrapWait = 0; } template @@ -235,7 +231,6 @@ DefaultCommit::setCPU(O3CPU *cpu_ptr) cpu->activateStage(O3CPU::CommitIdx); trapLatency = cpu->cycles(trapLatency); - fetchTrapLatency = cpu->cycles(fetchTrapLatency); } template @@ -292,13 +287,6 @@ DefaultCommit::setIEWQueue(TimeBuffer *iq_ptr) fromIEW = iewQueue->getWire(-iewToCommitDelay); } -template -void -DefaultCommit::setFetchStage(Fetch *fetch_stage) -{ - fetchStage = fetch_stage; -} - template void DefaultCommit::setIEWStage(IEW *iew_stage) From 185a5502b7fb1bf46a1582749aeadb730106b4fb Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Mon, 10 Jul 2006 16:31:42 -0400 Subject: [PATCH 133/152] Minor fixes. src/cpu/checker/thread_context.hh: src/cpu/ozone/cpu.hh: src/cpu/ozone/cpu_impl.hh: Change functions to match Korey's changes. src/cpu/ozone/lw_back_end.hh: Fix compile error. 
--HG-- extra : convert_revision : fb11ac2d6db3a75c1cdbad2c1c02f921ad7344a6 --- src/cpu/checker/thread_context.hh | 2 +- src/cpu/ozone/cpu.hh | 4 ++-- src/cpu/ozone/cpu_impl.hh | 6 +++--- src/cpu/ozone/lw_back_end.hh | 2 ++ 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh index c0ac8f01d5..c035e92ace 100644 --- a/src/cpu/checker/thread_context.hh +++ b/src/cpu/checker/thread_context.hh @@ -120,7 +120,7 @@ class CheckerThreadContext : public ThreadContext void suspend() { actualTC->suspend(); } /// Set the status to Unallocated. - void deallocate() { actualTC->deallocate(); } + void deallocate(int delay = 0) { actualTC->deallocate(delay); } /// Set the status to Halted. void halt() { actualTC->halt(); } diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh index 1ec8b70e64..e411c12bd5 100644 --- a/src/cpu/ozone/cpu.hh +++ b/src/cpu/ozone/cpu.hh @@ -150,7 +150,7 @@ class OzoneCPU : public BaseCPU void suspend(); /// Set the status to Unallocated. - void deallocate(); + void deallocate(int delay = 0); /// Set the status to Halted. void halt(); @@ -385,7 +385,7 @@ class OzoneCPU : public BaseCPU virtual void activateContext(int thread_num, int delay); virtual void suspendContext(int thread_num); - virtual void deallocateContext(int thread_num); + virtual void deallocateContext(int thread_num, int delay); virtual void haltContext(int thread_num); // statistics diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh index 50ed943121..f58b81990b 100644 --- a/src/cpu/ozone/cpu_impl.hh +++ b/src/cpu/ozone/cpu_impl.hh @@ -335,7 +335,7 @@ OzoneCPU::suspendContext(int thread_num) template void -OzoneCPU::deallocateContext(int thread_num) +OzoneCPU::deallocateContext(int thread_num, int delay) { // for now, these are equivalent suspendContext(thread_num); @@ -792,9 +792,9 @@ OzoneCPU::OzoneTC::suspend() /// Set the status to Unallocated. 
template void -OzoneCPU::OzoneTC::deallocate() +OzoneCPU::OzoneTC::deallocate(int delay) { - cpu->deallocateContext(thread->readTid()); + cpu->deallocateContext(thread->readTid(), delay); } /// Set the status to Halted. diff --git a/src/cpu/ozone/lw_back_end.hh b/src/cpu/ozone/lw_back_end.hh index cc36c611ee..d836ceebd6 100644 --- a/src/cpu/ozone/lw_back_end.hh +++ b/src/cpu/ozone/lw_back_end.hh @@ -51,6 +51,8 @@ class ThreadContext; template class OzoneThreadState; +class Port; + template class LWBackEnd { From 6592045cbc138306474d24d60daa222a07673fe2 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Mon, 10 Jul 2006 17:16:15 -0400 Subject: [PATCH 134/152] Some fixes so that MSHR's are matched and we don't issue overlapping requests with detailed cpu src/mem/cache/base_cache.cc: If we still have outstanding requests, need to schedule event again src/mem/cache/miss/miss_queue.cc: Need to use block size so overlapping requests match in the MSHR's src/mem/cache/miss/mshr.cc: Actually save the address, otherwise we can't match MSHR's --HG-- extra : convert_revision : f0f018b89c2fb99f3ce8d6eafc0712ee8edeeda8 --- src/mem/cache/base_cache.cc | 17 +++++++++++++++-- src/mem/cache/miss/miss_queue.cc | 8 ++++---- src/mem/cache/miss/mshr.cc | 1 + 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index be9769fdc1..451da28e8b 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -117,11 +117,24 @@ BaseCache::CacheEvent::process() if (!pkt) { if (!cachePort->isCpuSide) + { pkt = cachePort->cache->getPacket(); + bool success = cachePort->sendTiming(pkt); + DPRINTF(Cache, "Address %x was %s in sending the timing request\n", + pkt->getAddr(), success ? 
"succesful" : "unsuccesful"); + cachePort->cache->sendResult(pkt, success); + if (success && cachePort->cache->doMasterRequest()) + { + //Still more to issue, rerequest in 1 cycle + pkt = NULL; + this->schedule(curTick+1); + } + } else + { pkt = cachePort->cache->getCoherencePacket(); - bool success = cachePort->sendTiming(pkt); - cachePort->cache->sendResult(pkt, success); + cachePort->sendTiming(pkt); + } return; } //Know the packet to send, no need to mark in service (must succed) diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc index da0448ad3a..4a3dc1062a 100644 --- a/src/mem/cache/miss/miss_queue.cc +++ b/src/mem/cache/miss/miss_queue.cc @@ -352,7 +352,7 @@ MissQueue::setPrefetcher(BasePrefetcher *_prefetcher) MSHR* MissQueue::allocateMiss(Packet * &pkt, int size, Tick time) { - MSHR* mshr = mq.allocate(pkt, size); + MSHR* mshr = mq.allocate(pkt, blkSize); mshr->order = order++; if (!pkt->req->isUncacheable() ){//&& !pkt->isNoAllocate()) { // Mark this as a cache line fill @@ -372,7 +372,7 @@ MissQueue::allocateMiss(Packet * &pkt, int size, Tick time) MSHR* MissQueue::allocateWrite(Packet * &pkt, int size, Tick time) { - MSHR* mshr = wb.allocate(pkt,pkt->getSize()); + MSHR* mshr = wb.allocate(pkt,blkSize); mshr->order = order++; //REMOVING COMPRESSION FOR NOW @@ -446,11 +446,11 @@ MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time) /** * @todo Add write merging here. 
*/ - mshr = allocateWrite(pkt, pkt->getSize(), time); + mshr = allocateWrite(pkt, blkSize, time); return; } - mshr = allocateMiss(pkt, size, time); + mshr = allocateMiss(pkt, blkSize, time); } MSHR* diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 1a85d3018c..db2f40c56d 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -57,6 +57,7 @@ void MSHR::allocate(Packet::Command cmd, Addr _addr, int _asid, int size, Packet * &target) { + addr = _addr; if (target) { //Have a request, just use it From 55ea050d4823ca294db94d6a1f7f2fc35177e044 Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Mon, 10 Jul 2006 23:00:13 -0400 Subject: [PATCH 135/152] Migrate most of main() and and all option parsing to python configs/test/fs.py: configs/test/test.py: update for the new way that m5 deals with options src/python/SConscript: Compile AUTHORS, LICENSE, README, and RELEASE_NOTES into the python stuff. src/python/m5/__init__.py: redo the way options work. Move them all to main.py src/sim/main.cc: Migrate more functionality for main() into python. Namely option parsing src/python/m5/attrdict.py: A dictionary object that overrides attribute access to do item access. src/python/m5/main.py: The new location for M5's option parsing, and the main() routine to set up the simulation. 
--HG-- extra : convert_revision : c86b87a9f508bde1994088e23fd470c7753ee4c1 --- configs/test/fs.py | 6 +- configs/test/test.py | 6 +- src/python/SConscript | 15 +- src/python/m5/__init__.py | 110 +------------- src/python/m5/attrdict.py | 61 ++++++++ src/python/m5/main.py | 306 ++++++++++++++++++++++++++++++++++++++ src/sim/main.cc | 160 ++------------------ 7 files changed, 398 insertions(+), 266 deletions(-) create mode 100644 src/python/m5/attrdict.py create mode 100644 src/python/m5/main.py diff --git a/configs/test/fs.py b/configs/test/fs.py index cd894ab730..d191f70558 100644 --- a/configs/test/fs.py +++ b/configs/test/fs.py @@ -1,14 +1,14 @@ +import optparse, os, sys + import m5 from m5.objects import * -import os,optparse,sys from SysPaths import * -parser = optparse.OptionParser(option_list=m5.standardOptions) +parser = optparse.OptionParser() parser.add_option("-t", "--timing", action="store_true") (options, args) = parser.parse_args() -m5.setStandardOptions(options) if args: print "Error: script doesn't take any positional arguments" diff --git a/configs/test/test.py b/configs/test/test.py index e7b0971ef3..a2c9f8bb04 100644 --- a/configs/test/test.py +++ b/configs/test/test.py @@ -4,15 +4,16 @@ # MIPS: "m5 test.py -a Mips -c hello_mips" import os, optparse, sys + import m5 from m5.objects import * from FullO3Config import * # parse command-line arguments -parser = optparse.OptionParser(option_list=m5.standardOptions) +parser = optparse.OptionParser() parser.add_option("-c", "--cmd", default="hello", - help="The binary to run in syscall emulation mode.") + help="The binary to run in syscall emulation mode.") parser.add_option("-o", "--options", default="", help="The options to pass to the binary, use \" \" around the entire\ string.") @@ -26,7 +27,6 @@ parser.add_option("-m", "--maxtick", type="int", help="Set the maximum number of ticks to run for") (options, args) = parser.parse_args() -m5.setStandardOptions(options) if args: print "Error: script 
doesn't take any positional arguments" diff --git a/src/python/SConscript b/src/python/SConscript index 3a9def9a81..c9e7131992 100644 --- a/src/python/SConscript +++ b/src/python/SConscript @@ -75,16 +75,27 @@ def addPkg(pkgdir): # build_env flags. def MakeDefinesPyFile(target, source, env): f = file(str(target[0]), 'w') - print >>f, "m5_build_env = ", - print >>f, source[0] + print >>f, "m5_build_env = ", source[0] f.close() optionDict = dict([(opt, env[opt]) for opt in env.ExportOptions]) env.Command('m5/defines.py', Value(optionDict), MakeDefinesPyFile) +def MakeInfoPyFile(target, source, env): + f = file(str(target[0]), 'w') + for src in source: + data = ''.join(file(src.srcnode().abspath, 'r').xreadlines()) + print >>f, "%s = %s" % (src, repr(data)) + f.close() + +env.Command('m5/info.py', + [ '#/AUTHORS', '#/LICENSE', '#/README', '#/RELEASE_NOTES' ], + MakeInfoPyFile) + # Now specify the packages & files for the zip archive. addPkg('m5') pyzip_files.append('m5/defines.py') +pyzip_files.append('m5/info.py') pyzip_files.append(join(env['ROOT'], 'util/pbs/jobfile.py')) env.Command(['swig/cc_main_wrap.cc', 'm5/cc_main.py'], diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py index f4f5be2d1a..3d0e3defa9 100644 --- a/src/python/m5/__init__.py +++ b/src/python/m5/__init__.py @@ -27,7 +27,7 @@ # Authors: Nathan Binkert # Steve Reinhardt -import sys, os, time, atexit, optparse +import atexit, os, sys # import the SWIG-wrapped main C++ functions import cc_main @@ -57,111 +57,6 @@ def AddToPath(path): # so place the new dir right after that. sys.path.insert(1, path) - -# The m5 module's pointer to the parsed options object -options = None - - -# User should call this function after calling parse_args() to pass -# parsed standard option values back into the m5 module for -# processing. 
-def setStandardOptions(_options): - # Set module global var - global options - options = _options - # tell C++ about output directory - cc_main.setOutputDir(options.outdir) - -# Callback to set trace flags. Not necessarily the best way to do -# things in the long run (particularly if we change how these global -# options are handled). -def setTraceFlags(option, opt_str, value, parser): - objects.Trace.flags = value - -def setTraceStart(option, opt_str, value, parser): - objects.Trace.start = value - -def setTraceFile(option, opt_str, value, parser): - objects.Trace.file = value - -def noPCSymbol(option, opt_str, value, parser): - objects.ExecutionTrace.pc_symbol = False - -def noPrintCycle(option, opt_str, value, parser): - objects.ExecutionTrace.print_cycle = False - -def noPrintOpclass(option, opt_str, value, parser): - objects.ExecutionTrace.print_opclass = False - -def noPrintThread(option, opt_str, value, parser): - objects.ExecutionTrace.print_thread = False - -def noPrintEA(option, opt_str, value, parser): - objects.ExecutionTrace.print_effaddr = False - -def noPrintData(option, opt_str, value, parser): - objects.ExecutionTrace.print_data = False - -def printFetchseq(option, opt_str, value, parser): - objects.ExecutionTrace.print_fetchseq = True - -def printCpseq(option, opt_str, value, parser): - objects.ExecutionTrace.print_cpseq = True - -def dumpOnExit(option, opt_str, value, parser): - objects.Trace.dump_on_exit = True - -def debugBreak(option, opt_str, value, parser): - objects.Debug.break_cycles = value - -def statsTextFile(option, opt_str, value, parser): - objects.Statistics.text_file = value - -# Standard optparse options. Need to be explicitly included by the -# user script when it calls optparse.OptionParser(). 
-standardOptions = [ - optparse.make_option("--outdir", type="string", default="."), - optparse.make_option("--traceflags", type="string", action="callback", - callback=setTraceFlags), - optparse.make_option("--tracestart", type="int", action="callback", - callback=setTraceStart), - optparse.make_option("--tracefile", type="string", action="callback", - callback=setTraceFile), - optparse.make_option("--nopcsymbol", - action="callback", callback=noPCSymbol, - help="Disable PC symbols in trace output"), - optparse.make_option("--noprintcycle", - action="callback", callback=noPrintCycle, - help="Don't print cycle numbers in trace output"), - optparse.make_option("--noprintopclass", - action="callback", callback=noPrintOpclass, - help="Don't print op class type in trace output"), - optparse.make_option("--noprintthread", - action="callback", callback=noPrintThread, - help="Don't print thread number in trace output"), - optparse.make_option("--noprinteffaddr", - action="callback", callback=noPrintEA, - help="Don't print effective address in trace output"), - optparse.make_option("--noprintdata", - action="callback", callback=noPrintData, - help="Don't print result data in trace output"), - optparse.make_option("--printfetchseq", - action="callback", callback=printFetchseq, - help="Print fetch sequence numbers in trace output"), - optparse.make_option("--printcpseq", - action="callback", callback=printCpseq, - help="Print correct path sequence numbers in trace output"), - optparse.make_option("--dumponexit", - action="callback", callback=dumpOnExit, - help="Dump trace buffer on exit"), - optparse.make_option("--debugbreak", type="int", metavar="CYCLE", - action="callback", callback=debugBreak, - help="Cycle to create a breakpoint"), - optparse.make_option("--statsfile", type="string", action="callback", - callback=statsTextFile, metavar="FILE", - help="Sets the output file for the statistics") - ] - # make a SmartDict out of the build options for our local use import 
smartdict build_env = smartdict.SmartDict() @@ -171,12 +66,13 @@ build_env.update(defines.m5_build_env) env = smartdict.SmartDict() env.update(os.environ) - # Function to provide to C++ so it can look up instances based on paths def resolveSimObject(name): obj = config.instanceDict[name] return obj.getCCObject() +from main import options, arguments, main + # The final hook to generate .ini files. Called from the user script # once the config is built. def instantiate(root): diff --git a/src/python/m5/attrdict.py b/src/python/m5/attrdict.py new file mode 100644 index 0000000000..4ee7f1b8c6 --- /dev/null +++ b/src/python/m5/attrdict.py @@ -0,0 +1,61 @@ +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Nathan Binkert + +__all__ = [ 'attrdict' ] + +class attrdict(dict): + def __getattr__(self, attr): + if attr in self: + return self.__getitem__(attr) + return super(attrdict, self).__getattribute__(attr) + + def __setattr__(self, attr, value): + if attr in dir(self): + return super(attrdict, self).__setattr__(attr, value) + return self.__setitem__(attr, value) + + def __delattr__(self, attr): + if attr in self: + return self.__delitem__(attr) + return super(attrdict, self).__delattr__(attr, value) + +if __name__ == '__main__': + x = attrdict() + x.y = 1 + x['z'] = 2 + print x['y'], x.y + print x['z'], x.z + print dir(x) + print x + + print + + del x['y'] + del x.z + print dir(x) + print(x) diff --git a/src/python/m5/main.py b/src/python/m5/main.py new file mode 100644 index 0000000000..b4c89f6125 --- /dev/null +++ b/src/python/m5/main.py @@ -0,0 +1,306 @@ +# Copyright (c) 2005 The Regents of The University of Michigan +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Nathan Binkert + +import code, optparse, os, socket, sys +from datetime import datetime +from attrdict import attrdict + +try: + import info +except ImportError: + info = None + +__all__ = [ 'options', 'arguments', 'main' ] + +usage="%prog [m5 options] script.py [script options]" +version="%prog 2.0" +brief_copyright=''' +Copyright (c) 2001-2006 +The Regents of The University of Michigan +All Rights Reserved +''' + +# there's only one option parsing done, so make it global and add some +# helper functions to make it work well. +parser = optparse.OptionParser(usage=usage, version=version, + description=brief_copyright, + formatter=optparse.TitledHelpFormatter()) + +# current option group +group = None + +def set_group(*args, **kwargs): + '''set the current option group''' + global group + if not args and not kwargs: + group = None + else: + group = parser.add_option_group(*args, **kwargs) + +class splitter(object): + def __init__(self, split): + self.split = split + def __call__(self, option, opt_str, value, parser): + getattr(parser.values, option.dest).extend(value.split(self.split)) + +def add_option(*args, **kwargs): + '''add an option to the current option group, or global none set''' + + # if action=split, but allows the option arguments + # themselves to be lists separated by the split variable''' + + if kwargs.get('action', None) == 'append' and 'split' in kwargs: + split = kwargs.pop('split') + kwargs['default'] = [] + kwargs['type'] = 'string' + kwargs['action'] = 'callback' + kwargs['callback'] = splitter(split) + + if group: + return group.add_option(*args, **kwargs) + + return parser.add_option(*args, **kwargs) + +def bool_option(name, default, help): + '''add a boolean option called --name and --no-name. 
+ Display help depending on which is the default''' + + tname = '--%s' % name + fname = '--no-%s' % name + dest = name.replace('-', '_') + if default: + thelp = optparse.SUPPRESS_HELP + fhelp = help + else: + thelp = help + fhelp = optparse.SUPPRESS_HELP + + add_option(tname, action="store_true", default=default, help=thelp) + add_option(fname, action="store_false", dest=dest, help=fhelp) + +# Help options +add_option('-A', "--authors", action="store_true", default=False, + help="Show author information") +add_option('-C', "--copyright", action="store_true", default=False, + help="Show full copyright information") +add_option('-R', "--readme", action="store_true", default=False, + help="Show the readme") +add_option('-N', "--release-notes", action="store_true", default=False, + help="Show the release notes") + +# Options for configuring the base simulator +add_option('-d', "--outdir", metavar="DIR", default=".", + help="Set the output directory to DIR [Default: %default]") +add_option('-i', "--interactive", action="store_true", default=False, + help="Invoke the interactive interpreter after running the script") +add_option('-p', "--path", metavar="PATH[:PATH]", action='append', split=':', + help="Prepend PATH to the system path when invoking the script") +add_option('-q', "--quiet", action="count", default=0, + help="Reduce verbosity") +add_option('-v', "--verbose", action="count", default=0, + help="Increase verbosity") + +# Statistics options +set_group("Statistics Options") +add_option("--stats-file", metavar="FILE", default="m5stats.txt", + help="Sets the output file for statistics [Default: %default]") + +# Debugging options +set_group("Debugging Options") +add_option("--debug-break", metavar="TIME[,TIME]", action='append', split=',', + help="Cycle to create a breakpoint") + +# Tracing options +set_group("Trace Options") +add_option("--trace-flags", metavar="FLAG[,FLAG]", action='append', split=',', + help="Sets the flags for tracing") 
+add_option("--trace-start", metavar="TIME", default='0s', + help="Start tracing at TIME (must have units)") +add_option("--trace-file", metavar="FILE", default="cout", + help="Sets the output file for tracing [Default: %default]") +add_option("--trace-circlebuf", metavar="SIZE", type="int", default=0, + help="If SIZE is non-zero, turn on the circular buffer with SIZE lines") +add_option("--no-trace-circlebuf", action="store_const", const=0, + dest='trace_circlebuf', help=optparse.SUPPRESS_HELP) +bool_option("trace-dumponexit", default=False, + help="Dump trace buffer on exit") +add_option("--trace-ignore", metavar="EXPR", action='append', split=':', + help="Ignore EXPR sim objects") + +# Execution Trace options +set_group("Execution Trace Options") +bool_option("speculative", default=True, + help="Don't capture speculative instructions") +bool_option("print-cycle", default=True, + help="Don't print cycle numbers in trace output") +bool_option("print-symbol", default=True, + help="Disable PC symbols in trace output") +bool_option("print-opclass", default=True, + help="Don't print op class type in trace output") +bool_option("print-thread", default=True, + help="Don't print thread number in trace output") +bool_option("print-effaddr", default=True, + help="Don't print effective address in trace output") +bool_option("print-data", default=True, + help="Don't print result data in trace output") +bool_option("print-iregs", default=False, + help="Print fetch sequence numbers in trace output") +bool_option("print-fetch-seq", default=False, + help="Print fetch sequence numbers in trace output") +bool_option("print-cpseq", default=False, + help="Print correct path sequence numbers in trace output") + +options = attrdict() +arguments = [] + +def usage(exitcode=None): + print parser.help + if exitcode is not None: + sys.exit(exitcode) + +def parse_args(): + _opts,args = parser.parse_args() + opts = attrdict(_opts.__dict__) + + # setting verbose and quiet at the same time 
doesn't make sense + if opts.verbose > 0 and opts.quiet > 0: + usage(2) + + # store the verbosity in a single variable. 0 is default, + # negative numbers represent quiet and positive values indicate verbose + opts.verbose -= opts.quiet + + del opts.quiet + + options.update(opts) + arguments.extend(args) + return opts,args + +def main(): + import cc_main + + parse_args() + + done = False + if options.copyright: + done = True + print info.LICENSE + print + + if options.authors: + done = True + print 'Author information:' + print + print info.AUTHORS + print + + if options.readme: + done = True + print 'Readme:' + print + print info.README + print + + if options.release_notes: + done = True + print 'Release Notes:' + print + print info.RELEASE_NOTES + print + + if done: + sys.exit(0) + + if options.verbose >= 0: + print "M5 Simulator System" + print brief_copyright + print + print "M5 compiled %s" % cc_main.cvar.compileDate; + print "M5 started %s" % datetime.now().ctime() + print "M5 executing on %s" % socket.gethostname() + + # check to make sure we can find the listed script + if not arguments or not os.path.isfile(arguments[0]): + usage(2) + + # tell C++ about output directory + cc_main.setOutputDir(options.outdir) + + # update the system path with elements from the -p option + sys.path[0:0] = options.path + + import objects + + # set stats options + objects.Statistics.text_file = options.stats_file + + # set debugging options + objects.Debug.break_cycles = options.debug_break + + # set tracing options + objects.Trace.flags = options.trace_flags + objects.Trace.start = options.trace_start + objects.Trace.file = options.trace_file + objects.Trace.bufsize = options.trace_circlebuf + objects.Trace.dump_on_exit = options.trace_dumponexit + objects.Trace.ignore = options.trace_ignore + + # set execution trace options + objects.ExecutionTrace.speculative = options.speculative + objects.ExecutionTrace.print_cycle = options.print_cycle + objects.ExecutionTrace.pc_symbol 
= options.print_symbol + objects.ExecutionTrace.print_opclass = options.print_opclass + objects.ExecutionTrace.print_thread = options.print_thread + objects.ExecutionTrace.print_effaddr = options.print_effaddr + objects.ExecutionTrace.print_data = options.print_data + objects.ExecutionTrace.print_iregs = options.print_iregs + objects.ExecutionTrace.print_fetchseq = options.print_fetch_seq + objects.ExecutionTrace.print_cpseq = options.print_cpseq + + scope = { '__file__' : sys.argv[0] } + sys.argv = arguments + sys.path = [ os.path.dirname(sys.argv[0]) ] + sys.path + exec("import readline", scope) + execfile(sys.argv[0], scope) + + # once the script is done + if options.interactive: + interact = code.InteractiveConsole(scope) + interact.interact("M5 Interactive Console") + +if __name__ == '__main__': + from pprint import pprint + + parse_args() + + print 'opts:' + pprint(options, indent=4) + print + + print 'args:' + pprint(arguments, indent=4) diff --git a/src/sim/main.cc b/src/sim/main.cc index 5f34f6520f..d0725ab371 100644 --- a/src/sim/main.cc +++ b/src/sim/main.cc @@ -115,70 +115,11 @@ abortHandler(int sigtype) #endif } -/// Simulator executable name -char *myProgName = ""; - -/// Show brief help message. -void -showBriefHelp(ostream &out) -{ - char *prog = basename(myProgName); - - ccprintf(out, "Usage:\n"); - ccprintf(out, -"%s [-p ] [-i ] [-h] \n" -"\n" -" -p, --path prepends to PYTHONPATH instead of using\n" -" built-in zip archive. Useful when developing/debugging\n" -" changes to built-in Python libraries, as the new Python\n" -" can be tested without building a new m5 binary.\n\n" -" -i, --interactive forces entry into interactive mode after the supplied\n" -" script is executed (just like the -i option to the\n" -" Python interpreter).\n\n" -" -h Prints this help\n\n" -" config file name which ends in .py. 
(Normally you can\n" -" run --help to get help on that config files\n" -" parameters.\n\n", - prog); - -} - -const char *briefCopyright = -"Copyright (c) 2001-2006\n" -"The Regents of The University of Michigan\n" -"All Rights Reserved\n"; - -/// Print welcome message. -void -sayHello(ostream &out) -{ - extern const char *compileDate; // from date.cc - - ccprintf(out, "M5 Simulator System\n"); - // display copyright - ccprintf(out, "%s\n", briefCopyright); - ccprintf(out, "M5 compiled %d\n", compileDate); - ccprintf(out, "M5 started %s\n", Time::start); - - char *host = getenv("HOSTNAME"); - if (!host) - host = getenv("HOST"); - - if (host) - ccprintf(out, "M5 executing on %s\n", host); -} - - extern "C" { void init_cc_main(); } int main(int argc, char **argv) { - // Saze off program name - myProgName = argv[0]; - - sayHello(cerr); - signal(SIGFPE, SIG_IGN); // may occur on misspeculated paths signal(SIGTRAP, SIG_IGN); signal(SIGUSR1, dumpStatsHandler); // dump intermediate stats @@ -189,72 +130,18 @@ main(int argc, char **argv) Py_SetProgramName(argv[0]); // default path to m5 python code is the currently executing - // file... Python ZipImporter will find embedded zip archive - char *pythonpath = argv[0]; - - bool interactive = false; - bool show_help = false; - bool getopt_done = false; - int opt_index = 0; - - static struct option long_options[] = { - {"python", 1, 0, 'p'}, - {"interactive", 0, 0, 'i'}, - {"help", 0, 0, 'h'}, - {0,0,0,0} - }; - - do { - switch (getopt_long(argc, argv, "+p:ih", long_options, &opt_index)) { - // -p prepends to PYTHONPATH instead of - // using built-in zip archive. Useful when - // developing/debugging changes to built-in Python - // libraries, as the new Python can be tested without - // building a new m5 binary. - case 'p': - pythonpath = optarg; - break; - - // -i forces entry into interactive mode after the - // supplied script is executed (just like the -i option to - // the Python interpreter). 
- case 'i': - interactive = true; - break; - - case 'h': - show_help = true; - break; - case -1: - getopt_done = true; - break; - - default: - fatal("Unrecognized option %c\n", optopt); - } - } while (!getopt_done); - - if (show_help) { - showBriefHelp(cerr); - exit(1); - } - - // Fix up argc & argv to hide arguments we just processed. - // getopt() sets optind to the index of the first non-processed - // argv element. - argc -= optind; - argv += optind; - - // Set up PYTHONPATH to make sure the m5 module is found - string newpath(pythonpath); + // file... Python ZipImporter will find embedded zip archive. + // The M5_ARCHIVE environment variable can be used to override this. + char *m5_archive = getenv("M5_ARCHIVE"); + string pythonpath = m5_archive ? m5_archive : argv[0]; char *oldpath = getenv("PYTHONPATH"); if (oldpath != NULL) { - newpath += ":"; - newpath += oldpath; + pythonpath += ":"; + pythonpath += oldpath; } - if (setenv("PYTHONPATH", newpath.c_str(), true) == -1) + if (setenv("PYTHONPATH", pythonpath.c_str(), true) == -1) fatal("setenv: %s\n", strerror(errno)); // initialize embedded Python interpreter @@ -264,37 +151,8 @@ main(int argc, char **argv) // initialize SWIG 'cc_main' module init_cc_main(); - if (argc > 0) { - // extra arg(s): first is script file, remaining ones are args - // to script file - char *filename = argv[0]; - FILE *fp = fopen(filename, "r"); - if (!fp) { - fatal("cannot open file '%s'\n", filename); - } - - PyRun_AnyFile(fp, filename); - } else { - // no script file argument... force interactive prompt - interactive = true; - } - - if (interactive) { - // The following code to import readline was copied from Python - // 2.4.3's Modules/main.c. - // Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 - // Python Software Foundation; All Rights Reserved - // We should only enable this if we're actually using an - // interactive prompt. 
- PyObject *v; - v = PyImport_ImportModule("readline"); - if (v == NULL) - PyErr_Clear(); - else - Py_DECREF(v); - - PyRun_InteractiveLoop(stdin, "stdin"); - } + PyRun_SimpleString("import m5"); + PyRun_SimpleString("m5.main()"); // clean up Python intepreter. Py_Finalize(); From 7078d8d1b42c1a158c854b3e07800f20aa695bfb Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Tue, 11 Jul 2006 11:28:59 -0400 Subject: [PATCH 136/152] Fix option parsing. src/python/m5/main.py: Don't allow interspersed arguments, it messes things up --HG-- extra : convert_revision : 8f1bcf4391f570741d92bf5420879862a48f6016 --- src/python/m5/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/python/m5/main.py b/src/python/m5/main.py index b4c89f6125..904b241ca9 100644 --- a/src/python/m5/main.py +++ b/src/python/m5/main.py @@ -50,6 +50,7 @@ All Rights Reserved parser = optparse.OptionParser(usage=usage, version=version, description=brief_copyright, formatter=optparse.TitledHelpFormatter()) +parser.disable_interspersed_args() # current option group group = None From 6bcc65c1f866348f64a804a8bcc1f6dc06145afa Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Tue, 11 Jul 2006 15:42:31 -0400 Subject: [PATCH 137/152] Fix ordering issue with squashed Icache Fetches and Static data in packet. Now hello world works with 2 levels of cache with O3 CPU(multiple outstanding requests). src/cpu/o3/fetch_impl.hh: Fix ordering issue with squashed Icache Fetches and Static data in packet. 
--HG-- extra : convert_revision : a6adb87540b007ead0b4982cb3f31da8199fb5ca --- src/cpu/o3/fetch_impl.hh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index de883b5ba2..39a13f9f88 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -357,6 +357,8 @@ DefaultFetch::processCacheCompletion(PacketPtr pkt) return; } + memcpy(cacheData[tid], pkt->getPtr(), cacheBlkSize); + if (!drainPending) { // Wake up the CPU (if it went to sleep and was waiting on // this completion event). @@ -548,7 +550,7 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Build packet here. PacketPtr data_pkt = new Packet(mem_req, Packet::ReadReq, Packet::Broadcast); - data_pkt->dataStatic(cacheData[tid]); + data_pkt->dataDynamic(new uint8_t[cacheBlkSize]); DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); From f7216daee06189ec3f432dcce6de27b5b4380880 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Tue, 11 Jul 2006 16:03:42 -0400 Subject: [PATCH 138/152] Add a cache version of FS (should really make this an option in original) Now to work on caches in FS, first steps: 1) LL/SC support (Top Level Cache Hooks) 2) Snooping in the bus (CSHR's for DMA Invalidates) --HG-- extra : convert_revision : b4e7984712f7dcd42649070c5ca538c87461e179 From 3218538740a6132273875f84ce0cb95a2c79a62d Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Wed, 12 Jul 2006 15:18:49 -0400 Subject: [PATCH 139/152] Fix __file__ for scripts src/python/m5/main.py: set __file__ to the script, not the m5 binary. 
--HG-- extra : convert_revision : a0bbd059d2fd321ae8ff68225abc8a7bb5c410ed --- src/python/m5/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/python/m5/main.py b/src/python/m5/main.py index 904b241ca9..80dbcb5aaa 100644 --- a/src/python/m5/main.py +++ b/src/python/m5/main.py @@ -283,9 +283,10 @@ def main(): objects.ExecutionTrace.print_fetchseq = options.print_fetch_seq objects.ExecutionTrace.print_cpseq = options.print_cpseq - scope = { '__file__' : sys.argv[0] } sys.argv = arguments sys.path = [ os.path.dirname(sys.argv[0]) ] + sys.path + + scope = { '__file__' : sys.argv[0] } exec("import readline", scope) execfile(sys.argv[0], scope) From bf4fdbe25a275eeb036cd5e9e05d126c52f90aba Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Wed, 12 Jul 2006 15:21:23 -0400 Subject: [PATCH 140/152] Add --pdb src/python/m5/main.py: Add a command line option to invoke pdb on your script --HG-- extra : convert_revision : ef5a2860bd3f6e479fa80eccaae0cb5541a20b50 --- src/python/m5/main.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/python/m5/main.py b/src/python/m5/main.py index 80dbcb5aaa..54c54c1d5d 100644 --- a/src/python/m5/main.py +++ b/src/python/m5/main.py @@ -119,6 +119,8 @@ add_option('-d', "--outdir", metavar="DIR", default=".", help="Set the output directory to DIR [Default: %default]") add_option('-i', "--interactive", action="store_true", default=False, help="Invoke the interactive interpreter after running the script") +add_option("--pdb", action="store_true", default=False, + help="Invoke the python debugger before running the script") add_option('-p', "--path", metavar="PATH[:PATH]", action='append', split=':', help="Prepend PATH to the system path when invoking the script") add_option('-q', "--quiet", action="count", default=0, @@ -287,8 +289,19 @@ def main(): sys.path = [ os.path.dirname(sys.argv[0]) ] + sys.path scope = { '__file__' : sys.argv[0] } - exec("import readline", scope) - 
execfile(sys.argv[0], scope) + + # we want readline if we're doing anything interactive + if options.interactive or options.pdb: + exec("import readline", scope) + + # if pdb was requested, execfile the thing under pdb, otherwise, + # just do the execfile normally + if options.pdb: + from pdb import Pdb + debugger = Pdb() + debugger.run('execfile("%s")' % sys.argv[0], scope) + else: + execfile(sys.argv[0], scope) # once the script is done if options.interactive: From 6d120b7912e554aaa44e28d1133f4bbfd9d04f66 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Wed, 12 Jul 2006 15:24:27 -0400 Subject: [PATCH 141/152] Track the PC of the cache data stored in fetch so it doesn't access memory multiple times if information is already in fetch. --HG-- extra : convert_revision : 00b160b255e998cf99286bcc21894110c7642624 --- src/cpu/o3/fetch.hh | 3 +++ src/cpu/o3/fetch_impl.hh | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 85654cebc7..0331cf07fb 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -404,6 +404,9 @@ class DefaultFetch /** The cache line being fetched. */ uint8_t *cacheData[Impl::MaxThreads]; + /** The PC of the cacheline that has been loaded. */ + Addr cacheDataPC[Impl::MaxThreads]; + /** Size of instructions. */ int instSize; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index de883b5ba2..a430f44723 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -517,6 +517,11 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Align the fetch PC so it's at the start of a cache block. fetch_PC = icacheBlockAlignPC(fetch_PC); + // If we've already got the block, no need to try to fetch it again. + if (fetch_PC == cacheDataPC[tid]) { + return true; + } + // Setup the memReq to do a read of the first instruction's address. // Set the appropriate read size and flags as well. // Build request here. 
@@ -550,6 +555,8 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid Packet::ReadReq, Packet::Broadcast); data_pkt->dataStatic(cacheData[tid]); + cacheDataPC[tid] = fetch_PC; + DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); fetchedCacheLines++; From 2bc9229ea7195b307222bad6de966ea4a27a3f6b Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Wed, 12 Jul 2006 20:22:07 -0400 Subject: [PATCH 142/152] memory mode information now contained in system object States are now running, draining, or drained. memory state information moved into system object system parameter is not fs only for cpus Implement drain() support in devices Update for drain() call that returns number of times drain_event->process() will be called Break O3 CPU! No sense in putting in a hack change that Kevin is going to remove in a few minutes I imagine src/cpu/simple/atomic.cc: src/cpu/simple/atomic.hh: Since se mode has a system, allow access to it Verify that the atomic cpu is connected to an atomic system on resume src/cpu/simple/base.cc: Since se mode has a system, allow access to it src/cpu/simple/timing.cc: src/cpu/simple/timing.hh: Update for new drain() call that returns number of times drain_event->process() will be called and memory state being moved into the system Since se mode has a system, allow access to it Verify that the timing cpu is connected to a timing system on resume src/dev/ide_disk.cc: src/dev/io_device.cc: src/dev/io_device.hh: src/dev/ns_gige.cc: src/dev/ns_gige.hh: src/dev/pcidev.cc: src/dev/pcidev.hh: src/dev/sinic.cc: src/dev/sinic.hh: Implement drain() support in devices src/python/m5/config.py: Allow drain to return number of times drain_event->process() will be called. Normally 0 or 1 but things like O3 cpu or devices with multiple ports may want to call it many times src/python/m5/objects/BaseCPU.py: move system parameter out of fs to everyone src/sim/sim_object.cc: src/sim/sim_object.hh: States are now running, draining, or drained.
memory state information moved into system object src/sim/system.cc: src/sim/system.hh: memory mode information now contained in system object --HG-- extra : convert_revision : 1389c77e66ee6d9710bf77b4306fb47e107b21cf --- src/cpu/simple/atomic.cc | 14 ++- src/cpu/simple/atomic.hh | 1 + src/cpu/simple/base.cc | 2 +- src/cpu/simple/timing.cc | 26 +++--- src/cpu/simple/timing.hh | 3 +- src/dev/ide_disk.cc | 8 +- src/dev/io_device.cc | 147 ++++++++++++++++++++++--------- src/dev/io_device.hh | 49 ++++++++--- src/dev/ns_gige.cc | 22 ++++- src/dev/ns_gige.hh | 2 + src/dev/pcidev.cc | 16 +++- src/dev/pcidev.hh | 5 ++ src/dev/sinic.cc | 16 +++- src/dev/sinic.hh | 1 + src/python/m5/config.py | 5 +- src/python/m5/objects/BaseCPU.py | 2 +- src/sim/sim_object.cc | 29 ++---- src/sim/sim_object.hh | 15 ++-- src/sim/system.cc | 8 ++ src/sim/system.hh | 17 ++++ 20 files changed, 270 insertions(+), 118 deletions(-) diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index 12bfdeb9bf..1752b2b5b4 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -33,6 +33,7 @@ #include "cpu/simple/atomic.hh" #include "mem/packet_impl.hh" #include "sim/builder.hh" +#include "sim/system.hh" using namespace std; using namespace TheISA; @@ -172,6 +173,13 @@ AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section) tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); } +void +AtomicSimpleCPU::resume() +{ + assert(system->getMemoryMode() == System::Atomic); + changeState(SimObject::Running); +} + void AtomicSimpleCPU::switchOut() { @@ -451,11 +459,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU) Param max_loads_any_thread; Param max_loads_all_threads; SimObjectParam mem; + SimObjectParam system; #if FULL_SYSTEM SimObjectParam itb; SimObjectParam dtb; - SimObjectParam system; Param cpu_id; Param profile; #else @@ -483,11 +491,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU) INIT_PARAM(max_loads_all_threads, "terminate when all threads have reached
this load count"), INIT_PARAM(mem, "memory"), + INIT_PARAM(system, "system object"), #if FULL_SYSTEM INIT_PARAM(itb, "Instruction TLB"), INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(system, "system object"), INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(profile, ""), #else @@ -520,11 +528,11 @@ CREATE_SIM_OBJECT(AtomicSimpleCPU) params->width = width; params->simulate_stalls = simulate_stalls; params->mem = mem; + params->system = system; #if FULL_SYSTEM params->itb = itb; params->dtb = dtb; - params->system = system; params->cpu_id = cpu_id; params->profile = profile; #else diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh index 179b4a7211..d59ca01aa4 100644 --- a/src/cpu/simple/atomic.hh +++ b/src/cpu/simple/atomic.hh @@ -127,6 +127,7 @@ class AtomicSimpleCPU : public BaseSimpleCPU virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string &section); + virtual void resume(); void switchOut(); void takeOverFrom(BaseCPU *oldCPU); diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index a505411896..af10e64d7e 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -55,10 +55,10 @@ #include "sim/sim_events.hh" #include "sim/sim_object.hh" #include "sim/stats.hh" +#include "sim/system.hh" #if FULL_SYSTEM #include "base/remote_gdb.hh" -#include "sim/system.hh" #include "arch/tlb.hh" #include "arch/stacktrace.hh" #include "arch/vtophys.hh" diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index e55301c6bf..d2c2c7c47a 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -33,6 +33,7 @@ #include "cpu/simple/timing.hh" #include "mem/packet_impl.hh" #include "sim/builder.hh" +#include "sim/system.hh" using namespace std; using namespace TheISA; @@ -91,7 +92,7 @@ TimingSimpleCPU::TimingSimpleCPU(Params *p) ifetch_pkt = dcache_pkt = NULL; drainEvent = NULL; fetchEvent = NULL; - state = SimObject::Timing; + changeState(SimObject::Running); } @@ -113,18 +114,18 @@
TimingSimpleCPU::unserialize(Checkpoint *cp, const string &section) BaseSimpleCPU::unserialize(cp, section); } -bool +unsigned int TimingSimpleCPU::drain(Event *drain_event) { // TimingSimpleCPU is ready to drain if it's not waiting for // an access to complete. if (status() == Idle || status() == Running || status() == SwitchedOut) { - changeState(SimObject::DrainedTiming); - return true; + changeState(SimObject::Drained); + return 0; } else { changeState(SimObject::Draining); drainEvent = drain_event; - return false; + return 1; } } @@ -142,12 +143,9 @@ TimingSimpleCPU::resume() new EventWrapper(this, false); fetchEvent->schedule(curTick); } -} -void -TimingSimpleCPU::setMemoryMode(State new_mode) -{ - assert(new_mode == SimObject::Timing); + assert(system->getMemoryMode() == System::Timing); + changeState(SimObject::Running); } void @@ -514,7 +512,7 @@ void TimingSimpleCPU::completeDrain() { DPRINTF(Config, "Done draining\n"); - changeState(SimObject::DrainedTiming); + changeState(SimObject::Drained); drainEvent->process(); } @@ -551,11 +549,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(TimingSimpleCPU) Param max_loads_any_thread; Param max_loads_all_threads; SimObjectParam mem; + SimObjectParam system; #if FULL_SYSTEM SimObjectParam itb; SimObjectParam dtb; - SimObjectParam system; Param cpu_id; Param profile; #else @@ -583,11 +581,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(TimingSimpleCPU) INIT_PARAM(max_loads_all_threads, "terminate when all threads have reached this load count"), INIT_PARAM(mem, "memory"), + INIT_PARAM(system, "system object"), #if FULL_SYSTEM INIT_PARAM(itb, "Instruction TLB"), INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(system, "system object"), INIT_PARAM(cpu_id, "processor ID"), INIT_PARAM(profile, ""), #else @@ -618,11 +616,11 @@ CREATE_SIM_OBJECT(TimingSimpleCPU) params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; params->mem = mem; + params->system = system; #if FULL_SYSTEM params->itb = itb; params->dtb = dtb; -
params->system = system; params->cpu_id = cpu_id; params->profile = profile; #else diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index 0a3f91e6c5..ac36e5c994 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -137,9 +137,8 @@ class TimingSimpleCPU : public BaseSimpleCPU virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string &section); - virtual bool drain(Event *drain_event); + virtual unsigned int drain(Event *drain_event); virtual void resume(); - virtual void setMemoryMode(State new_mode); void switchOut(); void takeOverFrom(BaseCPU *oldCPU); diff --git a/src/dev/ide_disk.cc b/src/dev/ide_disk.cc index dc78021f8b..12564ddd0b 100644 --- a/src/dev/ide_disk.cc +++ b/src/dev/ide_disk.cc @@ -318,7 +318,7 @@ IdeDisk::doDmaTransfer() panic("Inconsistent DMA transfer state: dmaState = %d devState = %d\n", dmaState, devState); - if (ctrl->dmaPending()) { + if (ctrl->dmaPending() || ctrl->getState() != SimObject::Running) { dmaTransferEvent.schedule(curTick + DMA_BACKOFF_PERIOD); return; } else @@ -398,8 +398,7 @@ IdeDisk::doDmaRead() curPrd.getByteCount(), TheISA::PageBytes); } - if (ctrl->dmaPending()) { - panic("shouldn't be reentant??"); + if (ctrl->dmaPending() || ctrl->getState() != SimObject::Running) { dmaReadWaitEvent.schedule(curTick + DMA_BACKOFF_PERIOD); return; } else if (!dmaReadCG->done()) { @@ -474,8 +473,7 @@ IdeDisk::doDmaWrite() dmaWriteCG = new ChunkGenerator(curPrd.getBaseAddr(), curPrd.getByteCount(), TheISA::PageBytes); } - if (ctrl->dmaPending()) { - panic("shouldn't be reentant??"); + if (ctrl->dmaPending() || ctrl->getState() != SimObject::Running) { dmaWriteWaitEvent.schedule(curTick + DMA_BACKOFF_PERIOD); return; } else if (!dmaWriteCG->done()) { diff --git a/src/dev/io_device.cc b/src/dev/io_device.cc index cb4850108f..ed2862065b 100644 --- a/src/dev/io_device.cc +++ b/src/dev/io_device.cc @@ -32,10 +32,12 @@ #include "base/trace.hh" #include "dev/io_device.hh"
#include "sim/builder.hh" +#include "sim/system.hh" -PioPort::PioPort(PioDevice *dev, Platform *p, std::string pname) - : Port(dev->name() + pname), device(dev), platform(p) +PioPort::PioPort(PioDevice *dev, System *s, std::string pname) + : Port(dev->name() + pname), device(dev), sys(s), + outTiming(0), drainEvent(NULL) { } @@ -68,13 +70,23 @@ PioPort::recvRetry() if (result) transmitList.pop_front(); } + if (transmitList.size() == 0 && drainEvent) { + drainEvent->process(); + drainEvent = NULL; + } } void PioPort::SendEvent::process() { + port->outTiming--; + assert(port->outTiming >= 0); if (port->Port::sendTiming(packet)) - return; + if (port->transmitList.size() == 0 && port->drainEvent) { + port->drainEvent->process(); + port->drainEvent = NULL; + } + return; port->transmitList.push_back(packet); } @@ -105,6 +117,15 @@ PioPort::recvTiming(Packet *pkt) return true; } +unsigned int +PioPort::drain(Event *de) +{ + if (outTiming == 0 && transmitList.size() == 0) + return 0; + drainEvent = de; + return 1; +} + PioDevice::~PioDevice() { if (pioPort) @@ -119,6 +140,19 @@ PioDevice::init() pioPort->sendStatusChange(Port::RangeChange); } + +unsigned int +PioDevice::drain(Event *de) +{ + unsigned int count; + count = pioPort->drain(de); + if (count) + changeState(Draining); + else + changeState(Drained); + return count; +} + void BasicPioDevice::addressRanges(AddrRangeList &range_list) { @@ -128,8 +162,9 @@ BasicPioDevice::addressRanges(AddrRangeList &range_list) } -DmaPort::DmaPort(DmaDevice *dev, Platform *p) - : Port(dev->name() + "-dmaport"), device(dev), platform(p), pendingCount(0) +DmaPort::DmaPort(DmaDevice *dev, System *s) + : Port(dev->name() + "-dmaport"), device(dev), sys(s), pendingCount(0), + actionInProgress(0), drainEvent(NULL) { } bool @@ -159,6 +194,11 @@ DmaPort::recvTiming(Packet *pkt) } delete pkt->req; delete pkt; + + if (pendingCount == 0 && drainEvent) { + drainEvent->process(); + drainEvent = NULL; + } } else { panic("Got packet without sender 
state... huh?\n"); } @@ -170,6 +210,29 @@ DmaDevice::DmaDevice(Params *p) : PioDevice(p), dmaPort(NULL) { } + +unsigned int +DmaDevice::drain(Event *de) +{ + unsigned int count; + count = pioPort->drain(de) + dmaPort->drain(de); + if (count) + changeState(Draining); + else + changeState(Drained); + return count; +} + +unsigned int +DmaPort::drain(Event *de) +{ + if (pendingCount == 0) + return 0; + drainEvent = de; + return 1; +} + + void DmaPort::recvRetry() { @@ -195,6 +258,8 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event, { assert(event); + assert(device->getState() == SimObject::Running); + DmaReqState *reqState = new DmaReqState(event, this, size); for (ChunkGenerator gen(addr, size, peerBlockSize()); @@ -212,51 +277,53 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event, pendingCount++; sendDma(pkt); } + } void DmaPort::sendDma(Packet *pkt, bool front) { - // some kind of selction between access methods - // more work is going to have to be done to make - // switching actually work - /* MemState state = device->platform->system->memState; + // some kind of selction between access methods + // more work is going to have to be done to make + // switching actually work + System::MemoryMode state = sys->getMemoryMode(); + if (state == System::Timing) { + DPRINTF(DMA, "Attempting to send Packet %#x with addr: %#x\n", + pkt, pkt->getAddr()); + if (transmitList.size() || !sendTiming(pkt)) { + if (front) + transmitList.push_front(pkt); + else + transmitList.push_back(pkt); + DPRINTF(DMA, "-- Failed: queued\n"); + } else { + DPRINTF(DMA, "-- Done\n"); + } + } else if (state == System::Atomic) { + sendAtomic(pkt); + assert(pkt->senderState); + DmaReqState *state = dynamic_cast(pkt->senderState); + assert(state); - if (state == Timing) { */ - DPRINTF(DMA, "Attempting to send Packet %#x with addr: %#x\n", - pkt, pkt->getAddr()); - if (transmitList.size() || !sendTiming(pkt)) { - if (front) - 
transmitList.push_front(pkt); - else - transmitList.push_back(pkt); - DPRINTF(DMA, "-- Failed: queued\n"); - } else { - DPRINTF(DMA, "-- Done\n"); - } - /* } else if (state == Atomic) { - sendAtomic(pkt); - if (pkt->senderState) { - DmaReqState *state = dynamic_cast(pkt->senderState); - assert(state); - state->completionEvent->schedule(curTick + (pkt->time - - pkt->req->getTime()) +1); - delete state; - } - pendingCount--; - assert(pendingCount >= 0); - delete pkt->req; - delete pkt; + state->numBytes += pkt->req->getSize(); + if (state->totBytes == state->numBytes) { + state->completionEvent->schedule(curTick + + (pkt->time - pkt->req->getTime()) +1); + delete state; + delete pkt->req; + } + pendingCount--; + assert(pendingCount >= 0); + delete pkt; + + if (pendingCount == 0 && drainEvent) { + drainEvent->process(); + drainEvent = NULL; + } - } else if (state == Functional) { - sendFunctional(pkt); - // Is this correct??? - completionEvent->schedule(pkt->req->responseTime - pkt->req->requestTime); - completionEvent == NULL; } else panic("Unknown memory command state."); - */ } DmaDevice::~DmaDevice() diff --git a/src/dev/io_device.hh b/src/dev/io_device.hh index 40edf68757..fa3f982476 100644 --- a/src/dev/io_device.hh +++ b/src/dev/io_device.hh @@ -60,9 +60,9 @@ class PioPort : public Port /** The device that this port serves. */ PioDevice *device; - /** The platform that device/port are in. This is used to select which mode + /** The system that device/port are in. This is used to select which mode * we are currently operating in. */ - Platform *platform; + System *sys; /** A list of outgoing timing response packets that haven't been serviced * yet. */ @@ -106,16 +106,27 @@ class PioPort : public Port friend class PioPort; }; + /** Number of timing requests that are emulating the device timing before + * attempting to end up on the bus. 
+ */ + int outTiming; + + /** If we need to drain, keep the drain event around until we're done + * here.*/ + Event *drainEvent; + /** Schedule a sendTiming() event to be called in the future. */ void sendTiming(Packet *pkt, Tick time) - { new PioPort::SendEvent(this, pkt, time); } + { outTiming++; new PioPort::SendEvent(this, pkt, time); } /** This function is notification that the device should attempt to send a * packet again. */ virtual void recvRetry(); public: - PioPort(PioDevice *dev, Platform *p, std::string pname = "-pioport"); + PioPort(PioDevice *dev, System *s, std::string pname = "-pioport"); + + unsigned int drain(Event *de); friend class PioPort::SendEvent; }; @@ -147,13 +158,20 @@ class DmaPort : public Port DmaDevice *device; std::list transmitList; - /** The platform that device/port are in. This is used to select which mode + /** The system that device/port are in. This is used to select which mode * we are currently operating in. */ - Platform *platform; + System *sys; /** Number of outstanding packets the dma port has. */ int pendingCount; + /** If a dmaAction is in progress. */ + int actionInProgress; + + /** If we need to drain, keep the drain event around until we're done + * here.*/ + Event *drainEvent; + virtual bool recvTiming(Packet *pkt); virtual Tick recvAtomic(Packet *pkt) { panic("dma port shouldn't be used for pio access."); } @@ -171,13 +189,14 @@ class DmaPort : public Port void sendDma(Packet *pkt, bool front = false); public: - DmaPort(DmaDevice *dev, Platform *p); + DmaPort(DmaDevice *dev, System *s); void dmaAction(Packet::Command cmd, Addr addr, int size, Event *event, uint8_t *data = NULL); bool dmaPending() { return pendingCount > 0; } + unsigned int drain(Event *de); }; /** @@ -196,6 +215,8 @@ class PioDevice : public MemObject * transaction we should perform. */ Platform *platform; + System *sys; + /** The pioPort that handles the requests for us and provides us requests * that it sees. 
*/ PioPort *pioPort; @@ -240,20 +261,22 @@ class PioDevice : public MemObject const Params *params() const { return _params; } PioDevice(Params *p) - : MemObject(p->name), platform(p->platform), pioPort(NULL), - _params(p) + : MemObject(p->name), platform(p->platform), sys(p->system), + pioPort(NULL), _params(p) {} virtual ~PioDevice(); virtual void init(); + virtual unsigned int drain(Event *de); + virtual Port *getPort(const std::string &if_name, int idx = -1) { if (if_name == "pio") { if (pioPort != NULL) panic("pio port already connected to."); - pioPort = new PioPort(this, params()->platform); + pioPort = new PioPort(this, sys); return pioPort; } else return NULL; @@ -310,17 +333,19 @@ class DmaDevice : public PioDevice bool dmaPending() { return dmaPort->dmaPending(); } + virtual unsigned int drain(Event *de); + virtual Port *getPort(const std::string &if_name, int idx = -1) { if (if_name == "pio") { if (pioPort != NULL) panic("pio port already connected to."); - pioPort = new PioPort(this, params()->platform); + pioPort = new PioPort(this, sys); return pioPort; } else if (if_name == "dma") { if (dmaPort != NULL) panic("dma port already connected to."); - dmaPort = new DmaPort(this, params()->platform); + dmaPort = new DmaPort(this, sys); return dmaPort; } else return NULL; diff --git a/src/dev/ns_gige.cc b/src/dev/ns_gige.cc index 179a2c62dd..bf2279d93a 100644 --- a/src/dev/ns_gige.cc +++ b/src/dev/ns_gige.cc @@ -1377,7 +1377,7 @@ NSGigE::doRxDmaRead() assert(rxDmaState == dmaIdle || rxDmaState == dmaReadWaiting); rxDmaState = dmaReading; - if (dmaPending()) + if (dmaPending() || getState() != Running) rxDmaState = dmaReadWaiting; else dmaRead(rxDmaAddr, rxDmaLen, &rxDmaReadEvent, (uint8_t*)rxDmaData); @@ -1408,7 +1408,7 @@ NSGigE::doRxDmaWrite() assert(rxDmaState == dmaIdle || rxDmaState == dmaWriteWaiting); rxDmaState = dmaWriting; - if (dmaPending()) + if (dmaPending() || getState() != Running) rxDmaState = dmaWriteWaiting; else dmaWrite(rxDmaAddr, 
rxDmaLen, &rxDmaWriteEvent, (uint8_t*)rxDmaData); @@ -1826,7 +1826,7 @@ NSGigE::doTxDmaRead() assert(txDmaState == dmaIdle || txDmaState == dmaReadWaiting); txDmaState = dmaReading; - if (dmaPending()) + if (dmaPending() || getState() != Running) txDmaState = dmaReadWaiting; else dmaRead(txDmaAddr, txDmaLen, &txDmaReadEvent, (uint8_t*)txDmaData); @@ -1857,7 +1857,7 @@ NSGigE::doTxDmaWrite() assert(txDmaState == dmaIdle || txDmaState == dmaWriteWaiting); txDmaState = dmaWriting; - if (dmaPending()) + if (dmaPending() || getState() != Running) txDmaState = dmaWriteWaiting; else dmaWrite(txDmaAddr, txDmaLen, &txDmaWriteEvent, (uint8_t*)txDmaData); @@ -2406,6 +2406,20 @@ NSGigE::recvPacket(EthPacketPtr packet) return true; } + +void +NSGigE::resume() +{ + SimObject::resume(); + + // During drain we could have left the state machines in a waiting state and + // they wouldn't get out until some other event occured to kick them. + // This way they'll get out immediately + txKick(); + rxKick(); +} + + //===================================================================== // // diff --git a/src/dev/ns_gige.hh b/src/dev/ns_gige.hh index ea72437770..080c0b1f32 100644 --- a/src/dev/ns_gige.hh +++ b/src/dev/ns_gige.hh @@ -391,6 +391,8 @@ class NSGigE : public PciDev virtual void serialize(std::ostream &os); virtual void unserialize(Checkpoint *cp, const std::string &section); + virtual void resume(); + public: void regStats(); diff --git a/src/dev/pcidev.cc b/src/dev/pcidev.cc index 62a7324ad4..e81e0d1ee2 100644 --- a/src/dev/pcidev.cc +++ b/src/dev/pcidev.cc @@ -56,8 +56,8 @@ using namespace std; PciDev::PciConfigPort::PciConfigPort(PciDev *dev, int busid, int devid, int funcid, Platform *p) - : PioPort(dev,p,"-pciconf"), device(dev), busId(busid), deviceId(devid), - functionId(funcid) + : PioPort(dev,p->system,"-pciconf"), device(dev), platform(p), + busId(busid), deviceId(devid), functionId(funcid) { configAddr = platform->calcConfigAddr(busId, deviceId, functionId); } @@
-132,6 +132,18 @@ PciDev::init() PioDevice::init(); } +unsigned int +PciDev::drain(Event *de) +{ + unsigned int count; + count = pioPort->drain(de) + dmaPort->drain(de) + configPort->drain(de); + if (count) + changeState(Draining); + else + changeState(Drained); + return count; +} + Tick PciDev::readConfig(Packet *pkt) { diff --git a/src/dev/pcidev.hh b/src/dev/pcidev.hh index 20ab9364a8..847fb07d03 100644 --- a/src/dev/pcidev.hh +++ b/src/dev/pcidev.hh @@ -95,6 +95,8 @@ class PciDev : public DmaDevice virtual void getDeviceAddressRanges(AddrRangeList &resp, AddrRangeList &snoop); + Platform *platform; + int busId; int deviceId; int functionId; @@ -249,6 +251,9 @@ class PciDev : public DmaDevice */ virtual void unserialize(Checkpoint *cp, const std::string &section); + + virtual unsigned int drain(Event *de); + virtual Port *getPort(const std::string &if_name, int idx = -1) { if (if_name == "config") { diff --git a/src/dev/sinic.cc b/src/dev/sinic.cc index dddda1f1cb..815cecca5a 100644 --- a/src/dev/sinic.cc +++ b/src/dev/sinic.cc @@ -921,7 +921,7 @@ Device::rxKick() break; case rxBeginCopy: - if (dmaPending()) + if (dmaPending() || getState() != Running) goto exit; rxDmaAddr = params()->platform->pciToDma( @@ -1109,7 +1109,7 @@ Device::txKick() break; case txBeginCopy: - if (dmaPending()) + if (dmaPending() || getState() != Running) goto exit; txDmaAddr = params()->platform->pciToDma( @@ -1287,6 +1287,18 @@ Device::recvPacket(EthPacketPtr packet) return true; } +void +Device::resume() +{ + SimObject::resume(); + + // During drain we could have left the state machines in a waiting state and + // they wouldn't get out until some other event occured to kick them.
+ // This way they'll get out immediately + txKick(); + rxKick(); +} + //===================================================================== // // diff --git a/src/dev/sinic.hh b/src/dev/sinic.hh index f6c229039a..eece4ba6b8 100644 --- a/src/dev/sinic.hh +++ b/src/dev/sinic.hh @@ -266,6 +266,7 @@ class Device : public Base public: virtual Tick read(Packet *pkt); virtual Tick write(Packet *pkt); + virtual void resume(); void prepareIO(int cpu, int index); void prepareRead(int cpu, int index); diff --git a/src/python/m5/config.py b/src/python/m5/config.py index 8291e1e1bd..8eed28dccb 100644 --- a/src/python/m5/config.py +++ b/src/python/m5/config.py @@ -547,8 +547,7 @@ class SimObject(object): count = 0 # ParamContexts don't serialize if isinstance(self, SimObject) and not isinstance(self, ParamContext): - if not self._ccObject.drain(drain_event): - count = 1 + count += self._ccObject.drain(drain_event) if recursive: for child in self._children.itervalues(): count += child.startDrain(drain_event, True) @@ -561,7 +560,7 @@ class SimObject(object): child.resume() def changeTiming(self, mode): - if isinstance(self, SimObject) and not isinstance(self, ParamContext): + if isinstance(self, System): self._ccObject.setMemoryMode(mode) for child in self._children.itervalues(): child.changeTiming(mode) diff --git a/src/python/m5/objects/BaseCPU.py b/src/python/m5/objects/BaseCPU.py index 2e78578dfe..5bf98be9c9 100644 --- a/src/python/m5/objects/BaseCPU.py +++ b/src/python/m5/objects/BaseCPU.py @@ -6,10 +6,10 @@ class BaseCPU(SimObject): abstract = True mem = Param.MemObject("memory") + system = Param.System(Parent.any, "system object") if build_env['FULL_SYSTEM']: dtb = Param.AlphaDTB("Data TLB") itb = Param.AlphaITB("Instruction TLB") - system = Param.System(Parent.any, "system object") cpu_id = Param.Int(-1, "CPU identifier") else: workload = VectorParam.Process("processes to run") diff --git a/src/sim/sim_object.cc b/src/sim/sim_object.cc index a0278dba0f..d12b06b7ad 
100644 --- a/src/sim/sim_object.cc +++ b/src/sim/sim_object.cc @@ -72,7 +72,7 @@ SimObject::SimObject(Params *p) doRecordEvent = !Stats::event_ignore.match(name()); simObjectList.push_back(this); - state = Atomic; + state = Running; } // @@ -88,7 +88,7 @@ SimObject::SimObject(const string &_name) doRecordEvent = !Stats::event_ignore.match(name()); simObjectList.push_back(this); - state = Atomic; + state = Running; } void @@ -269,38 +269,23 @@ SimObject::recordEvent(const std::string &stat) Stats::recordEvent(stat); } -bool +unsigned int SimObject::drain(Event *drain_event) { - if (state != DrainedAtomic && state != Atomic) { - panic("Must implement your own drain function if it is to be used " - "in timing mode!"); - } - state = DrainedAtomic; - return true; + state = Drained; + return 0; } void SimObject::resume() { - if (state == DrainedAtomic) { - state = Atomic; - } else if (state == DrainedTiming) { - state = Timing; - } + state = Running; } void SimObject::setMemoryMode(State new_mode) { - assert(new_mode == Timing || new_mode == Atomic); - if (state == DrainedAtomic && new_mode == Timing) { - state = DrainedTiming; - } else if (state == DrainedTiming && new_mode == Atomic) { - state = DrainedAtomic; - } else { - state = new_mode; - } + panic("setMemoryMode() should only be called on systems"); } void diff --git a/src/sim/sim_object.hh b/src/sim/sim_object.hh index 7ecc009588..38f2bdd232 100644 --- a/src/sim/sim_object.hh +++ b/src/sim/sim_object.hh @@ -60,16 +60,15 @@ class SimObject : public Serializable, protected StartupCallback }; enum State { - Atomic, - Timing, + Running, Draining, - DrainedAtomic, - DrainedTiming + Drained }; + private: + State state; protected: Params *_params; - State state; void changeState(State new_state) { state = new_state; } @@ -116,8 +115,10 @@ class SimObject : public Serializable, protected StartupCallback // Methods to drain objects in order to take checkpoints // Or switch from timing -> atomic memory model - // Drain 
returns false if the SimObject cannot drain immediately. - virtual bool drain(Event *drain_event); + // Drain returns 0 if the simobject can drain immediately or + // the number of times the drain_event's process function will be called + // before the object will be done draining. Normally this should be 1 + virtual unsigned int drain(Event *drain_event); virtual void resume(); virtual void setMemoryMode(State new_mode); virtual void switchOut(); diff --git a/src/sim/system.cc b/src/sim/system.cc index 89e7b8542b..2780d0fdac 100644 --- a/src/sim/system.cc +++ b/src/sim/system.cc @@ -143,6 +143,14 @@ int rgdb_wait = -1; #endif // FULL_SYSTEM + +void +System::setMemoryMode(MemoryMode mode) +{ + assert(getState() == Drained); + memoryMode = mode; +} + int System::registerThreadContext(ThreadContext *tc, int id) { diff --git a/src/sim/system.hh b/src/sim/system.hh index 059dc92dc7..77777aed40 100644 --- a/src/sim/system.hh +++ b/src/sim/system.hh @@ -61,6 +61,21 @@ class RemoteGDB; class System : public SimObject { public: + enum MemoryMode { + Invalid=0, + Atomic, + Timing + }; + + + MemoryMode getMemoryMode() { assert(memoryMode); return memoryMode; } + + /** Change the memory mode of the system. This should only be called by the + * python!! + * @param mode Mode to change to (atomic/timing) + */ + void setMemoryMode(MemoryMode mode); + PhysicalMemory *physmem; PCEventQueue pcEventQueue; @@ -108,6 +123,8 @@ class System : public SimObject protected: + MemoryMode memoryMode; + #if FULL_SYSTEM /** * Fix up an address used to match PCs for hooking simulator From 63c999edef982e995ab65e0a3d672b0d7900c6f3 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 13 Jul 2006 12:21:21 -0400 Subject: [PATCH 143/152] Fix help message printing. Might need to clean up the handling of the sys.exit() call, as right now it prints out "None" at the end (not sure why). src/python/m5/main.py: Fix help message printing. 
--HG-- extra : convert_revision : 6906234101eb7ff7df7933e9aede0362b5a991bd --- src/python/m5/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/m5/main.py b/src/python/m5/main.py index 54c54c1d5d..aae74d3711 100644 --- a/src/python/m5/main.py +++ b/src/python/m5/main.py @@ -182,7 +182,7 @@ options = attrdict() arguments = [] def usage(exitcode=None): - print parser.help + print parser.print_help() if exitcode is not None: sys.exit(exitcode) From a0a952d5ff8e27d34c7fa68fe5199c57670c53d1 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 13 Jul 2006 13:08:58 -0400 Subject: [PATCH 144/152] Update for changes to draining. --HG-- extra : convert_revision : 5038dd8be72827f40cf89318db0b2bb4f9bbd864 --- src/cpu/o3/cpu.cc | 16 +++++++++------- src/cpu/o3/cpu.hh | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 7d2727401c..6e9b425c0e 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -441,7 +441,7 @@ FullO3CPU::tick() if (!tickEvent.scheduled()) { if (_status == SwitchedOut || - getState() == SimObject::DrainedTiming) { + getState() == SimObject::Drained) { // increment stat lastRunningCycle = curTick; } else if (!activityRec.active()) { @@ -803,7 +803,7 @@ FullO3CPU::unserialize(Checkpoint *cp, const std::string &section) } template -bool +unsigned int FullO3CPU::drain(Event *drain_event) { drainCount = 0; @@ -815,7 +815,7 @@ FullO3CPU::drain(Event *drain_event) // Wake the CPU and record activity so everything can drain out if // the CPU was not able to immediately drain.
- if (getState() != SimObject::DrainedTiming) { + if (getState() != SimObject::Drained) { // A bit of a hack...set the drainEvent after all the drain() // calls have been made, that way if all of the stages drain // immediately, the signalDrained() function knows not to call @@ -825,9 +825,9 @@ FullO3CPU::drain(Event *drain_event) wakeCPU(); activityRec.activity(); - return false; + return 1; } else { - return true; + return 0; } } @@ -835,19 +835,21 @@ template void FullO3CPU::resume() { + assert(system->getMemoryMode() == System::Timing); fetch.resume(); decode.resume(); rename.resume(); iew.resume(); commit.resume(); + changeState(SimObject::Running); + if (_status == SwitchedOut || _status == Idle) return; if (!tickEvent.scheduled()) tickEvent.schedule(curTick); _status = Running; - changeState(SimObject::Timing); } template @@ -858,7 +860,7 @@ FullO3CPU::signalDrained() if (tickEvent.scheduled()) tickEvent.squash(); - changeState(SimObject::DrainedTiming); + changeState(SimObject::Drained); if (drainEvent) { drainEvent->process(); diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 2fbd013ac8..83cb966e31 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -330,7 +330,7 @@ class FullO3CPU : public BaseO3CPU /** Starts draining the CPU's pipeline of all instructions in * order to stop all memory accesses. */ - virtual bool drain(Event *drain_event); + virtual unsigned int drain(Event *drain_event); /** Resumes execution after a drain. */ virtual void resume(); From 2af213022ce6d58eee2809f300d7450e89a4bce9 Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 13 Jul 2006 13:09:29 -0400 Subject: [PATCH 145/152] Fix for bug when squashing and the fetching. Now fetch checks if the cache data is valid. 
--HG-- extra : convert_revision : 07b8eda3e90bbbb3ed470c8cc3cf1b63371ab529 --- src/cpu/o3/fetch.hh | 3 +++ src/cpu/o3/fetch_impl.hh | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 0331cf07fb..931919af87 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -407,6 +407,9 @@ class DefaultFetch /** The PC of the cacheline that has been loaded. */ Addr cacheDataPC[Impl::MaxThreads]; + /** Whether or not the cache data is valid. */ + bool cacheDataValid[Impl::MaxThreads]; + /** Size of instructions. */ int instSize; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 4045492ca5..4184e18679 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -162,6 +162,8 @@ DefaultFetch::DefaultFetch(Params *params) // Create space to store a cache line. cacheData[tid] = new uint8_t[cacheBlkSize]; + cacheDataPC[tid] = 0; + cacheDataValid[tid] = false; stalls[tid].decode = 0; stalls[tid].rename = 0; @@ -358,6 +360,7 @@ DefaultFetch::processCacheCompletion(PacketPtr pkt) } memcpy(cacheData[tid], pkt->getPtr(), cacheBlkSize); + cacheDataValid[tid] = true; if (!drainPending) { // Wake up the CPU (if it went to sleep and was waiting on @@ -520,7 +523,7 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid fetch_PC = icacheBlockAlignPC(fetch_PC); // If we've already got the block, no need to try to fetch it again. - if (fetch_PC == cacheDataPC[tid]) { + if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) { return true; } @@ -555,9 +558,10 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Build packet here. 
PacketPtr data_pkt = new Packet(mem_req, Packet::ReadReq, Packet::Broadcast); - data_pkt->dataDynamic(new uint8_t[cacheBlkSize]); + data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]); cacheDataPC[tid] = fetch_PC; + cacheDataValid[tid] = false; DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); From 1e4acb8e017ce81694c514af21ad817e9b1a078e Mon Sep 17 00:00:00 2001 From: Kevin Lim Date: Thu, 13 Jul 2006 13:12:51 -0400 Subject: [PATCH 146/152] Move Dcache port creation from LSQUnit to LSQ in order to support Ron's recent changes, and using the O3CPU in SMT mode. src/cpu/o3/lsq.hh: Update to have LSQ work with only one dcache port for all LSQ Units. LSQ has the dcache port, and the LSQ Units must tell the LSQ if the cache has become blocked. src/cpu/o3/lsq_impl.hh: Updates to have the LSQ work with only one dcache port for all LSQUnits. src/cpu/o3/lsq_unit.hh: src/cpu/o3/lsq_unit_impl.hh: Update for LSQ to create dcache port instead of LSQUnits. Now LSQUnits are given the dcache port from the LSQ, and also must check the LSQ if the cache is blocked prior to accessing the cache. --HG-- extra : convert_revision : 2708adbf323f4e7647dc0c1e31ef5bb4596b89f8 --- src/cpu/o3/lsq.hh | 59 ++++++++++++++++++++++++- src/cpu/o3/lsq_impl.hh | 53 ++++++++++++++++++++-- src/cpu/o3/lsq_unit.hh | 88 +++++++++++-------------------------- src/cpu/o3/lsq_unit_impl.hh | 51 +++------------------ 4 files changed, 139 insertions(+), 112 deletions(-) diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index d5890950f0..190734dc2a 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -70,7 +70,7 @@ class LSQ { * to work. For now it just returns the port from one of the * threads. */ - Port *getDcachePort() { return thread[0].getDcachePort(); } + Port *getDcachePort() { return &dcachePort; } /** Sets the pointer to the list of active threads. 
*/ void setActiveThreads(std::list *at_ptr); @@ -258,6 +258,15 @@ class LSQ { bool willWB(unsigned tid) { return thread[tid].willWB(); } + /** Returns if the cache is currently blocked. */ + bool cacheBlocked() + { return retryTid != -1; } + + /** Sets the retry thread id, indicating that one of the LSQUnits + * tried to access the cache but the cache was blocked. */ + void setRetryTid(int tid) + { retryTid = tid; } + /** Debugging function to print out all instructions. */ void dumpInsts(); /** Debugging function to print out instructions from a specific thread. */ @@ -274,7 +283,49 @@ class LSQ { template Fault write(RequestPtr req, T &data, int store_idx); - private: + /** DcachePort class for this LSQ. Handles doing the + * communication with the cache/memory. + */ + class DcachePort : public Port + { + protected: + /** Pointer to LSQ. */ + LSQ *lsq; + + public: + /** Default constructor. */ + DcachePort(LSQ *_lsq) + : lsq(_lsq) + { } + + protected: + /** Atomic version of receive. Panics. */ + virtual Tick recvAtomic(PacketPtr pkt); + + /** Functional version of receive. Panics. */ + virtual void recvFunctional(PacketPtr pkt); + + /** Receives status change. Other than range changing, panics. */ + virtual void recvStatusChange(Status status); + + /** Returns the address ranges of this device. */ + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); } + + /** Timing version of receive. Handles writing back and + * completing the load or store that has returned from + * memory. */ + virtual bool recvTiming(PacketPtr pkt); + + /** Handles doing a retry of the previous send. */ + virtual void recvRetry(); + }; + + /** D-cache port. */ + DcachePort dcachePort; + + protected: /** The LSQ policy for SMT mode. */ LSQPolicy lsqPolicy; @@ -303,6 +354,10 @@ class LSQ { /** Number of Threads. */ unsigned numThreads; + + /** The thread id of the LSQ Unit that is currently waiting for a + * retry. 
*/ + int retryTid; }; template diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 89fd1a71da..4e39570298 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -35,10 +35,54 @@ using namespace std; +template +Tick +LSQ::DcachePort::recvAtomic(PacketPtr pkt) +{ + panic("O3CPU model does not work with atomic mode!"); + return curTick; +} + +template +void +LSQ::DcachePort::recvFunctional(PacketPtr pkt) +{ + panic("O3CPU doesn't expect recvFunctional callback!"); +} + +template +void +LSQ::DcachePort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("O3CPU doesn't expect recvStatusChange callback!"); +} + +template +bool +LSQ::DcachePort::recvTiming(PacketPtr pkt) +{ + lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt); + return true; +} + +template +void +LSQ::DcachePort::recvRetry() +{ + lsq->thread[lsq->retryTid].recvRetry(); + // Speculatively clear the retry Tid. This will get set again if + // the LSQUnit was unable to complete its access. 
+ lsq->retryTid = -1; +} + template LSQ::LSQ(Params *params) - : LQEntries(params->LQEntries), SQEntries(params->SQEntries), - numThreads(params->numberOfThreads) + : dcachePort(this), LQEntries(params->LQEntries), + SQEntries(params->SQEntries), numThreads(params->numberOfThreads), + retryTid(-1) { DPRINTF(LSQ, "Creating LSQ object.\n"); @@ -94,7 +138,8 @@ LSQ::LSQ(Params *params) //Initialize LSQs for (int tid=0; tid < numThreads; tid++) { - thread[tid].init(params, maxLQEntries, maxSQEntries, tid); + thread[tid].init(params, this, maxLQEntries, maxSQEntries, tid); + thread[tid].setDcachePort(&dcachePort); } } @@ -130,6 +175,8 @@ LSQ::setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; + dcachePort.setName(name()); + for (int tid=0; tid < numThreads; tid++) { thread[tid].setCPU(cpu_ptr); } diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 4d7a8350b6..a76a73f0c5 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -64,6 +64,7 @@ class LSQUnit { typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::CPUPol::IEW IEW; + typedef typename Impl::CPUPol::LSQ LSQ; typedef typename Impl::CPUPol::IssueStruct IssueStruct; public: @@ -71,17 +72,12 @@ class LSQUnit { LSQUnit(); /** Initializes the LSQ unit with the specified number of entries. */ - void init(Params *params, unsigned maxLQEntries, + void init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, unsigned id); /** Returns the name of the LSQ unit. */ std::string name() const; - /** Returns the dcache port. - * @todo: Remove this once the port moves up to the LSQ level. - */ - Port *getDcachePort() { return dcachePort; } - /** Registers statistics. */ void regStats(); @@ -92,6 +88,10 @@ class LSQUnit { void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; } + /** Sets the pointer to the dcache port. */ + void setDcachePort(Port *dcache_port) + { dcachePort = dcache_port; } + /** Switches out LSQ unit. 
*/ void switchOut(); @@ -211,6 +211,9 @@ class LSQUnit { !storeQueue[storeWBIdx].completed && !isStoreBlocked; } + /** Handles doing the retry. */ + void recvRetry(); + private: /** Writes back the instruction, sending it to IEW. */ void writeback(DynInstPtr &inst, PacketPtr pkt); @@ -221,9 +224,6 @@ class LSQUnit { /** Completes the store at the specified index. */ void completeStore(int store_idx); - /** Handles doing the retry. */ - void recvRetry(); - /** Increments the given store index (circular queue). */ inline void incrStIdx(int &store_idx); /** Decrements the given store index (circular queue). */ @@ -244,54 +244,11 @@ class LSQUnit { /** Pointer to the IEW stage. */ IEW *iewStage; - /** Pointer to memory object. */ - MemObject *mem; + /** Pointer to the LSQ. */ + LSQ *lsq; - /** DcachePort class for this LSQ Unit. Handles doing the - * communication with the cache/memory. - * @todo: Needs to be moved to the LSQ level and have some sort - * of arbitration. - */ - class DcachePort : public Port - { - protected: - /** Pointer to CPU. */ - O3CPU *cpu; - /** Pointer to LSQ. */ - LSQUnit *lsq; - - public: - /** Default constructor. */ - DcachePort(O3CPU *_cpu, LSQUnit *_lsq) - : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq) - { } - - protected: - /** Atomic version of receive. Panics. */ - virtual Tick recvAtomic(PacketPtr pkt); - - /** Functional version of receive. Panics. */ - virtual void recvFunctional(PacketPtr pkt); - - /** Receives status change. Other than range changing, panics. */ - virtual void recvStatusChange(Status status); - - /** Returns the address ranges of this device. */ - virtual void getDeviceAddressRanges(AddrRangeList &resp, - AddrRangeList &snoop) - { resp.clear(); snoop.clear(); } - - /** Timing version of receive. Handles writing back and - * completing the load or store that has returned from - * memory. */ - virtual bool recvTiming(PacketPtr pkt); - - /** Handles doing a retry of the previous send. 
*/ - virtual void recvRetry(); - }; - - /** Pointer to the D-cache. */ - DcachePort *dcachePort; + /** Pointer to the dcache port. Used only for sending. */ + Port *dcachePort; /** Derived class to hold any sender state the LSQ needs. */ class LSQSenderState : public Packet::SenderState @@ -658,7 +615,7 @@ LSQUnit::read(Request *req, T &data, int load_idx) } // If there's no forwarding case, then go access memory - DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n", + DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %#x\n", load_inst->seqNum, load_inst->readPC()); assert(!load_inst->memData); @@ -666,9 +623,6 @@ LSQUnit::read(Request *req, T &data, int load_idx) ++usedPorts; - DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n", - load_inst->readPC()); - PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); data_pkt->dataStatic(load_inst->memData); @@ -678,8 +632,18 @@ LSQUnit::read(Request *req, T &data, int load_idx) state->inst = load_inst; data_pkt->senderState = state; - // if we have a cache, do cache access too - if (!dcachePort->sendTiming(data_pkt)) { + // if we the cache is not blocked, do cache access + if (!lsq->cacheBlocked()) { + if (!dcachePort->sendTiming(data_pkt)) { + // If the access didn't succeed, tell the LSQ by setting + // the retry thread id. + lsq->setRetryTid(lsqID); + } + } + + // If the cache was blocked, or has become blocked due to the access, + // handle it. + if (lsq->cacheBlocked()) { ++lsqCacheBlocked; // There's an older load that's already going to squash. 
if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum) diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 8e951534f2..85b150cd97 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -31,6 +31,7 @@ #include "config/use_checker.hh" +#include "cpu/o3/lsq.hh" #include "cpu/o3/lsq_unit.hh" #include "base/str.hh" #include "mem/packet.hh" @@ -95,46 +96,6 @@ LSQUnit::completeDataAccess(PacketPtr pkt) delete pkt; } -template -Tick -LSQUnit::DcachePort::recvAtomic(PacketPtr pkt) -{ - panic("O3CPU model does not work with atomic mode!"); - return curTick; -} - -template -void -LSQUnit::DcachePort::recvFunctional(PacketPtr pkt) -{ - panic("O3CPU doesn't expect recvFunctional callback!"); -} - -template -void -LSQUnit::DcachePort::recvStatusChange(Status status) -{ - if (status == RangeChange) - return; - - panic("O3CPU doesn't expect recvStatusChange callback!"); -} - -template -bool -LSQUnit::DcachePort::recvTiming(PacketPtr pkt) -{ - lsq->completeDataAccess(pkt); - return true; -} - -template -void -LSQUnit::DcachePort::recvRetry() -{ - lsq->recvRetry(); -} - template LSQUnit::LSQUnit() : loads(0), stores(0), storesToWB(0), stalled(false), @@ -145,13 +106,15 @@ LSQUnit::LSQUnit() template void -LSQUnit::init(Params *params, unsigned maxLQEntries, +LSQUnit::init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, unsigned id) { DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id); switchedOut = false; + lsq = lsq_ptr; + lsqID = id; // Add 1 for the sentinel entry (they are circular queues). 
@@ -168,8 +131,6 @@ LSQUnit::init(Params *params, unsigned maxLQEntries, usedPorts = 0; cachePorts = params->cachePorts; - mem = params->mem; - memDepViolator = NULL; blockedLoadSeqNum = 0; @@ -180,7 +141,6 @@ void LSQUnit::setCPU(O3CPU *cpu_ptr) { cpu = cpu_ptr; - dcachePort = new DcachePort(cpu, this); #if USE_CHECKER if (cpu->checker) { @@ -588,7 +548,7 @@ LSQUnit::writebackStores() storeQueue[storeWBIdx].canWB && usedPorts < cachePorts) { - if (isStoreBlocked) { + if (isStoreBlocked || lsq->cacheBlocked()) { DPRINTF(LSQUnit, "Unable to write back any more stores, cache" " is blocked!\n"); break; @@ -911,6 +871,7 @@ LSQUnit::recvRetry() } else { // Still blocked! ++lsqCacheBlocked; + lsq->setRetryTid(lsqID); } } else if (isLoadBlocked) { DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, " From c368ff0bd8d36ba001f523bd03f56f99d9ecd452 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Thu, 13 Jul 2006 15:48:17 -0400 Subject: [PATCH 147/152] add system.mem_mode = ['timing', 'atomic'] update scripts acordingly configs/test/SysPaths.py: new syspaths from nate, this one allows you to set script, binary, and disk paths like system.dir = 'aouaou' in your script configs/test/fs.py: update for system mem_mode Put small checkpoint example Make clock 1THz configs/test/test.py: src/arch/alpha/freebsd/system.cc: src/arch/alpha/linux/system.cc: src/arch/alpha/system.cc: src/arch/alpha/tru64/system.cc: src/arch/sparc/system.cc: src/python/m5/objects/System.py: src/sim/system.cc: src/sim/system.hh: update for system mem_mode src/dev/io_device.cc: Use time returned from sendAtomic to delay --HG-- extra : convert_revision : 67eedb3c84ab2584613faf88a534e793926fc92f --- configs/test/SysPaths.py | 52 +++++++++++++++----------------- configs/test/fs.py | 14 ++++++++- configs/test/test.py | 5 +++ src/arch/alpha/freebsd/system.cc | 4 +++ src/arch/alpha/linux/system.cc | 4 +++ src/arch/alpha/system.cc | 4 +++ src/arch/alpha/tru64/system.cc | 4 +++ src/arch/sparc/system.cc | 4 
+++ src/dev/io_device.cc | 7 +++-- src/python/m5/objects/System.py | 3 ++ src/sim/system.cc | 11 +++++-- src/sim/system.hh | 3 ++ 12 files changed, 82 insertions(+), 33 deletions(-) diff --git a/configs/test/SysPaths.py b/configs/test/SysPaths.py index e458d52253..3f96a546f8 100644 --- a/configs/test/SysPaths.py +++ b/configs/test/SysPaths.py @@ -2,41 +2,39 @@ import os, sys from os.path import isdir, join as joinpath from os import environ as env -systemdir = None -bindir = None -diskdir = None -scriptdir = None +def disk(file): + system() + return joinpath(disk.dir, file) -def load_defaults(): - global systemdir, bindir, diskdir, scriptdir - if not systemdir: +def binary(file): + system() + return joinpath(binary.dir, file) + +def script(file): + system() + return joinpath(script.dir, file) + +def system(): + if not system.dir: try: path = env['M5_PATH'].split(':') except KeyError: path = [ '/dist/m5/system', '/n/poolfs/z/dist/m5/system' ] - for systemdir in path: - if os.path.isdir(systemdir): + for system.dir in path: + if os.path.isdir(system.dir): break else: raise ImportError, "Can't find a path to system files." 
- if not bindir: - bindir = joinpath(systemdir, 'binaries') - if not diskdir: - diskdir = joinpath(systemdir, 'disks') - if not scriptdir: - scriptdir = joinpath(systemdir, 'boot') - -def disk(file): - load_defaults() - return joinpath(diskdir, file) - -def binary(file): - load_defaults() - return joinpath(bindir, file) - -def script(file): - load_defaults() - return joinpath(scriptdir, file) + if not binary.dir: + binary.dir = joinpath(system.dir, 'binaries') + if not disk.dir: + disk.dir = joinpath(system.dir, 'disks') + if not script.dir: + script.dir = joinpath(system.dir, 'boot') +system.dir = None +binary.dir = None +disk.dir = None +script.dir = None diff --git a/configs/test/fs.py b/configs/test/fs.py index d191f70558..41c3f8cc0a 100644 --- a/configs/test/fs.py +++ b/configs/test/fs.py @@ -17,6 +17,8 @@ if args: # Base for tests is directory containing this file. test_base = os.path.dirname(__file__) +script.dir = '/z/saidi/work/m5.newmem/configs/boot' + linux_image = env.get('LINUX_IMAGE', disk('linux-latest.img')) class IdeControllerPciData(PciConfigData): @@ -187,6 +189,7 @@ class MyLinuxAlphaSystem(LinuxAlphaSystem): intrctrl = IntrControl() if options.timing: cpu = TimingSimpleCPU() + mem_mode = 'timing' else: cpu = AtomicSimpleCPU() cpu.mem = magicbus2 @@ -194,6 +197,7 @@ class MyLinuxAlphaSystem(LinuxAlphaSystem): cpu.dcache_port = magicbus2.port cpu.itb = AlphaITB() cpu.dtb = AlphaDTB() + cpu.clock = '2GHz' sim_console = SimConsole(listener=ConsoleListener(port=3456)) kernel = binary('vmlinux') pal = binary('ts_osfpal') @@ -216,7 +220,7 @@ def DualRoot(clientSystem, serverSystem): self.etherlink = EtherLink(int1 = Parent.client.tsunami.etherint[0], int2 = Parent.server.tsunami.etherint[0], dump = Parent.etherdump) - self.clock = '5GHz' + self.clock = '1THz' return self root = DualRoot( @@ -225,6 +229,14 @@ root = DualRoot( m5.instantiate(root) +#exit_event = m5.simulate(2600000000000) +#if exit_event.getCause() != "user interrupt received": +# 
m5.checkpoint(root, 'cpt') +# exit_event = m5.simulate(300000000000) +# if exit_event.getCause() != "user interrupt received": +# m5.checkpoint(root, 'cptA') + + exit_event = m5.simulate() print 'Exiting @ cycle', m5.curTick(), 'because', exit_event.getCause() diff --git a/configs/test/test.py b/configs/test/test.py index a2c9f8bb04..feb44e2d1f 100644 --- a/configs/test/test.py +++ b/configs/test/test.py @@ -79,6 +79,11 @@ cpu.icache_port=magicbus.port cpu.dcache_port=magicbus.port system = System(physmem = mem, cpu = cpu) + +if options.timing or options.detailed: + system.mem_mode = 'timing' + + mem.port = magicbus.port root = Root(system = system) diff --git a/src/arch/alpha/freebsd/system.cc b/src/arch/alpha/freebsd/system.cc index 7cf68e0db3..8d50e16122 100644 --- a/src/arch/alpha/freebsd/system.cc +++ b/src/arch/alpha/freebsd/system.cc @@ -97,6 +97,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(FreebsdAlphaSystem) Param boot_cpu_frequency; SimObjectParam physmem; + SimpleEnumParam mem_mode; Param kernel; Param console; @@ -115,6 +116,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(FreebsdAlphaSystem) INIT_PARAM(boot_cpu_frequency, "Frequency of the boot CPU"), INIT_PARAM(physmem, "phsyical memory"), + INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)", + System::MemoryModeStrings), INIT_PARAM(kernel, "file that contains the kernel code"), INIT_PARAM(console, "file that contains the console code"), INIT_PARAM(pal, "file that contains palcode"), @@ -133,6 +136,7 @@ CREATE_SIM_OBJECT(FreebsdAlphaSystem) p->name = getInstanceName(); p->boot_cpu_frequency = boot_cpu_frequency; p->physmem = physmem; + p->mem_mode = mem_mode; p->kernel_path = kernel; p->console_path = console; p->palcode = pal; diff --git a/src/arch/alpha/linux/system.cc b/src/arch/alpha/linux/system.cc index 9fe63c390d..ef4e18cb55 100644 --- a/src/arch/alpha/linux/system.cc +++ b/src/arch/alpha/linux/system.cc @@ -191,6 +191,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(LinuxAlphaSystem) Param boot_cpu_frequency; 
SimObjectParam physmem; + SimpleEnumParam mem_mode; Param kernel; Param console; @@ -209,6 +210,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(LinuxAlphaSystem) INIT_PARAM(boot_cpu_frequency, "Frequency of the boot CPU"), INIT_PARAM(physmem, "phsyical memory"), + INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)", + System::MemoryModeStrings), INIT_PARAM(kernel, "file that contains the kernel code"), INIT_PARAM(console, "file that contains the console code"), INIT_PARAM(pal, "file that contains palcode"), @@ -227,6 +230,7 @@ CREATE_SIM_OBJECT(LinuxAlphaSystem) p->name = getInstanceName(); p->boot_cpu_frequency = boot_cpu_frequency; p->physmem = physmem; + p->mem_mode = mem_mode; p->kernel_path = kernel; p->console_path = console; p->palcode = pal; diff --git a/src/arch/alpha/system.cc b/src/arch/alpha/system.cc index dce7365aa0..a7e6155313 100644 --- a/src/arch/alpha/system.cc +++ b/src/arch/alpha/system.cc @@ -221,6 +221,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(AlphaSystem) Param boot_cpu_frequency; SimObjectParam physmem; + SimpleEnumParam mem_mode; Param kernel; Param console; @@ -239,6 +240,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(AlphaSystem) INIT_PARAM(boot_cpu_frequency, "Frequency of the boot CPU"), INIT_PARAM(physmem, "phsyical memory"), + INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)", + System::MemoryModeStrings), INIT_PARAM(kernel, "file that contains the kernel code"), INIT_PARAM(console, "file that contains the console code"), INIT_PARAM(pal, "file that contains palcode"), @@ -257,6 +260,7 @@ CREATE_SIM_OBJECT(AlphaSystem) p->name = getInstanceName(); p->boot_cpu_frequency = boot_cpu_frequency; p->physmem = physmem; + p->mem_mode = mem_mode; p->kernel_path = kernel; p->console_path = console; p->palcode = pal; diff --git a/src/arch/alpha/tru64/system.cc b/src/arch/alpha/tru64/system.cc index 6c0edc1eec..3ef1e4d3cc 100644 --- a/src/arch/alpha/tru64/system.cc +++ b/src/arch/alpha/tru64/system.cc @@ -95,6 +95,7 @@ 
BEGIN_DECLARE_SIM_OBJECT_PARAMS(Tru64AlphaSystem) Param boot_cpu_frequency; SimObjectParam physmem; + SimpleEnumParam mem_mode; Param kernel; Param console; @@ -113,6 +114,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(Tru64AlphaSystem) INIT_PARAM(boot_cpu_frequency, "frequency of the boot cpu"), INIT_PARAM(physmem, "phsyical memory"), + INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)", + System::MemoryModeStrings), INIT_PARAM(kernel, "file that contains the kernel code"), INIT_PARAM(console, "file that contains the console code"), INIT_PARAM(pal, "file that contains palcode"), @@ -131,6 +134,7 @@ CREATE_SIM_OBJECT(Tru64AlphaSystem) p->name = getInstanceName(); p->boot_cpu_frequency = boot_cpu_frequency; p->physmem = physmem; + p->mem_mode = mem_mode; p->kernel_path = kernel; p->console_path = console; p->palcode = pal; diff --git a/src/arch/sparc/system.cc b/src/arch/sparc/system.cc index e197e7918b..63cbbe057e 100644 --- a/src/arch/sparc/system.cc +++ b/src/arch/sparc/system.cc @@ -141,6 +141,7 @@ SparcSystem::unserialize(Checkpoint *cp, const std::string &section) BEGIN_DECLARE_SIM_OBJECT_PARAMS(SparcSystem) SimObjectParam physmem; + SimpleEnumParam mem_mode; Param kernel; Param reset_bin; @@ -161,6 +162,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SparcSystem) INIT_PARAM(boot_cpu_frequency, "Frequency of the boot CPU"), INIT_PARAM(physmem, "phsyical memory"), + INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)", + System::MemoryModeStrings), INIT_PARAM(kernel, "file that contains the kernel code"), INIT_PARAM(reset_bin, "file that contains the reset code"), INIT_PARAM(hypervisor_bin, "file that contains the hypervisor code"), @@ -183,6 +186,7 @@ CREATE_SIM_OBJECT(SparcSystem) p->name = getInstanceName(); p->boot_cpu_frequency = boot_cpu_frequency; p->physmem = physmem; + p->mem_mode = mem_mode; p->kernel_path = kernel; p->reset_bin = reset_bin; p->hypervisor_bin = hypervisor_bin; diff --git a/src/dev/io_device.cc b/src/dev/io_device.cc index ed2862065b..660efabfde
100644 --- a/src/dev/io_device.cc +++ b/src/dev/io_device.cc @@ -287,6 +287,7 @@ DmaPort::sendDma(Packet *pkt, bool front) // some kind of selction between access methods // more work is going to have to be done to make // switching actually work + System::MemoryMode state = sys->getMemoryMode(); if (state == System::Timing) { DPRINTF(DMA, "Attempting to send Packet %#x with addr: %#x\n", @@ -301,15 +302,15 @@ DmaPort::sendDma(Packet *pkt, bool front) DPRINTF(DMA, "-- Done\n"); } } else if (state == System::Atomic) { - sendAtomic(pkt); + Tick lat; + lat = sendAtomic(pkt); assert(pkt->senderState); DmaReqState *state = dynamic_cast(pkt->senderState); assert(state); state->numBytes += pkt->req->getSize(); if (state->totBytes == state->numBytes) { - state->completionEvent->schedule(curTick + - (pkt->time - pkt->req->getTime()) +1); + state->completionEvent->schedule(curTick + lat); delete state; delete pkt->req; } diff --git a/src/python/m5/objects/System.py b/src/python/m5/objects/System.py index 9a1e1d690b..386f39277a 100644 --- a/src/python/m5/objects/System.py +++ b/src/python/m5/objects/System.py @@ -1,9 +1,12 @@ from m5 import build_env from m5.config import * +class MemoryMode(Enum): vals = ['invalid', 'atomic', 'timing'] + class System(SimObject): type = 'System' physmem = Param.PhysicalMemory(Parent.any, "phsyical memory") + mem_mode = Param.MemoryMode('atomic', "The mode the memory system is in") if build_env['FULL_SYSTEM']: boot_cpu_frequency = Param.Frequency(Self.cpu[0].clock.frequency, "boot processor frequency") diff --git a/src/sim/system.cc b/src/sim/system.cc index 2780d0fdac..ad70b9b035 100644 --- a/src/sim/system.cc +++ b/src/sim/system.cc @@ -63,7 +63,7 @@ System::System(Params *p) #else page_ptr(0), #endif - _params(p) + memoryMode(p->mem_mode), _params(p) { // add self to global system list systemList.push_back(this); @@ -257,6 +257,9 @@ printSystems() System::printSystems(); } +const char *System::MemoryModeStrings[3] = {"invalid", "atomic", + 
"timing"}; + #if FULL_SYSTEM // In full system mode, only derived classes (e.g. AlphaLinuxSystem) @@ -269,12 +272,15 @@ DEFINE_SIM_OBJECT_CLASS_NAME("System", System) BEGIN_DECLARE_SIM_OBJECT_PARAMS(System) SimObjectParam physmem; + SimpleEnumParam mem_mode; END_DECLARE_SIM_OBJECT_PARAMS(System) BEGIN_INIT_SIM_OBJECT_PARAMS(System) - INIT_PARAM(physmem, "physical memory") + INIT_PARAM(physmem, "physical memory"), + INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)", + System::MemoryModeStrings) END_INIT_SIM_OBJECT_PARAMS(System) @@ -283,6 +289,7 @@ CREATE_SIM_OBJECT(System) System::Params *p = new System::Params; p->name = getInstanceName(); p->physmem = physmem; + p->mem_mode = mem_mode; return new System(p); } diff --git a/src/sim/system.hh b/src/sim/system.hh index 77777aed40..a1b53c2eb0 100644 --- a/src/sim/system.hh +++ b/src/sim/system.hh @@ -67,6 +67,8 @@ class System : public SimObject Timing }; + static const char *MemoryModeStrings[3]; + MemoryMode getMemoryMode() { assert(memoryMode); return memoryMode; } @@ -170,6 +172,7 @@ class System : public SimObject { std::string name; PhysicalMemory *physmem; + MemoryMode mem_mode; #if FULL_SYSTEM Tick boot_cpu_frequency; From e1b8e71500b7b66b115345eeaef7216617487456 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Thu, 13 Jul 2006 15:48:41 -0400 Subject: [PATCH 148/152] fix help when no arguments are passed to m5 --HG-- extra : convert_revision : ee6614166fd5814654309298abe5a706ff02c4c2 --- src/python/m5/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/m5/main.py b/src/python/m5/main.py index 54c54c1d5d..afe73d94ce 100644 --- a/src/python/m5/main.py +++ b/src/python/m5/main.py @@ -182,7 +182,7 @@ options = attrdict() arguments = [] def usage(exitcode=None): - print parser.help + parser.print_help() if exitcode is not None: sys.exit(exitcode) From f1a7e0d3b3dcd04d8bde65ebb7f22b5bc6f98747 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 14 Jul 2006 04:52:08 -0400 
Subject: [PATCH 149/152] MIPS specific fixes ... the main thing is that SMT threads get their own stack space instead of all stacks start to space src/arch/mips/isa_traits.hh: MaxAddr is defined in config.py now src/arch/mips/process.cc: adjust process so SMT threads get their own stack space src/arch/mips/process.hh: add stack_start static variable --HG-- extra : convert_revision : 73fdf3da9831d86536651835d209806c7f0d59da --- src/arch/mips/isa_traits.hh | 2 +- src/arch/mips/process.cc | 5 ++++- src/arch/mips/process.hh | 3 +++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/arch/mips/isa_traits.hh b/src/arch/mips/isa_traits.hh index ff994bef99..2f485c7fd9 100644 --- a/src/arch/mips/isa_traits.hh +++ b/src/arch/mips/isa_traits.hh @@ -129,7 +129,7 @@ namespace MipsISA template void zeroRegisters(TC *tc); - const Addr MaxAddr = (Addr)-1; +// const Addr MaxAddr = (Addr)-1; void copyRegs(ThreadContext *src, ThreadContext *dest); diff --git a/src/arch/mips/process.cc b/src/arch/mips/process.cc index 031c2030ec..cb847fe04f 100644 --- a/src/arch/mips/process.cc +++ b/src/arch/mips/process.cc @@ -41,6 +41,8 @@ using namespace std; using namespace MipsISA; +Addr MipsLiveProcess::stack_start = 0x7FFFFFFF; + MipsLiveProcess::MipsLiveProcess(const std::string &nm, ObjectFile *objFile, System *_system, int stdin_fd, int stdout_fd, int stderr_fd, std::vector &argv, std::vector &envp) @@ -49,10 +51,11 @@ MipsLiveProcess::MipsLiveProcess(const std::string &nm, ObjectFile *objFile, { // Set up stack. On MIPS, stack starts at the top of kuseg // user address space. MIPS stack grows down from here - stack_base = 0x7FFFFFFF; + stack_base = stack_start; // Set pointer for next thread stack. Reserve 8M for main stack. 
next_thread_stack_base = stack_base - (8 * 1024 * 1024); + stack_start = next_thread_stack_base; // Set up break point (Top of Heap) brk_point = objFile->dataBase() + objFile->dataSize() + objFile->bssSize(); diff --git a/src/arch/mips/process.hh b/src/arch/mips/process.hh index b0ef203990..4baee134b5 100644 --- a/src/arch/mips/process.hh +++ b/src/arch/mips/process.hh @@ -50,6 +50,9 @@ class MipsLiveProcess : public LiveProcess std::vector &envp); void startup(); + + + static Addr stack_start; }; From b2c51d064bc6e40e1723e04e300b8d623ef5c3d1 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 14 Jul 2006 13:06:37 -0400 Subject: [PATCH 150/152] For now, halt context is the same as deallocating. suspend context will now take the thread off the activeThread list. src/arch/mips/isa_traits.cc: add in copy MiscRegs unimplemented function --HG-- extra : convert_revision : 3ed5320b3786f84d4bb242e3a32b6f415339c3ba --- src/arch/mips/isa_traits.cc | 6 ++++++ src/cpu/o3/cpu.cc | 32 ++++++-------------------------- 2 files changed, 12 insertions(+), 26 deletions(-) diff --git a/src/arch/mips/isa_traits.cc b/src/arch/mips/isa_traits.cc index 85acc4e8cd..a8b41270ec 100644 --- a/src/arch/mips/isa_traits.cc +++ b/src/arch/mips/isa_traits.cc @@ -45,6 +45,12 @@ MipsISA::copyRegs(ThreadContext *src, ThreadContext *dest) panic("Copy Regs Not Implemented Yet\n"); } +void +MipsISA::copyMiscRegs(ThreadContext *src, ThreadContext *dest) +{ + panic("Copy Misc. 
Regs Not Implemented Yet\n"); +} + void MipsISA::MiscRegFile::copyMiscRegs(ThreadContext *tc) { diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 6e9b425c0e..349434c943 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -577,39 +577,19 @@ void FullO3CPU::suspendContext(int tid) { DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid); - unscheduleTickEvent(); + deactivateThread(); + if (activeThreads.size() == 0) + unscheduleTickEvent(); _status = Idle; -/* - //Remove From Active List, if Active - list::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); - - if (isActive != activeThreads.end()) { - DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); - } -*/ } template void FullO3CPU::haltContext(int tid) { - DPRINTF(O3CPU,"[tid:%i]: Halting Thread Context", tid); -/* - //Remove From Active List, if Active - list::iterator isActive = find( - activeThreads.begin(), activeThreads.end(), tid); - - if (isActive != activeThreads.end()) { - DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n", - tid); - activeThreads.erase(isActive); - - removeThread(tid); - } -*/ + //For now, this is the same as deallocate + DPRINTF(O3CPU,"[tid:%i]: Halt Context called. 
Deallocating", tid); + deallocateContext(tid, 1); } template From 07186de5a1b1df55a31329b2ca9c53ad168438ff Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 14 Jul 2006 13:22:35 -0400 Subject: [PATCH 151/152] forgot tid --HG-- extra : convert_revision : 272ef8f9cd0802770edc4dcef2c26dc44de71e47 --- src/cpu/o3/cpu.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 349434c943..b407f4fcc9 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -577,7 +577,7 @@ void FullO3CPU::suspendContext(int tid) { DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid); - deactivateThread(); + deactivateThread(tid); if (activeThreads.size() == 0) unscheduleTickEvent(); _status = Idle; From fe9e851e8c0a52ee412350036c94cc61c9b8dc04 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Mon, 17 Jul 2006 16:50:20 -0400 Subject: [PATCH 152/152] update test3 --HG-- extra : convert_revision : e41feeee87d1da348604a37f7349900dcbd3a4d9