From ed8564a6b9f0702a40995d95cc4da54de3d35462 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Wed, 28 Jun 2006 11:02:14 -0400 Subject: [PATCH 01/17] Was having difficulty with merging the cache, reverted to an early version and will add back in the patches to make it work soon. src/mem/cache/prefetch/tagged_prefetcher_impl.hh: Trying to merge src/mem/cache/base_cache.cc: src/mem/cache/base_cache.hh: src/mem/cache/cache.cc: src/mem/cache/cache.hh: src/mem/cache/cache_blk.hh: src/mem/cache/cache_builder.cc: src/mem/cache/cache_impl.hh: src/mem/cache/coherence/coherence_protocol.cc: src/mem/cache/coherence/coherence_protocol.hh: src/mem/cache/coherence/simple_coherence.hh: src/mem/cache/coherence/uni_coherence.cc: src/mem/cache/coherence/uni_coherence.hh: src/mem/cache/miss/blocking_buffer.cc: src/mem/cache/miss/blocking_buffer.hh: src/mem/cache/miss/miss_queue.cc: src/mem/cache/miss/miss_queue.hh: src/mem/cache/miss/mshr.cc: src/mem/cache/miss/mshr.hh: src/mem/cache/miss/mshr_queue.cc: src/mem/cache/miss/mshr_queue.hh: src/mem/cache/prefetch/base_prefetcher.cc: src/mem/cache/prefetch/base_prefetcher.hh: src/mem/cache/prefetch/ghb_prefetcher.cc: src/mem/cache/prefetch/ghb_prefetcher.hh: src/mem/cache/prefetch/stride_prefetcher.cc: src/mem/cache/prefetch/stride_prefetcher.hh: src/mem/cache/prefetch/tagged_prefetcher.hh: src/mem/cache/tags/base_tags.cc: src/mem/cache/tags/base_tags.hh: src/mem/cache/tags/fa_lru.cc: src/mem/cache/tags/fa_lru.hh: src/mem/cache/tags/iic.cc: src/mem/cache/tags/iic.hh: src/mem/cache/tags/lru.cc: src/mem/cache/tags/lru.hh: src/mem/cache/tags/repl/gen.cc: src/mem/cache/tags/repl/gen.hh: src/mem/cache/tags/repl/repl.cc: src/mem/cache/tags/repl/repl.hh: src/mem/cache/tags/split.cc: src/mem/cache/tags/split.hh: src/mem/cache/tags/split_blk.hh: src/mem/cache/tags/split_lifo.cc: src/mem/cache/tags/split_lifo.hh: src/mem/cache/tags/split_lru.cc: src/mem/cache/tags/split_lru.hh: Pulling an early version of the cache into the tree due to merging issues. Will apply patches and push. --HG-- extra : convert_revision : 3276e5fb9a6272681a1690babf2b586dd0e1f380 --- src/mem/cache/base_cache.cc | 330 +++++++ src/mem/cache/base_cache.hh | 480 ++++++++++ src/mem/cache/cache.cc | 146 +++ src/mem/cache/cache.hh | 264 ++++++ src/mem/cache/cache_blk.hh | 203 ++++ src/mem/cache/cache_builder.cc | 482 ++++++++++ src/mem/cache/cache_impl.hh | 661 +++++++++++++ src/mem/cache/coherence/coherence_protocol.cc | 566 ++++++++++++ src/mem/cache/coherence/coherence_protocol.hh | 263 ++++++ src/mem/cache/coherence/simple_coherence.hh | 161 ++++ src/mem/cache/coherence/uni_coherence.cc | 89 ++ src/mem/cache/coherence/uni_coherence.hh | 136 +++ src/mem/cache/miss/blocking_buffer.cc | 261 ++++++ src/mem/cache/miss/blocking_buffer.hh | 257 ++++++ src/mem/cache/miss/miss_queue.cc | 736 +++++++++++++++ src/mem/cache/miss/miss_queue.hh | 349 +++++++ src/mem/cache/miss/mshr.cc | 182 ++++ src/mem/cache/miss/mshr.hh | 179 ++++ src/mem/cache/miss/mshr_queue.cc | 269 ++++++ src/mem/cache/miss/mshr_queue.hh | 239 +++++ src/mem/cache/prefetch/base_prefetcher.cc | 250 +++++ src/mem/cache/prefetch/base_prefetcher.hh | 117 +++ src/mem/cache/prefetch/ghb_prefetcher.cc | 54 ++ src/mem/cache/prefetch/ghb_prefetcher.hh | 114 +++ src/mem/cache/prefetch/stride_prefetcher.cc | 54 ++ src/mem/cache/prefetch/stride_prefetcher.hh | 149 +++ src/mem/cache/prefetch/tagged_prefetcher.hh | 71 ++ .../cache/prefetch/tagged_prefetcher_impl.hh | 4 +- src/mem/cache/tags/base_tags.cc | 91 ++ src/mem/cache/tags/base_tags.hh | 143 +++ src/mem/cache/tags/fa_lru.cc | 334 +++++++ src/mem/cache/tags/fa_lru.hh | 346 +++++++ src/mem/cache/tags/iic.cc | 869 ++++++++++++++++++ src/mem/cache/tags/iic.hh | 574 ++++++++++++ src/mem/cache/tags/lru.cc | 310 +++++++ src/mem/cache/tags/lru.hh | 327 +++++++ src/mem/cache/tags/repl/gen.cc | 277 ++++++ src/mem/cache/tags/repl/gen.hh | 247 +++++ src/mem/cache/tags/repl/repl.cc | 43 + src/mem/cache/tags/repl/repl.hh | 129 +++ src/mem/cache/tags/split.cc | 478 ++++++++++ src/mem/cache/tags/split.hh | 335 +++++++ src/mem/cache/tags/split_blk.hh | 68 ++ src/mem/cache/tags/split_lifo.cc | 405 ++++++++ src/mem/cache/tags/split_lifo.hh | 350 +++++++ src/mem/cache/tags/split_lru.cc | 331 +++++++ src/mem/cache/tags/split_lru.hh | 333 +++++++ 47 files changed, 13054 insertions(+), 2 deletions(-) create mode 100644 src/mem/cache/base_cache.cc create mode 100644 src/mem/cache/base_cache.hh create mode 100644 src/mem/cache/cache.cc create mode 100644 src/mem/cache/cache.hh create mode 100644 src/mem/cache/cache_blk.hh create mode 100644 src/mem/cache/cache_builder.cc create mode 100644 src/mem/cache/cache_impl.hh create mode 100644 src/mem/cache/coherence/coherence_protocol.cc create mode 100644 src/mem/cache/coherence/coherence_protocol.hh create mode 100644 src/mem/cache/coherence/simple_coherence.hh create mode 100644 src/mem/cache/coherence/uni_coherence.cc create mode 100644 src/mem/cache/coherence/uni_coherence.hh create mode 100644 src/mem/cache/miss/blocking_buffer.cc create mode 100644 src/mem/cache/miss/blocking_buffer.hh create mode 100644 src/mem/cache/miss/miss_queue.cc create mode 100644 src/mem/cache/miss/miss_queue.hh create mode 100644 src/mem/cache/miss/mshr.cc create mode 100644 src/mem/cache/miss/mshr.hh create mode 100644 src/mem/cache/miss/mshr_queue.cc create mode 100644 src/mem/cache/miss/mshr_queue.hh create mode 100644 src/mem/cache/prefetch/base_prefetcher.cc create mode 100644 src/mem/cache/prefetch/base_prefetcher.hh create mode 100644 src/mem/cache/prefetch/ghb_prefetcher.cc create mode 100644 src/mem/cache/prefetch/ghb_prefetcher.hh create mode 100644 src/mem/cache/prefetch/stride_prefetcher.cc create mode 100644 src/mem/cache/prefetch/stride_prefetcher.hh create mode 100644 src/mem/cache/prefetch/tagged_prefetcher.hh create mode 100644 src/mem/cache/tags/base_tags.cc create mode 100644 src/mem/cache/tags/base_tags.hh create mode 100644 src/mem/cache/tags/fa_lru.cc create mode 100644 src/mem/cache/tags/fa_lru.hh create mode 100644 src/mem/cache/tags/iic.cc create mode 100644 src/mem/cache/tags/iic.hh create mode 100644 src/mem/cache/tags/lru.cc create mode 100644 src/mem/cache/tags/lru.hh create mode 100644 src/mem/cache/tags/repl/gen.cc create mode 100644 src/mem/cache/tags/repl/gen.hh create mode 100644 src/mem/cache/tags/repl/repl.cc create mode 100644 src/mem/cache/tags/repl/repl.hh create mode 100644 src/mem/cache/tags/split.cc create mode 100644 src/mem/cache/tags/split.hh create mode 100644 src/mem/cache/tags/split_blk.hh create mode 100644 src/mem/cache/tags/split_lifo.cc create mode 100644 src/mem/cache/tags/split_lifo.hh create mode 100644 src/mem/cache/tags/split_lru.cc create mode 100644 src/mem/cache/tags/split_lru.hh diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc new file mode 100644 index 0000000000..10a49edb1f --- /dev/null +++ b/src/mem/cache/base_cache.cc @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Definition of BaseCache functions. + */ + +#include "mem/cache/base_cache.hh" +#include "cpu/smt.hh" +#include "cpu/base.hh" + +using namespace std; + +BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache, + bool _isCpuSide) + : Port(_name), cache(_cache), isCpuSide(_isCpuSide) +{ + blocked = false; + //Start ports at null if more than one is created we should panic + cpuSidePort = NULL; + memSidePort = NULL; +} + +bool +BaseCache::CachePort::recvStatusChange(Port::Status status) +{ + cache->recvStatusChange(status, isCpuSide); +} + +void +BaseCache::CachePort::getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) +{ + cache->getAddressRanges(resp, snoop); +} + +int +BaseCache::CachePort::deviceBlockSize() +{ + return cache->getBlockSize(); +} + +bool +BaseCache::CachePort::recvTiming(Packet *pkt) +{ + return cache->doTimingAccess(pkt, this, isCpuSide); +} + +Tick +BaseCache::CachePort::recvAtomic(Packet *pkt) +{ + return cache->doAtomicAccess(pkt, isCpuSide); +} + +void +BaseCache::CachePort::recvFunctional(Packet *pkt) +{ + cache->doFunctionalAccess(pkt, isCpuSide); +} + +void +BaseCache::CachePort::setBlocked() +{ + blocked = true; +} + +void +BaseCache::CachePort::clearBlocked() +{ + blocked = false; +} + +Port* +BaseCache::getPort(const std::string &if_name) +{ + if(if_name == "cpu_side") + { + if(cpuSidePort != NULL) + panic("Already have a cpu side for this cache\n"); + cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true); + return cpuSidePort; + } + else if(if_name == "mem_side") + { + if(memSidePort != NULL) + panic("Already have a mem side for this cache\n"); + memSidePort = new CachePort(name() + "-mem_side_port", this, false); + return memSidePort; + } + else panic("Port name %s unrecognized\n", if_name); +} + +void +BaseCache::regStats() +{ + using namespace Stats; + + // Hit statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + hits[access_idx] + .init(maxThreadsPerCPU) + .name(name() + "." + cstr + "_hits") + .desc("number of " + cstr + " hits") + .flags(total | nozero | nonan) + ; + } + + demandHits + .name(name() + ".demand_hits") + .desc("number of demand (read+write) hits") + .flags(total) + ; + demandHits = hits[Read] + hits[Write]; + + overallHits + .name(name() + ".overall_hits") + .desc("number of overall hits") + .flags(total) + ; + overallHits = demandHits + hits[Soft_Prefetch] + hits[Hard_Prefetch] + + hits[Writeback]; + + // Miss statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + misses[access_idx] + .init(maxThreadsPerCPU) + .name(name() + "." + cstr + "_misses") + .desc("number of " + cstr + " misses") + .flags(total | nozero | nonan) + ; + } + + demandMisses + .name(name() + ".demand_misses") + .desc("number of demand (read+write) misses") + .flags(total) + ; + demandMisses = misses[Read] + misses[Write]; + + overallMisses + .name(name() + ".overall_misses") + .desc("number of overall misses") + .flags(total) + ; + overallMisses = demandMisses + misses[Soft_Prefetch] + + misses[Hard_Prefetch] + misses[Writeback]; + + // Miss latency statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + missLatency[access_idx] + .init(maxThreadsPerCPU) + .name(name() + "." + cstr + "_miss_latency") + .desc("number of " + cstr + " miss cycles") + .flags(total | nozero | nonan) + ; + } + + demandMissLatency + .name(name() + ".demand_miss_latency") + .desc("number of demand (read+write) miss cycles") + .flags(total) + ; + demandMissLatency = missLatency[Read] + missLatency[Write]; + + overallMissLatency + .name(name() + ".overall_miss_latency") + .desc("number of overall miss cycles") + .flags(total) + ; + overallMissLatency = demandMissLatency + missLatency[Soft_Prefetch] + + missLatency[Hard_Prefetch]; + + // access formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + accesses[access_idx] + .name(name() + "." + cstr + "_accesses") + .desc("number of " + cstr + " accesses(hits+misses)") + .flags(total | nozero | nonan) + ; + + accesses[access_idx] = hits[access_idx] + misses[access_idx]; + } + + demandAccesses + .name(name() + ".demand_accesses") + .desc("number of demand (read+write) accesses") + .flags(total) + ; + demandAccesses = demandHits + demandMisses; + + overallAccesses + .name(name() + ".overall_accesses") + .desc("number of overall (read+write) accesses") + .flags(total) + ; + overallAccesses = overallHits + overallMisses; + + // miss rate formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + missRate[access_idx] + .name(name() + "." + cstr + "_miss_rate") + .desc("miss rate for " + cstr + " accesses") + .flags(total | nozero | nonan) + ; + + missRate[access_idx] = misses[access_idx] / accesses[access_idx]; + } + + demandMissRate + .name(name() + ".demand_miss_rate") + .desc("miss rate for demand accesses") + .flags(total) + ; + demandMissRate = demandMisses / demandAccesses; + + overallMissRate + .name(name() + ".overall_miss_rate") + .desc("miss rate for overall accesses") + .flags(total) + ; + overallMissRate = overallMisses / overallAccesses; + + // miss latency formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + avgMissLatency[access_idx] + .name(name() + "." + cstr + "_avg_miss_latency") + .desc("average " + cstr + " miss latency") + .flags(total | nozero | nonan) + ; + + avgMissLatency[access_idx] = + missLatency[access_idx] / misses[access_idx]; + } + + demandAvgMissLatency + .name(name() + ".demand_avg_miss_latency") + .desc("average overall miss latency") + .flags(total) + ; + demandAvgMissLatency = demandMissLatency / demandMisses; + + overallAvgMissLatency + .name(name() + ".overall_avg_miss_latency") + .desc("average overall miss latency") + .flags(total) + ; + overallAvgMissLatency = overallMissLatency / overallMisses; + + blocked_cycles.init(NUM_BLOCKED_CAUSES); + blocked_cycles + .name(name() + ".blocked_cycles") + .desc("number of cycles access was blocked") + .subname(Blocked_NoMSHRs, "no_mshrs") + .subname(Blocked_NoTargets, "no_targets") + ; + + + blocked_causes.init(NUM_BLOCKED_CAUSES); + blocked_causes + .name(name() + ".blocked") + .desc("number of cycles access was blocked") + .subname(Blocked_NoMSHRs, "no_mshrs") + .subname(Blocked_NoTargets, "no_targets") + ; + + avg_blocked + .name(name() + ".avg_blocked_cycles") + .desc("average number of cycles each access was blocked") + .subname(Blocked_NoMSHRs, "no_mshrs") + .subname(Blocked_NoTargets, "no_targets") + ; + + avg_blocked = blocked_cycles / blocked_causes; + + fastWrites + .name(name() + ".fast_writes") + .desc("number of fast writes performed") + ; + + cacheCopies + .name(name() + ".cache_copies") + .desc("number of cache copies performed") + ; +} diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh new file mode 100644 index 0000000000..0170b02494 --- /dev/null +++ b/src/mem/cache/base_cache.hh @@ -0,0 +1,480 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Declares a basic cache interface BaseCache. + */ + +#ifndef __BASE_CACHE_HH__ +#define __BASE_CACHE_HH__ + +#include +#include +#include +#include + +#include "base/statistics.hh" +#include "base/trace.hh" +#include "mem/mem_object.hh" +#include "mem/packet.hh" +#include "mem/port.hh" +#include "mem/request.hh" + +/** + * Reasons for Caches to be Blocked. + */ +enum BlockedCause{ + Blocked_NoMSHRs, + Blocked_NoTargets, + Blocked_NoWBBuffers, + Blocked_Coherence, + Blocked_Copy, + NUM_BLOCKED_CAUSES +}; + +/** + * Reasons for cache to request a bus. + */ +enum RequestCause{ + Request_MSHR, + Request_WB, + Request_Coherence, + Request_PF +}; + +/** + * A basic cache interface. Implements some common functions for speed. + */ +class BaseCache : public MemObject +{ + class CachePort : public Port + { + BaseCache *cache; + + public: + CachePort(const std::string &_name, BaseCache *_cache, bool _isCpuSide); + + private: + virtual bool recvTiming(Packet *pkt); + + virtual Tick recvAtomic(Packet *pkt); + + virtual void recvFunctional(Packet *pkt); + + virtual void recvStatusChange(Status status); + + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop); + + virtual int deviceBlockSize(); + + void setBlocked(); + + void clearBlocked(); + + bool blocked; + + bool isCpuSide; + }; + + struct CacheEvent : public Event + { + Packet *pkt; + CachePort *cachePort; + + CacheResponseEvent(Packet *pkt, CachePort *cachePort); + void process(); + const char *description(); + } + + protected: + CachePort *cpuSidePort; + CachePort *memSidePort; + + public: + virtual Port *getPort(const std::string &if_name); + + private: + //To be defined in cache_impl.hh not in base class + virtual bool doTimingAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide); + virtual Tick doAtomicAccess(Packet *pkt, bool isCpuSide); + virtual void doFunctionalAccess(Packet *pkt, bool isCpuSide); + virtual void recvStatusChange(Port::Status status, bool isCpuSide); + + /** + * Bit vector of the blocking reasons for the access path. + * @sa #BlockedCause + */ + uint8_t blocked; + + /** + * Bit vector for the blocking reasons for the snoop path. + * @sa #BlockedCause + */ + uint8_t blockedSnoop; + + /** + * Bit vector for the outstanding requests for the master interface. + */ + uint8_t masterRequests; + + /** + * Bit vector for the outstanding requests for the slave interface. + */ + uint8_t slaveRequests; + + protected: + + /** True if this cache is connected to the CPU. */ + bool topLevelCache; + + /** Stores time the cache blocked for statistics. */ + Tick blockedCycle; + + /** Block size of this cache */ + const int blkSize; + + /** The number of misses to trigger an exit event. */ + Counter missCount; + + public: + // Statistics + /** + * @addtogroup CacheStatistics + * @{ + */ + + /** Number of hits per thread for each type of command. @sa Packet::Command */ + Stats::Vector<> hits[NUM_MEM_CMDS]; + /** Number of hits for demand accesses. */ + Stats::Formula demandHits; + /** Number of hit for all accesses. */ + Stats::Formula overallHits; + + /** Number of misses per thread for each type of command. @sa Packet::Command */ + Stats::Vector<> misses[NUM_MEM_CMDS]; + /** Number of misses for demand accesses. */ + Stats::Formula demandMisses; + /** Number of misses for all accesses. */ + Stats::Formula overallMisses; + + /** + * Total number of cycles per thread/command spent waiting for a miss. + * Used to calculate the average miss latency. + */ + Stats::Vector<> missLatency[NUM_MEM_CMDS]; + /** Total number of cycles spent waiting for demand misses. */ + Stats::Formula demandMissLatency; + /** Total number of cycles spent waiting for all misses. */ + Stats::Formula overallMissLatency; + + /** The number of accesses per command and thread. */ + Stats::Formula accesses[NUM_MEM_CMDS]; + /** The number of demand accesses. */ + Stats::Formula demandAccesses; + /** The number of overall accesses. */ + Stats::Formula overallAccesses; + + /** The miss rate per command and thread. */ + Stats::Formula missRate[NUM_MEM_CMDS]; + /** The miss rate of all demand accesses. */ + Stats::Formula demandMissRate; + /** The miss rate for all accesses. */ + Stats::Formula overallMissRate; + + /** The average miss latency per command and thread. */ + Stats::Formula avgMissLatency[NUM_MEM_CMDS]; + /** The average miss latency for demand misses. */ + Stats::Formula demandAvgMissLatency; + /** The average miss latency for all misses. */ + Stats::Formula overallAvgMissLatency; + + /** The total number of cycles blocked for each blocked cause. */ + Stats::Vector<> blocked_cycles; + /** The number of times this cache blocked for each blocked cause. */ + Stats::Vector<> blocked_causes; + + /** The average number of cycles blocked for each blocked cause. */ + Stats::Formula avg_blocked; + + /** The number of fast writes (WH64) performed. */ + Stats::Scalar<> fastWrites; + + /** The number of cache copies performed. */ + Stats::Scalar<> cacheCopies; + + /** + * @} + */ + + /** + * Register stats for this object. + */ + virtual void regStats(); + + public: + + class Params + { + public: + /** List of address ranges of this cache. */ + std::vector > addrRange; + /** The hit latency for this cache. */ + int hitLatency; + /** The block size of this cache. */ + int blkSize; + /** + * The maximum number of misses this cache should handle before + * ending the simulation. + */ + Counter maxMisses; + + /** + * Construct an instance of this parameter class. + */ + Params(std::vector > addr_range, + int hit_latency, int _blkSize, Counter max_misses) + : addrRange(addr_range), hitLatency(hit_latency), blkSize(_blkSize), + maxMisses(max_misses) + { + } + }; + + /** + * Create and initialize a basic cache object. + * @param name The name of this cache. + * @param hier_params Pointer to the HierParams object for this hierarchy + * of this cache. + * @param params The parameter object for this BaseCache. + */ + BaseCache(const std::string &name, HierParams *hier_params, Params ¶ms) + : BaseMem(name, hier_params, params.hitLatency, params.addrRange), + blocked(0), blockedSnoop(0), masterRequests(0), slaveRequests(0), + topLevelCache(false), blkSize(params.blkSize), + missCount(params.maxMisses) + { + } + + /** + * Query block size of a cache. + * @return The block size + */ + int getBlockSize() const + { + return blkSize; + } + + /** + * Returns true if this cache is connect to the CPU. + * @return True if this is a L1 cache. + */ + bool isTopLevel() + { + return topLevelCache; + } + + /** + * Returns true if the cache is blocked for accesses. + */ + bool isBlocked() + { + return blocked != 0; + } + + /** + * Returns true if the cache is blocked for snoops. + */ + bool isBlockedForSnoop() + { + return blockedSnoop != 0; + } + + /** + * Marks the access path of the cache as blocked for the given cause. This + * also sets the blocked flag in the slave interface. + * @param cause The reason for the cache blocking. + */ + void setBlocked(BlockedCause cause) + { + uint8_t flag = 1 << cause; + if (blocked == 0) { + blocked_causes[cause]++; + blockedCycle = curTick; + } + blocked |= flag; + DPRINTF(Cache,"Blocking for cause %s\n", cause); + cpuSidePort->setBlocked(); + } + + /** + * Marks the snoop path of the cache as blocked for the given cause. This + * also sets the blocked flag in the master interface. + * @param cause The reason to block the snoop path. + */ + void setBlockedForSnoop(BlockedCause cause) + { + uint8_t flag = 1 << cause; + blockedSnoop |= flag; + memSidePort->setBlocked(); + } + + /** + * Marks the cache as unblocked for the given cause. This also clears the + * blocked flags in the appropriate interfaces. + * @param cause The newly unblocked cause. + * @warning Calling this function can cause a blocked request on the bus to + * access the cache. The cache must be in a state to handle that request. + */ + void clearBlocked(BlockedCause cause) + { + uint8_t flag = 1 << cause; + blocked &= ~flag; + blockedSnoop &= ~flag; + DPRINTF(Cache,"Unblocking for cause %s, causes left=%i\n", + cause, blocked); + if (!isBlocked()) { + blocked_cycles[cause] += curTick - blockedCycle; + DPRINTF(Cache,"Unblocking from all causes\n"); + cpuSidePort->clearBlocked(); + } + if (!isBlockedForSnoop()) { + memSidePort->clearBlocked(); + } + + } + + /** + * True if the master bus should be requested. + * @return True if there are outstanding requests for the master bus. + */ + bool doMasterRequest() + { + return masterRequests != 0; + } + + /** + * Request the master bus for the given cause and time. + * @param cause The reason for the request. + * @param time The time to make the request. + */ + void setMasterRequest(RequestCause cause, Tick time) + { + uint8_t flag = 1<pktuest(time); + } + + /** + * Clear the master bus request for the given cause. + * @param cause The request reason to clear. + */ + void clearMasterRequest(RequestCause cause) + { + uint8_t flag = 1<pktuest(time); + } + + /** + * Clear the slave bus request for the given reason. + * @param cause The request reason to clear. + */ + void clearSlaveRequest(RequestCause cause) + { + uint8_t flag = 1<respond(pkt,time); + } + + /** + * Send a reponse to the slave interface and calculate miss latency. + * @param req The request to respond to. + * @param time The time the response is ready. + */ + void respondToMiss(Packet *pkt, Tick time) + { + if (!pkt->isUncacheable()) { + missLatency[pkt->cmd.toIndex()][pkt->thread_num] += time - pkt->time; + } + assert("Implement\n" && 0); +// si->respond(pkt,time); + } + + /** + * Suppliess the data if cache to cache transfers are enabled. + * @param req The bus transaction to fulfill. + */ + void respondToSnoop(Packet *pkt) + { + assert("Implement\n" && 0); +// mi->respond(pkt,curTick + hitLatency); + } + + /** + * Notification from master interface that a address range changed. Nothing + * to do for a cache. + */ + void rangeChange() {} +}; + +#endif //__BASE_CACHE_HH__ diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc new file mode 100644 index 0000000000..db66c096e6 --- /dev/null +++ b/src/mem/cache/cache.cc @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Steve Reinhardt + * Lisa Hsu + * Kevin Lim + */ + +/** + * @file + * Cache template instantiations. + */ + +#include "mem/config/cache.hh" +#include "mem/config/compression.hh" + +#include "mem/cache/tags/cache_tags.hh" + +#if defined(USE_CACHE_LRU) +#include "mem/cache/tags/lru.hh" +#endif + +#if defined(USE_CACHE_FALRU) +#include "mem/cache/tags/fa_lru.hh" +#endif + +#if defined(USE_CACHE_IIC) +#include "mem/cache/tags/iic.hh" +#endif + +#if defined(USE_CACHE_SPLIT) +#include "mem/cache/tags/split.hh" +#endif + +#if defined(USE_CACHE_SPLIT_LIFO) +#include "mem/cache/tags/split_lifo.hh" +#endif + +#include "base/compression/null_compression.hh" +#if defined(USE_LZSS_COMPRESSION) +#include "base/compression/lzss_compression.hh" +#endif + +#include "mem/cache/miss/miss_queue.hh" +#include "mem/cache/miss/blocking_buffer.hh" + +#include "mem/cache/coherence/uni_coherence.hh" +#include "mem/cache/coherence/simple_coherence.hh" + +#include "mem/cache/cache_impl.hh" + +// Template Instantiations +#ifndef DOXYGEN_SHOULD_SKIP_THIS + + +#if defined(USE_CACHE_FALRU) +template class Cache, BlockingBuffer, SimpleCoherence>; +template class Cache, BlockingBuffer, UniCoherence>; +template class Cache, MissQueue, SimpleCoherence>; +template class Cache, MissQueue, UniCoherence>; +#if defined(USE_LZSS_COMPRESSION) +template class Cache, BlockingBuffer, SimpleCoherence>; +template class Cache, BlockingBuffer, UniCoherence>; +template class Cache, MissQueue, SimpleCoherence>; +template class Cache, MissQueue, UniCoherence>; +#endif +#endif + +#if defined(USE_CACHE_IIC) +template class Cache, BlockingBuffer, SimpleCoherence>; +template class Cache, BlockingBuffer, UniCoherence>; +template class Cache, MissQueue, SimpleCoherence>; +template class Cache, MissQueue, UniCoherence>; +#if defined(USE_LZSS_COMPRESSION) +template class Cache, BlockingBuffer, SimpleCoherence>; +template class Cache, BlockingBuffer, UniCoherence>; +template class Cache, MissQueue, SimpleCoherence>; +template class Cache, MissQueue, UniCoherence>; +#endif +#endif + +#if defined(USE_CACHE_LRU) +template class Cache, BlockingBuffer, SimpleCoherence>; +template class Cache, BlockingBuffer, UniCoherence>; +template class Cache, MissQueue, SimpleCoherence>; +template class Cache, MissQueue, UniCoherence>; +#if defined(USE_LZSS_COMPRESSION) +template class Cache, BlockingBuffer, SimpleCoherence>; +template class Cache, BlockingBuffer, UniCoherence>; +template class Cache, MissQueue, SimpleCoherence>; +template class Cache, MissQueue, UniCoherence>; +#endif +#endif + +#if defined(USE_CACHE_SPLIT) +template class Cache, BlockingBuffer, SimpleCoherence>; +template class Cache, BlockingBuffer, UniCoherence>; +template class Cache, MissQueue, SimpleCoherence>; +template class Cache, MissQueue, UniCoherence>; +#if defined(USE_LZSS_COMPRESSION) +template class Cache, BlockingBuffer, SimpleCoherence>; +template class Cache, BlockingBuffer, UniCoherence>; +template class Cache, MissQueue, SimpleCoherence>; +template class Cache, MissQueue, UniCoherence>; +#endif +#endif + +#if defined(USE_CACHE_SPLIT_LIFO) +template class Cache, BlockingBuffer, SimpleCoherence>; +template class Cache, BlockingBuffer, UniCoherence>; +template class Cache, MissQueue, SimpleCoherence>; +template class Cache, MissQueue, UniCoherence>; +#if defined(USE_LZSS_COMPRESSION) +template class Cache, BlockingBuffer, SimpleCoherence>; +template class Cache, BlockingBuffer, UniCoherence>; +template class Cache, MissQueue, SimpleCoherence>; +template class Cache, MissQueue, UniCoherence>; +#endif +#endif + +#endif //DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh new file mode 100644 index 0000000000..dcb22a99c9 --- /dev/null +++ b/src/mem/cache/cache.hh @@ -0,0 +1,264 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Dave Greene + * Steve Reinhardt + */ + +/** + * @file + * Describes a cache based on template policies. + */ + +#ifndef __CACHE_HH__ +#define __CACHE_HH__ + +#include "base/misc.hh" // fatal, panic, and warn +#include "cpu/smt.hh" // SMT_MAX_THREADS + +#include "mem/cache/base_cache.hh" +#include "mem/cache/prefetch/prefetcher.hh" + +// forward declarations +class Bus; +class ExecContext; + +/** + * A template-policy based cache. The behavior of the cache can be altered by + * supplying different template policies. TagStore handles all tag and data + * storage @sa TagStore. Buffering handles all misses and writes/writebacks + * @sa MissQueue. Coherence handles all coherence policy details @sa + * UniCoherence, SimpleMultiCoherence. + */ +template +class Cache : public BaseCache +{ + public: + /** Define the type of cache block to use. */ + typedef typename TagStore::BlkType BlkType; + + bool prefetchAccess; + protected: + + /** Tag and data Storage */ + TagStore *tags; + /** Miss and Writeback handler */ + Buffering *missQueue; + /** Coherence protocol. */ + Coherence *coherence; + + /** Prefetcher */ + Prefetcher *prefetcher; + + /** Do fast copies in this cache. */ + bool doCopy; + + /** Block on a delayed copy. */ + bool blockOnCopy; + + /** + * The clock ratio of the outgoing bus. + * Used for calculating critical word first. + */ + int busRatio; + + /** + * The bus width in bytes of the outgoing bus. + * Used for calculating critical word first. + */ + int busWidth; + + /** + * A permanent mem req to always be used to cause invalidations. + * Used to append to target list, to cause an invalidation. + */ + Packet * invalidatePkt; + + /** + * Temporarily move a block into a MSHR. + * @todo Remove this when LSQ/SB are fixed and implemented in memtest. + */ + void pseudoFill(Addr addr, int asid); + + /** + * Temporarily move a block into an existing MSHR. + * @todo Remove this when LSQ/SB are fixed and implemented in memtest. + */ + void pseudoFill(MSHR *mshr); + + public: + + class Params + { + public: + TagStore *tags; + Buffering *missQueue; + Coherence *coherence; + bool doCopy; + bool blockOnCopy; + BaseCache::Params baseParams; + Bus *in; + Bus *out; + Prefetcher *prefetcher; + bool prefetchAccess; + + Params(TagStore *_tags, Buffering *mq, Coherence *coh, + bool do_copy, BaseCache::Params params, Bus * in_bus, + Bus * out_bus, Prefetcher *_prefetcher, + bool prefetch_access) + : tags(_tags), missQueue(mq), coherence(coh), doCopy(do_copy), + blockOnCopy(false), baseParams(params), in(in_bus), out(out_bus), + prefetcher(_prefetcher), prefetchAccess(prefetch_access) + { + } + }; + + /** Instantiates a basic cache object. */ + Cache(const std::string &_name, HierParams *hier_params, Params ¶ms); + + void regStats(); + + /** + * Performs the access specified by the request. + * @param req The request to perform. + * @return The result of the access. + */ + MemAccessResult access(Packet * &pkt); + + /** + * Selects a request to send on the bus. + * @return The memory request to service. + */ + Packet * getPacket(); + + /** + * Was the request was sent successfully? + * @param req The request. + * @param success True if the request was sent successfully. + */ + void sendResult(Packet * &pkt, bool success); + + /** + * Handles a response (cache line fill/write ack) from the bus. + * @param req The request being responded to. + */ + void handleResponse(Packet * &pkt); + + /** + * Start handling a copy transaction. + * @param req The copy request to perform. + */ + void startCopy(Packet * &pkt); + + /** + * Handle a delayed copy transaction. + * @param req The delayed copy request to continue. + * @param addr The address being responded to. + * @param blk The block of the current response. + * @param mshr The mshr being handled. + */ + void handleCopy(Packet * &pkt, Addr addr, BlkType *blk, MSHR *mshr); + + /** + * Selects a coherence message to forward to lower levels of the hierarchy. + * @return The coherence message to forward. + */ + Packet * getCoherenceReq(); + + /** + * Snoops bus transactions to maintain coherence. + * @param req The current bus transaction. + */ + void snoop(Packet * &pkt); + + void snoopResponse(Packet * &pkt); + + /** + * Invalidates the block containing address if found. + * @param addr The address to look for. + * @param asid The address space ID of the address. + * @todo Is this function necessary? + */ + void invalidateBlk(Addr addr, int asid); + + /** + * Aquash all requests associated with specified thread. + * intended for use by I-cache. + * @param thread_number The thread to squash. + */ + void squash(int thread_number) + { + missQueue->squash(thread_number); + } + + /** + * Return the number of outstanding misses in a Cache. + * Default returns 0. + * + * @retval unsigned The number of missing still outstanding. + */ + unsigned outstandingMisses() const + { + return missQueue->getMisses(); + } + + /** + * Send a response to the slave interface. + * @param req The request being responded to. + * @param time The time the response is ready. + */ + void respond(Packet * &pkt, Tick time) + { + si->respond(pkt,time); + } + + /** + * Perform the access specified in the request and return the estimated + * time of completion. This function can either update the hierarchy state + * or just perform the access wherever the data is found depending on the + * state of the update flag. + * @param req The memory request to satisfy + * @param update If true, update the hierarchy, otherwise just perform the + * request. + * @return The estimated completion time. + */ + Tick probe(Packet * &pkt, bool update); + + /** + * Snoop for the provided request in the cache and return the estimated + * time of completion. + * @todo Can a snoop probe not change state? + * @param req The memory request to satisfy + * @param update If true, update the hierarchy, otherwise just perform the + * request. + * @return The estimated completion time. + */ + Tick snoopProbe(Packet * &pkt, bool update); +}; + +#endif // __CACHE_HH__ diff --git a/src/mem/cache/cache_blk.hh b/src/mem/cache/cache_blk.hh new file mode 100644 index 0000000000..cf1bd20e2e --- /dev/null +++ b/src/mem/cache/cache_blk.hh @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** @file + * Definitions of a simple cache block class. + */ + +#ifndef __CACHE_BLK_HH__ +#define __CACHE_BLK_HH__ + +#include "sim/root.hh" // for Tick +#include "arch/isa_traits.hh" // for Addr +#include "cpu/exec_context.hh" + +/** + * Cache block status bit assignments + */ +enum CacheBlkStatusBits { + /** valid, readable */ + BlkValid = 0x01, + /** write permission */ + BlkWritable = 0x02, + /** dirty (modified) */ + BlkDirty = 0x04, + /** compressed */ + BlkCompressed = 0x08, + /** block was referenced */ + BlkReferenced = 0x10, + /** block was a hardware prefetch yet unaccessed*/ + BlkHWPrefetched = 0x20 +}; + +/** + * A Basic Cache block. + * Contains the tag, status, and a pointer to data. + */ +class CacheBlk +{ + public: + /** The address space ID of this block. */ + int asid; + /** Data block tag value. */ + Addr tag; + /** + * Contains a copy of the data in this block for easy access. This is used + * for efficient execution when the data could be actually stored in + * another format (COW, compressed, sub-blocked, etc). In all cases the + * data stored here should be kept consistant with the actual data + * referenced by this block. + */ + uint8_t *data; + /** the number of bytes stored in this block. */ + int size; + + /** block state: OR of CacheBlkStatusBit */ + typedef unsigned State; + + /** The current status of this block. @sa CacheBlockStatusBits */ + State status; + + /** Which curTick will this block be accessable */ + Tick whenReady; + + /** Save the exec context so that writebacks can use them. */ + ExecContext *xc; + + /** + * The set this block belongs to. + * @todo Move this into subclasses when we fix CacheTags to use them. + */ + int set; + + /** Number of references to this block since it was brought in. */ + int refCount; + + CacheBlk() + : asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0), xc(0), + set(-1), refCount(0) + {} + + /** + * Copy the state of the given block into this one. + * @param rhs The block to copy. + * @return a const reference to this block. + */ + const CacheBlk& operator=(const CacheBlk& rhs) + { + asid = rhs.asid; + tag = rhs.tag; + data = rhs.data; + size = rhs.size; + status = rhs.status; + whenReady = rhs.whenReady; + xc = rhs.xc; + set = rhs.set; + refCount = rhs.refCount; + return *this; + } + + /** + * Checks the write permissions of this block. + * @return True if the block is writable. + */ + bool isWritable() const + { + const int needed_bits = BlkWritable | BlkValid; + return (status & needed_bits) == needed_bits; + } + + /** + * Checks that a block is valid (readable). + * @return True if the block is valid. + */ + bool isValid() const + { + return (status & BlkValid) != 0; + } + + /** + * Check to see if a block has been written. + * @return True if the block is dirty. + */ + bool isModified() const + { + return (status & BlkDirty) != 0; + } + + /** + * Check to see if this block contains compressed data. + * @return True iF the block's data is compressed. + */ + bool isCompressed() const + { + return (status & BlkCompressed) != 0; + } + + /** + * Check if this block has been referenced. + * @return True if the block has been referenced. + */ + bool isReferenced() const + { + return (status & BlkReferenced) != 0; + } + + /** + * Check if this block was the result of a hardware prefetch, yet to + * be touched. + * @return True if the block was a hardware prefetch, unaccesed. + */ + bool isPrefetch() const + { + return (status & BlkHWPrefetched) != 0; + } + + +}; + +/** + * Output a CacheBlk to the given ostream. + * @param out The stream for the output. + * @param blk The cache block to print. + * + * @return The output stream. + */ +inline std::ostream & +operator<<(std::ostream &out, const CacheBlk &blk) +{ + out << std::hex << std::endl; + out << " Tag: " << blk.tag << std::endl; + out << " Status: " << blk.status << std::endl; + + return(out << std::dec); +} + +#endif //__CACHE_BLK_HH__ diff --git a/src/mem/cache/cache_builder.cc b/src/mem/cache/cache_builder.cc new file mode 100644 index 0000000000..e3efb9bc36 --- /dev/null +++ b/src/mem/cache/cache_builder.cc @@ -0,0 +1,482 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Nathan Binkert + */ + +/** + * @file + * Simobject instatiation of caches. + */ +#include + +// Must be included first to determine which caches we want +#include "mem/config/cache.hh" +#include "mem/config/compression.hh" +#include "mem/config/prefetch.hh" + +#include "mem/cache/base_cache.hh" +#include "mem/cache/cache.hh" +#include "mem/bus/bus.hh" +#include "mem/cache/coherence/coherence_protocol.hh" +#include "sim/builder.hh" + +// Tag Templates +#if defined(USE_CACHE_LRU) +#include "mem/cache/tags/lru.hh" +#endif + +#if defined(USE_CACHE_FALRU) +#include "mem/cache/tags/fa_lru.hh" +#endif + +#if defined(USE_CACHE_IIC) +#include "mem/cache/tags/iic.hh" +#endif + +#if defined(USE_CACHE_SPLIT) +#include "mem/cache/tags/split.hh" +#endif + +#if defined(USE_CACHE_SPLIT_LIFO) +#include "mem/cache/tags/split_lifo.hh" +#endif + +// Compression Templates +#include "base/compression/null_compression.hh" +#if defined(USE_LZSS_COMPRESSION) +#include "base/compression/lzss_compression.hh" +#endif + +// CacheTags Templates +#include "mem/cache/tags/cache_tags.hh" + +// MissQueue Templates +#include "mem/cache/miss/miss_queue.hh" +#include "mem/cache/miss/blocking_buffer.hh" + +// Coherence Templates +#include "mem/cache/coherence/uni_coherence.hh" +#include "mem/cache/coherence/simple_coherence.hh" + +// Bus Interfaces +#include "mem/bus/slave_interface.hh" +#include "mem/bus/master_interface.hh" +#include "mem/memory_interface.hh" + +#include "mem/trace/mem_trace_writer.hh" + +//Prefetcher Headers +#if defined(USE_GHB) +#include "mem/cache/prefetch/ghb_prefetcher.hh" +#endif +#if defined(USE_TAGGED) +#include "mem/cache/prefetch/tagged_prefetcher.hh" +#endif +#if defined(USE_STRIDED) +#include "mem/cache/prefetch/stride_prefetcher.hh" +#endif + + +using namespace std; +using namespace TheISA; + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseCache) + + Param size; + Param assoc; + Param block_size; + Param latency; + Param mshrs; + Param tgts_per_mshr; + Param write_buffers; + Param prioritizeRequests; + SimObjectParam in_bus; + SimObjectParam out_bus; + Param do_copy; + SimObjectParam protocol; + Param trace_addr; + Param hash_delay; +#if defined(USE_CACHE_IIC) + SimObjectParam repl; +#endif + Param compressed_bus; + Param store_compressed; + Param adaptive_compression; + Param compression_latency; + Param subblock_size; + Param max_miss_count; + SimObjectParam hier; + VectorParam > addr_range; + SimObjectParam mem_trace; + Param split; + Param split_size; + Param lifo; + Param two_queue; + Param prefetch_miss; + Param prefetch_access; + Param prefetcher_size; + Param prefetch_past_page; + Param prefetch_serial_squash; + Param prefetch_latency; + Param prefetch_degree; + Param prefetch_policy; + Param prefetch_cache_check_push; + Param prefetch_use_cpu_id; + Param prefetch_data_accesses_only; + +END_DECLARE_SIM_OBJECT_PARAMS(BaseCache) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(BaseCache) + + INIT_PARAM(size, "capacity in bytes"), + INIT_PARAM(assoc, "associativity"), + INIT_PARAM(block_size, "block size in bytes"), + INIT_PARAM(latency, "hit latency in CPU cycles"), + INIT_PARAM(mshrs, "number of MSHRs (max outstanding requests)"), + INIT_PARAM(tgts_per_mshr, "max number of accesses per MSHR"), + INIT_PARAM_DFLT(write_buffers, "number of write buffers", 8), + INIT_PARAM_DFLT(prioritizeRequests, "always service demand misses first", + false), + INIT_PARAM_DFLT(in_bus, "incoming bus object", NULL), + INIT_PARAM(out_bus, "outgoing bus object"), + INIT_PARAM_DFLT(do_copy, "perform fast copies in the cache", false), + INIT_PARAM_DFLT(protocol, "coherence protocol to use in the cache", NULL), + INIT_PARAM_DFLT(trace_addr, "address to trace", 0), + + INIT_PARAM_DFLT(hash_delay, "time in cycles of hash access",1), +#if defined(USE_CACHE_IIC) + INIT_PARAM_DFLT(repl, "replacement policy",NULL), +#endif + INIT_PARAM_DFLT(compressed_bus, + "This cache connects to a compressed memory", + false), + INIT_PARAM_DFLT(store_compressed, "Store compressed data in the cache", + false), + INIT_PARAM_DFLT(adaptive_compression, "Use an adaptive compression scheme", + false), + INIT_PARAM_DFLT(compression_latency, + "Latency in cycles of compression algorithm", + 0), + INIT_PARAM_DFLT(subblock_size, + "Size of subblock in IIC used for compression", + 0), + INIT_PARAM_DFLT(max_miss_count, + "The number of misses to handle before calling exit", + 0), + INIT_PARAM_DFLT(hier, + "Hierarchy global variables", + &defaultHierParams), + INIT_PARAM_DFLT(addr_range, "The address range in bytes", + vector >(1,RangeIn((Addr)0, MaxAddr))), + INIT_PARAM_DFLT(mem_trace, "Memory trace to write accesses to", NULL), + INIT_PARAM_DFLT(split, "Whether this is a partitioned cache", false), + INIT_PARAM_DFLT(split_size, "the number of \"ways\" belonging to the LRU partition", 0), + INIT_PARAM_DFLT(lifo, "whether you are using a LIFO repl. policy", false), + INIT_PARAM_DFLT(two_queue, "whether the lifo should have two queue replacement", false), + INIT_PARAM_DFLT(prefetch_miss, "wheter you are using the hardware prefetcher from Miss stream", false), + INIT_PARAM_DFLT(prefetch_access, "wheter you are using the hardware prefetcher from Access stream", false), + INIT_PARAM_DFLT(prefetcher_size, "Number of entries in the harware prefetch queue", 100), + INIT_PARAM_DFLT(prefetch_past_page, "Allow prefetches to cross virtual page boundaries", false), + INIT_PARAM_DFLT(prefetch_serial_squash, "Squash prefetches with a later time on a subsequent miss", false), + INIT_PARAM_DFLT(prefetch_latency, "Latency of the prefetcher", 10), + INIT_PARAM_DFLT(prefetch_degree, "Degree of the prefetch depth", 1), + INIT_PARAM_DFLT(prefetch_policy, "Type of prefetcher to use", "none"), + INIT_PARAM_DFLT(prefetch_cache_check_push, "Check if in cash on push or pop of prefetch queue", true), + INIT_PARAM_DFLT(prefetch_use_cpu_id, "Use the CPU ID to seperate calculations of prefetches", true), + INIT_PARAM_DFLT(prefetch_data_accesses_only, "Only prefetch on data not on instruction accesses", false) +END_INIT_SIM_OBJECT_PARAMS(BaseCache) + + +#define BUILD_CACHE(t, comp, b, c) do { \ + Prefetcher, b> *pf; \ + if (pf_policy == "tagged") { \ + BUILD_TAGGED_PREFETCHER(t, comp, b); \ + } \ + else if (pf_policy == "stride") { \ + BUILD_STRIDED_PREFETCHER(t, comp, b); \ + } \ + else if (pf_policy == "ghb") { \ + BUILD_GHB_PREFETCHER(t, comp, b); \ + } \ + else { \ + BUILD_NULL_PREFETCHER(t, comp, b); \ + } \ + Cache, b, c>::Params params(tagStore, mq, coh, \ + do_copy, base_params, \ + in_bus, out_bus, pf, \ + prefetch_access); \ + Cache, b, c> *retval = \ + new Cache, b, c>(getInstanceName(), hier, \ + params); \ + if (in_bus == NULL) { \ + retval->setSlaveInterface(new MemoryInterface, b, c> >(getInstanceName(), hier, retval, mem_trace)); \ + } else { \ + retval->setSlaveInterface(new SlaveInterface, b, c>, Bus>(getInstanceName(), hier, retval, in_bus, mem_trace)); \ + } \ + retval->setMasterInterface(new MasterInterface, b, c>, Bus>(getInstanceName(), hier, retval, out_bus)); \ + out_bus->rangeChange(); \ + return retval; \ + } while (0) + +#define BUILD_CACHE_PANIC(x) do { \ + panic("%s not compiled into M5", x); \ + } while (0) + +#if defined(USE_LZSS_COMPRESSION) +#define BUILD_COMPRESSED_CACHE(TAGS, tags, b, c) do { \ + if (compressed_bus || store_compressed){ \ + CacheTags *tagStore = \ + new CacheTags(tags, \ + compression_latency, \ + true, store_compressed, \ + adaptive_compression, \ + prefetch_miss); \ + BUILD_CACHE(TAGS, LZSSCompression, b, c); \ + } else { \ + CacheTags *tagStore = \ + new CacheTags(tags, \ + compression_latency, \ + true, store_compressed, \ + adaptive_compression, \ + prefetch_miss); \ + BUILD_CACHE(TAGS, NullCompression, b, c); \ + } \ + } while (0) +#else +#define BUILD_COMPRESSED_CACHE(TAGS, tags, b, c) do { \ + if (compressed_bus || store_compressed){ \ + BUILD_CACHE_PANIC("compressed caches"); \ + } else { \ + CacheTags *tagStore = \ + new CacheTags(tags, \ + compression_latency, \ + true, store_compressed, \ + adaptive_compression \ + prefetch_miss); \ + BUILD_CACHE(TAGS, NullCompression, b, c); \ + } \ + } while (0) +#endif + +#if defined(USE_CACHE_FALRU) +#define BUILD_FALRU_CACHE(b,c) do { \ + FALRU *tags = new FALRU(block_size, size, latency); \ + BUILD_COMPRESSED_CACHE(FALRU, tags, b, c); \ + } while (0) +#else +#define BUILD_FALRU_CACHE(b, c) BUILD_CACHE_PANIC("falru cache") +#endif + +#if defined(USE_CACHE_LRU) +#define BUILD_LRU_CACHE(b, c) do { \ + LRU *tags = new LRU(numSets, block_size, assoc, latency); \ + BUILD_COMPRESSED_CACHE(LRU, tags, b, c); \ + } while (0) +#else +#define BUILD_LRU_CACHE(b, c) BUILD_CACHE_PANIC("lru cache") +#endif + +#if defined(USE_CACHE_SPLIT) +#define BUILD_SPLIT_CACHE(b, c) do { \ + Split *tags = new Split(numSets, block_size, assoc, split_size, lifo, \ + two_queue, latency); \ + BUILD_COMPRESSED_CACHE(Split, tags, b, c); \ + } while (0) +#else +#define BUILD_SPLIT_CACHE(b, c) BUILD_CACHE_PANIC("split cache") +#endif + +#if defined(USE_CACHE_SPLIT_LIFO) +#define BUILD_SPLIT_LIFO_CACHE(b, c) do { \ + SplitLIFO *tags = new SplitLIFO(block_size, size, assoc, \ + latency, two_queue, -1); \ + BUILD_COMPRESSED_CACHE(SplitLIFO, tags, b, c); \ + } while (0) +#else +#define BUILD_SPLIT_LIFO_CACHE(b, c) BUILD_CACHE_PANIC("lifo cache") +#endif + +#if defined(USE_CACHE_IIC) +#define BUILD_IIC_CACHE(b ,c) do { \ + IIC *tags = new IIC(iic_params); \ + BUILD_COMPRESSED_CACHE(IIC, tags, b, c); \ + } while (0) +#else +#define BUILD_IIC_CACHE(b, c) BUILD_CACHE_PANIC("iic") +#endif + +#define BUILD_CACHES(b, c) do { \ + if (repl == NULL) { \ + if (numSets == 1) { \ + BUILD_FALRU_CACHE(b, c); \ + } else { \ + if (split == true) { \ + BUILD_SPLIT_CACHE(b, c); \ + } else if (lifo == true) { \ + BUILD_SPLIT_LIFO_CACHE(b, c); \ + } else { \ + BUILD_LRU_CACHE(b, c); \ + } \ + } \ + } else { \ + BUILD_IIC_CACHE(b, c); \ + } \ + } while (0) + +#define BUILD_COHERENCE(b) do { \ + if (protocol == NULL) { \ + UniCoherence *coh = new UniCoherence(); \ + BUILD_CACHES(b, UniCoherence); \ + } else { \ + SimpleCoherence *coh = new SimpleCoherence(protocol); \ + BUILD_CACHES(b, SimpleCoherence); \ + } \ + } while (0) + +#if defined(USE_TAGGED) +#define BUILD_TAGGED_PREFETCHER(t, comp, b) pf = new \ + TaggedPrefetcher, b>(prefetcher_size, \ + !prefetch_past_page, \ + prefetch_serial_squash, \ + prefetch_cache_check_push, \ + prefetch_data_accesses_only, \ + prefetch_latency, \ + prefetch_degree) +#else +#define BUILD_TAGGED_PREFETCHER(t, comp, b) BUILD_CACHE_PANIC("Tagged Prefetcher") +#endif + +#if defined(USE_STRIDED) +#define BUILD_STRIDED_PREFETCHER(t, comp, b) pf = new \ + StridePrefetcher, b>(prefetcher_size, \ + !prefetch_past_page, \ + prefetch_serial_squash, \ + prefetch_cache_check_push, \ + prefetch_data_accesses_only, \ + prefetch_latency, \ + prefetch_degree, \ + prefetch_use_cpu_id) +#else +#define BUILD_STRIDED_PREFETCHER(t, comp, b) BUILD_CACHE_PANIC("Stride Prefetcher") +#endif + +#if defined(USE_GHB) +#define BUILD_GHB_PREFETCHER(t, comp, b) pf = new \ + GHBPrefetcher, b>(prefetcher_size, \ + !prefetch_past_page, \ + prefetch_serial_squash, \ + prefetch_cache_check_push, \ + prefetch_data_accesses_only, \ + prefetch_latency, \ + prefetch_degree, \ + prefetch_use_cpu_id) +#else +#define BUILD_GHB_PREFETCHER(t, comp, b) BUILD_CACHE_PANIC("GHB Prefetcher") +#endif + +#if defined(USE_TAGGED) +#define BUILD_NULL_PREFETCHER(t, comp, b) pf = new \ + TaggedPrefetcher, b>(prefetcher_size, \ + !prefetch_past_page, \ + prefetch_serial_squash, \ + prefetch_cache_check_push, \ + prefetch_data_accesses_only, \ + prefetch_latency, \ + prefetch_degree) +#else +#define BUILD_NULL_PREFETCHER(t, comp, b) BUILD_CACHE_PANIC("NULL Prefetcher (uses Tagged)") +#endif + +CREATE_SIM_OBJECT(BaseCache) +{ + string name = getInstanceName(); + int numSets = size / (assoc * block_size); + string pf_policy = prefetch_policy; + if (subblock_size == 0) { + subblock_size = block_size; + } + + // Build BaseCache param object + BaseCache::Params base_params(addr_range, latency, + block_size, max_miss_count); + + //Warnings about prefetcher policy + if (pf_policy == "none" && (prefetch_miss || prefetch_access)) { + panic("With no prefetcher, you shouldn't prefetch from" + " either miss or access stream\n"); + } + if ((pf_policy == "tagged" || pf_policy == "stride" || + pf_policy == "ghb") && !(prefetch_miss || prefetch_access)) { + warn("With this prefetcher you should chose a prefetch" + " stream (miss or access)\nNo Prefetching will occur\n"); + } + if ((pf_policy == "tagged" || pf_policy == "stride" || + pf_policy == "ghb") && prefetch_miss && prefetch_access) { + panic("Can't do prefetches from both miss and access" + " stream\n"); + } + if (pf_policy != "tagged" && pf_policy != "stride" && + pf_policy != "ghb" && pf_policy != "none") { + panic("Unrecognized form of a prefetcher: %s, try using" + "['none','stride','tagged','ghb']\n", pf_policy); + } + +#if defined(USE_CACHE_IIC) + // Build IIC params + IIC::Params iic_params; + iic_params.size = size; + iic_params.numSets = numSets; + iic_params.blkSize = block_size; + iic_params.assoc = assoc; + iic_params.hashDelay = hash_delay; + iic_params.hitLatency = latency; + iic_params.rp = repl; + iic_params.subblockSize = subblock_size; +#else + const void *repl = NULL; +#endif + + if (mshrs == 1 || out_bus->doEvents() == false) { + BlockingBuffer *mq = new BlockingBuffer(true); + BUILD_COHERENCE(BlockingBuffer); + } else { + MissQueue *mq = new MissQueue(mshrs, tgts_per_mshr, write_buffers, + true, prefetch_miss); + BUILD_COHERENCE(MissQueue); + } + return NULL; +} + +REGISTER_SIM_OBJECT("BaseCache", BaseCache) + + +#endif //DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh new file mode 100644 index 0000000000..3dd8d74cde --- /dev/null +++ b/src/mem/cache/cache_impl.hh @@ -0,0 +1,661 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Dave Greene + * Nathan Binkert + */ + +/** + * @file + * Cache definitions. + */ + +#include +#include + +#include +#include +#include + +#include "sim/host.hh" +#include "base/misc.hh" +#include "cpu/smt.hh" + +#include "mem/cache/cache.hh" +#include "mem/cache/cache_blk.hh" +#include "mem/cache/miss/mshr.hh" +#include "mem/cache/prefetch/prefetcher.hh" + +#include "mem/bus/bus.hh" + +#include "mem/bus/slave_interface.hh" +#include "mem/memory_interface.hh" +#include "mem/bus/master_interface.hh" + +#include "mem/mem_debug.hh" + +#include "sim/sim_events.hh" // for SimExitEvent + +using namespace std; + +template +bool +Cache:: +doTimingAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide) +{ + if (isCpuSide) + { + access(pkt); + } + else + { + if (pkt->isRespnse()) + handleResponse(pkt); + else + snoop(pkt); + } +} + +template +Tick +Cache:: +doAtomicAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide) +{ + if (isCpuSide) + { + probe(pkt, true); + } + else + { + if (pkt->isRespnse()) + handleResponse(pkt); + else + snoopProbe(pkt, true); + } +} + +template +void +Cache:: +doFunctionalAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide) +{ + if (isCpuSide) + { + probe(pkt, false); + } + else + { + if (pkt->isRespnse()) + handleResponse(pkt); + else + snoopProbe(pkt, false); + } +} + +template +void +Cache:: +recvStatusChange(Port::Status status, bool isCpuSide) +{ + +} + + +template +Cache:: +Cache(const std::string &_name, HierParams *hier_params, + Cache::Params ¶ms) + : BaseCache(_name, hier_params, params.baseParams), + prefetchAccess(params.prefetchAccess), + tags(params.tags), missQueue(params.missQueue), + coherence(params.coherence), prefetcher(params.prefetcher), + doCopy(params.doCopy), blockOnCopy(params.blockOnCopy) +{ + if (params.in == NULL) { + topLevelCache = true; + } + tags->setCache(this, params.out->width, params.out->clockRate); + tags->setPrefetcher(prefetcher); + missQueue->setCache(this); + missQueue->setPrefetcher(prefetcher); + coherence->setCache(this); + prefetcher->setCache(this); + prefetcher->setTags(tags); + prefetcher->setBuffer(missQueue); + invalidatePkt = new Packet; + invalidatePkt->cmd = Invalidate; +} + +template +void +Cache::regStats() +{ + BaseCache::regStats(); + tags->regStats(name()); + missQueue->regStats(name()); + coherence->regStats(name()); + prefetcher->regStats(name()); +} + +template +MemAccessResult +Cache::access(Packet &pkt) +{ + MemDebug::cacheAccess(pkt); + BlkType *blk = NULL; + PacketList* writebacks; + int size = blkSize; + int lat = hitLatency; + if (prefetchAccess) { + //We are determining prefetches on access stream, call prefetcher + prefetcher->handleMiss(pkt, curTick); + } + if (!pkt->isUncacheable()) { + if (pkt->cmd.isInvalidate() && !pkt->cmd.isRead() + && !pkt->cmd.isWrite()) { + //Upgrade or Invalidate + //Look into what happens if two slave caches on bus + DPRINTF(Cache, "%s %d %x ? blk_addr: %x\n", pkt->cmd.toString(), + pkt->req->asid, pkt->paddr & (((ULL(1))<<48)-1), + pkt->paddr & ~((Addr)blkSize - 1)); + + //@todo Should this return latency have the hit latency in it? +// respond(pkt,curTick+lat); + pkt->flags |= SATISFIED; + return MA_HIT; + } + blk = tags->handleAccess(pkt, lat, writebacks); + } else { + size = pkt->size; + } + // If this is a block size write/hint (WH64) allocate the block here + // if the coherence protocol allows it. + /** @todo make the fast write alloc (wh64) work with coherence. */ + /** @todo Do we want to do fast writes for writebacks as well? */ + if (!blk && pkt->size >= blkSize && coherence->allowFastWrites() && + (pkt->cmd == Write || pkt->cmd == WriteInvalidate) ) { + // not outstanding misses, can do this + MSHR* outstanding_miss = missQueue->findMSHR(pkt->paddr, pkt->req->asid); + if (pkt->cmd ==WriteInvalidate || !outstanding_miss) { + if (outstanding_miss) { + warn("WriteInv doing a fastallocate" + "with an outstanding miss to the same address\n"); + } + blk = tags->handleFill(NULL, pkt, BlkValid | BlkWritable, + writebacks); + ++fastWrites; + } + } + while (!writebacks.empty()) { + missQueue->doWriteback(writebacks.front()); + writebacks.pop_front(); + } + DPRINTF(Cache, "%s %d %x %s blk_addr: %x pc %x\n", pkt->cmd.toString(), + pkt->req->asid, pkt->paddr & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss", + pkt->paddr & ~((Addr)blkSize - 1), pkt->pc); + if (blk) { + // Hit + hits[pkt->cmd.toIndex()][pkt->thread_num]++; + // clear dirty bit if write through + if (!pkt->cmd.isNoResponse()) + respond(pkt, curTick+lat); + return MA_HIT; + } + + // Miss + if (!pkt->isUncacheable()) { + misses[pkt->cmd.toIndex()][pkt->thread_num]++; + /** @todo Move miss count code into BaseCache */ + if (missCount) { + --missCount; + if (missCount == 0) + new SimExitEvent("A cache reached the maximum miss count"); + } + } + missQueue->handleMiss(pkt, size, curTick + hitLatency); + return MA_CACHE_MISS; +} + + +template +Packet * +Cache::getPacket() +{ + Packet * pkt = missQueue->getPacket(); + if (pkt) { + if (!pkt->isUncacheable()) { + if (pkt->cmd == Hard_Prefetch) misses[Hard_Prefetch][pkt->thread_num]++; + BlkType *blk = tags->findBlock(pkt); + Packet::Command cmd = coherence->getBusCmd(pkt->cmd, + (blk)? blk->status : 0); + missQueue->setBusCmd(pkt, cmd); + } + } + + assert(!doMasterPktuest() || missQueue->havePending()); + assert(!pkt || pkt->time <= curTick); + return pkt; +} + +template +void +Cache::sendResult(MemPktPtr &pkt, bool success) +{ + if (success) { + missQueue->markInService(pkt); + //Temp Hack for UPGRADES + if (pkt->cmd == Upgrade) { + handleResponse(pkt); + } + } else if (pkt && !pkt->isUncacheable()) { + missQueue->restoreOrigCmd(pkt); + } +} + +template +void +Cache::handleResponse(Packet * &pkt) +{ + BlkType *blk = NULL; + if (pkt->senderState) { + MemDebug::cacheResponse(pkt); + DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->paddr, + pkt->paddr & (((ULL(1))<<48)-1)); + + if (pkt->isCacheFill() && !pkt->isNoAllocate()) { + blk = tags->findBlock(pkt); + CacheBlk::State old_state = (blk) ? blk->status : 0; + MemPktList writebacks; + blk = tags->handleFill(blk, pkt->senderState, + coherence->getNewState(pkt,old_state), + writebacks); + while (!writebacks.empty()) { + missQueue->doWriteback(writebacks.front()); + } + } + missQueue->handleResponse(pkt, curTick + hitLatency); + } +} + +template +void +Cache::pseudoFill(Addr addr, int asid) +{ + // Need to temporarily move this blk into MSHRs + MSHR *mshr = missQueue->allocateTargetList(addr, asid); + int lat; + PacketList* dummy; + // Read the data into the mshr + BlkType *blk = tags->handleAccess(mshr->pkt, lat, dummy, false); + assert(dummy.empty()); + assert(mshr->pkt->isSatisfied()); + // can overload order since it isn't used on non pending blocks + mshr->order = blk->status; + // temporarily remove the block from the cache. + tags->invalidateBlk(addr, asid); +} + +template +void +Cache::pseudoFill(MSHR *mshr) +{ + // Need to temporarily move this blk into MSHRs + assert(mshr->pkt->cmd == Read); + int lat; + PacketList* dummy; + // Read the data into the mshr + BlkType *blk = tags->handleAccess(mshr->pkt, lat, dummy, false); + assert(dummy.empty()); + assert(mshr->pkt->isSatisfied()); + // can overload order since it isn't used on non pending blocks + mshr->order = blk->status; + // temporarily remove the block from the cache. + tags->invalidateBlk(mshr->pkt->paddr, mshr->pkt->req->asid); +} + + +template +Packet * +Cache::getCoherenceReq() +{ + return coherence->getPacket(); +} + + +template +void +Cache::snoop(Packet * &pkt) +{ + Addr blk_addr = pkt->paddr & ~(Addr(blkSize-1)); + BlkType *blk = tags->findBlock(pkt); + MSHR *mshr = missQueue->findMSHR(blk_addr, pkt->req->asid); + if (isTopLevel() && coherence->hasProtocol()) { //@todo Move this into handle bus req + //If we find an mshr, and it is in service, we need to NACK or invalidate + if (mshr) { + if (mshr->inService) { + if ((mshr->pkt->cmd.isInvalidate() || !mshr->pkt->isCacheFill()) + && (pkt->cmd != Invalidate && pkt->cmd != WriteInvalidate)) { + //If the outstanding request was an invalidate (upgrade,readex,..) + //Then we need to ACK the request until we get the data + //Also NACK if the outstanding request is not a cachefill (writeback) + pkt->flags |= NACKED_LINE; + return; + } + else { + //The supplier will be someone else, because we are waiting for + //the data. This should cause this cache to be forced to go to + //the shared state, not the exclusive even though the shared line + //won't be asserted. But for now we will just invlidate ourselves + //and allow the other cache to go into the exclusive state. + //@todo Make it so a read to a pending read doesn't invalidate. + //@todo Make it so that a read to a pending read can't be exclusive now. + + //Set the address so find match works + invalidatePkt->paddr = pkt->paddr; + + //Append the invalidate on + missQueue->addTarget(mshr,invalidatePkt); + DPRINTF(Cache, "Appending Invalidate to blk_addr: %x\n", pkt->paddr & (((ULL(1))<<48)-1)); + return; + } + } + } + //We also need to check the writeback buffers and handle those + std::vector writebacks; + if (missQueue->findWrites(blk_addr, pkt->req->asid, writebacks)) { + DPRINTF(Cache, "Snoop hit in writeback to blk_addr: %x\n", pkt->paddr & (((ULL(1))<<48)-1)); + + //Look through writebacks for any non-uncachable writes, use that + for (int i=0; ipkt->isUncacheable()) { + if (pkt->cmd.isRead()) { + //Only Upgrades don't get here + //Supply the data + pkt->flags |= SATISFIED; + + //If we are in an exclusive protocol, make it ask again + //to get write permissions (upgrade), signal shared + pkt->flags |= SHARED_LINE; + + if (doData()) { + assert(pkt->cmd.isRead()); + + assert(pkt->offset < blkSize); + assert(pkt->size <= blkSize); + assert(pkt->offset + pkt->size <=blkSize); + memcpy(pkt->data, mshr->pkt->data + pkt->offset, pkt->size); + } + respondToSnoop(pkt); + } + + if (pkt->cmd.isInvalidate()) { + //This must be an upgrade or other cache will take ownership + missQueue->markInService(mshr->pkt); + } + return; + } + } + } + } + CacheBlk::State new_state; + bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state); + if (satisfy) { + tags->handleSnoop(blk, new_state, pkt); + respondToSnoop(pkt); + return; + } + tags->handleSnoop(blk, new_state); +} + +template +void +Cache::snoopResponse(Packet * &pkt) +{ + //Need to handle the response, if NACKED + if (pkt->isNacked()) { + //Need to mark it as not in service, and retry for bus + assert(0); //Yeah, we saw a NACK come through + + //For now this should never get called, we return false when we see a NACK + //instead, by doing this we allow the bus_blocked mechanism to handle the retry + //For now it retrys in just 2 cycles, need to figure out how to change that + //Eventually we will want to also have success come in as a parameter + //Need to make sure that we handle the functionality that happens on successufl + //return of the sendAddr function + } +} + +template +void +Cache::invalidateBlk(Addr addr, int asid) +{ + tags->invalidateBlk(addr,asid); +} + + +/** + * @todo Fix to not assume write allocate + */ +template +Tick +Cache::probe(Packet * &pkt, bool update) +{ + MemDebug::cacheProbe(pkt); + + if (!pkt->isUncacheable()) { + if (pkt->cmd.isInvalidate() && !pkt->cmd.isRead() + && !pkt->cmd.isWrite()) { + //Upgrade or Invalidate, satisfy it, don't forward + DPRINTF(Cache, "%s %d %x ? blk_addr: %x\n", pkt->cmd.toString(), + pkt->req->asid, pkt->paddr & (((ULL(1))<<48)-1), + pkt->paddr & ~((Addr)blkSize - 1)); + pkt->flags |= SATISFIED; + return 0; + } + } + + if (!update && !doData()) { + // Nothing to do here + return mi->sendProbe(pkt,update); + } + + PacketList* writebacks; + int lat; + BlkType *blk = tags->handleAccess(pkt, lat, writebacks, update); + + if (!blk) { + // Need to check for outstanding misses and writes + Addr blk_addr = pkt->paddr & ~(blkSize - 1); + + // There can only be one matching outstanding miss. + MSHR* mshr = missQueue->findMSHR(blk_addr, pkt->req->asid); + + // There can be many matching outstanding writes. + vector writes; + missQueue->findWrites(blk_addr, pkt->req->asid, writes); + + if (!update) { + mi->sendProbe(pkt, update); + // Check for data in MSHR and writebuffer. + if (mshr) { + warn("Found outstanding miss on an non-update probe"); + MSHR::TargetList *targets = mshr->getTargetList(); + MSHR::TargetList::iterator i = targets->begin(); + MSHR::TargetList::iterator end = targets->end(); + for (; i != end; ++i) { + Packet * target = *i; + // If the target contains data, and it overlaps the + // probed request, need to update data + if (target->cmd.isWrite() && target->overlaps(pkt)) { + uint8_t* pkt_data; + uint8_t* write_data; + int data_size; + if (target->paddr < pkt->paddr) { + int offset = pkt->paddr - target->paddr; + pkt_data = pkt->data; + write_data = target->data + offset; + data_size = target->size - offset; + assert(data_size > 0); + if (data_size > pkt->size) + data_size = pkt->size; + } else { + int offset = target->paddr - pkt->paddr; + pkt_data = pkt->data + offset; + write_data = target->data; + data_size = pkt->size - offset; + assert(data_size > pkt->size); + if (data_size > target->size) + data_size = target->size; + } + + if (pkt->cmd.isWrite()) { + memcpy(pkt_data, write_data, data_size); + } else { + memcpy(write_data, pkt_data, data_size); + } + } + } + } + for (int i = 0; i < writes.size(); ++i) { + Packet * write = writes[i]->pkt; + if (write->overlaps(pkt)) { + warn("Found outstanding write on an non-update probe"); + uint8_t* pkt_data; + uint8_t* write_data; + int data_size; + if (write->paddr < pkt->paddr) { + int offset = pkt->paddr - write->paddr; + pkt_data = pkt->data; + write_data = write->data + offset; + data_size = write->size - offset; + assert(data_size > 0); + if (data_size > pkt->size) + data_size = pkt->size; + } else { + int offset = write->paddr - pkt->paddr; + pkt_data = pkt->data + offset; + write_data = write->data; + data_size = pkt->size - offset; + assert(data_size > pkt->size); + if (data_size > write->size) + data_size = write->size; + } + + if (pkt->cmd.isWrite()) { + memcpy(pkt_data, write_data, data_size); + } else { + memcpy(write_data, pkt_data, data_size); + } + + } + } + return 0; + } else { + // update the cache state and statistics + if (mshr || !writes.empty()){ + // Can't handle it, return pktuest unsatisfied. + return 0; + } + if (!pkt->isUncacheable()) { + // Fetch the cache block to fill + Packet * busPkt = new MemPkt(); + busPkt->paddr = blk_addr; + busPkt->size = blkSize; + busPkt->data = new uint8_t[blkSize]; + + BlkType *blk = tags->findBlock(pkt); + busPkt->cmd = coherence->getBusCmd(pkt->cmd, + (blk)? blk->status : 0); + + busPkt->req->asid = pkt->req->asid; + busPkt->xc = pkt->xc; + busPkt->thread_num = pkt->thread_num; + busPkt->time = curTick; + + lat = mi->sendProbe(busPkt, update); + + if (!busPkt->isSatisfied()) { + // blocked at a higher level, just return + return 0; + } + + misses[pkt->cmd.toIndex()][pkt->thread_num]++; + + CacheBlk::State old_state = (blk) ? blk->status : 0; + tags->handleFill(blk, busPkt, + coherence->getNewState(busPkt, old_state), + writebacks, pkt); + // Handle writebacks if needed + while (!writebacks.empty()){ + mi->sendProbe(writebacks.front(), update); + writebacks.pop_front(); + } + return lat + hitLatency; + } else { + return mi->sendProbe(pkt,update); + } + } + } else { + // There was a cache hit. + // Handle writebacks if needed + while (!writebacks.empty()){ + mi->sendProbe(writebacks.front(), update); + writebacks.pop_front(); + } + + if (update) { + hits[pkt->cmd.toIndex()][pkt->thread_num]++; + } else if (pkt->cmd.isWrite()) { + // Still need to change data in all locations. + return mi->sendProbe(pkt, update); + } + return curTick + lat; + } + fatal("Probe not handled.\n"); + return 0; +} + +template +Tick +Cache::snoopProbe(MemPktPtr &pkt, bool update) +{ + Addr blk_addr = pkt->paddr & ~(Addr(blkSize-1)); + BlkType *blk = tags->findBlock(pkt); + MSHR *mshr = missQueue->findMSHR(blk_addr, pkt->req->asid); + CacheBlk::State new_state = 0; + bool satisfy = coherence->handleBusPktuest(pkt,blk,mshr, new_state); + if (satisfy) { + tags->handleSnoop(blk, new_state, pkt); + return hitLatency; + } + tags->handleSnoop(blk, new_state); + return 0; +} + diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc new file mode 100644 index 0000000000..107fd2502b --- /dev/null +++ b/src/mem/cache/coherence/coherence_protocol.cc @@ -0,0 +1,566 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Steve Reinhardt + * Ron Dreslinski + */ + +/** + * @file + * Definitions of CoherenceProtocol. + */ + +#include + +#include "base/misc.hh" +#include "mem/cache/miss/mshr.hh" +#include "mem/cache/cache.hh" +#include "mem/cache/coherence/coherence_protocol.hh" +#include "sim/builder.hh" + +using namespace std; + + +CoherenceProtocol::StateTransition::StateTransition() + : busCmd(InvalidCmd), newState(-1), snoopFunc(invalidTransition) +{ +} + + +void +CoherenceProtocol::regStats() +{ + // Even though we count all the possible transitions in the + // requestCount and snoopCount arrays, most of these are invalid, + // so we just select the interesting ones to print here. + + requestCount[Invalid][Read] + .name(name() + ".read_invalid") + .desc("read misses to invalid blocks") + ; + + requestCount[Invalid][Write] + .name(name() +".write_invalid") + .desc("write misses to invalid blocks") + ; + + requestCount[Invalid][Soft_Prefetch] + .name(name() +".swpf_invalid") + .desc("soft prefetch misses to invalid blocks") + ; + + requestCount[Invalid][Hard_Prefetch] + .name(name() +".hwpf_invalid") + .desc("hard prefetch misses to invalid blocks") + ; + + requestCount[Shared][Write] + .name(name() + ".write_shared") + .desc("write misses to shared blocks") + ; + + requestCount[Owned][Write] + .name(name() + ".write_owned") + .desc("write misses to owned blocks") + ; + + snoopCount[Shared][Read] + .name(name() + ".snoop_read_shared") + .desc("read snoops on shared blocks") + ; + + snoopCount[Shared][ReadEx] + .name(name() + ".snoop_readex_shared") + .desc("readEx snoops on shared blocks") + ; + + snoopCount[Shared][Upgrade] + .name(name() + ".snoop_upgrade_shared") + .desc("upgradee snoops on shared blocks") + ; + + snoopCount[Modified][Read] + .name(name() + ".snoop_read_modified") + .desc("read snoops on modified blocks") + ; + + snoopCount[Modified][ReadEx] + .name(name() + ".snoop_readex_modified") + .desc("readEx snoops on modified blocks") + ; + + snoopCount[Owned][Read] + .name(name() + ".snoop_read_owned") + .desc("read snoops on owned blocks") + ; + + snoopCount[Owned][ReadEx] + .name(name() + ".snoop_readex_owned") + .desc("readEx snoops on owned blocks") + ; + + snoopCount[Owned][Upgrade] + .name(name() + ".snoop_upgrade_owned") + .desc("upgrade snoops on owned blocks") + ; + + snoopCount[Exclusive][Read] + .name(name() + ".snoop_read_exclusive") + .desc("read snoops on exclusive blocks") + ; + + snoopCount[Exclusive][ReadEx] + .name(name() + ".snoop_readex_exclusive") + .desc("readEx snoops on exclusive blocks") + ; + + snoopCount[Shared][Invalidate] + .name(name() + ".snoop_inv_shared") + .desc("Invalidate snoops on shared blocks") + ; + + snoopCount[Owned][Invalidate] + .name(name() + ".snoop_inv_owned") + .desc("Invalidate snoops on owned blocks") + ; + + snoopCount[Exclusive][Invalidate] + .name(name() + ".snoop_inv_exclusive") + .desc("Invalidate snoops on exclusive blocks") + ; + + snoopCount[Modified][Invalidate] + .name(name() + ".snoop_inv_modified") + .desc("Invalidate snoops on modified blocks") + ; + + snoopCount[Invalid][Invalidate] + .name(name() + ".snoop_inv_invalid") + .desc("Invalidate snoops on invalid blocks") + ; + + snoopCount[Shared][WriteInvalidate] + .name(name() + ".snoop_writeinv_shared") + .desc("WriteInvalidate snoops on shared blocks") + ; + + snoopCount[Owned][WriteInvalidate] + .name(name() + ".snoop_writeinv_owned") + .desc("WriteInvalidate snoops on owned blocks") + ; + + snoopCount[Exclusive][WriteInvalidate] + .name(name() + ".snoop_writeinv_exclusive") + .desc("WriteInvalidate snoops on exclusive blocks") + ; + + snoopCount[Modified][WriteInvalidate] + .name(name() + ".snoop_writeinv_modified") + .desc("WriteInvalidate snoops on modified blocks") + ; + + snoopCount[Invalid][WriteInvalidate] + .name(name() + ".snoop_writeinv_invalid") + .desc("WriteInvalidate snoops on invalid blocks") + ; +} + + +bool +CoherenceProtocol::invalidateTrans(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, MSHR *mshr, + CacheBlk::State & new_state) +{ + // invalidate the block + new_state = (blk->status & ~stateMask) | Invalid; + return false; +} + + +bool +CoherenceProtocol::supplyTrans(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, + MSHR *mshr, + CacheBlk::State & new_state + ) +{ + return true; +} + + +bool +CoherenceProtocol::supplyAndGotoSharedTrans(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, + MSHR *mshr, + CacheBlk::State & new_state) +{ + new_state = (blk->status & ~stateMask) | Shared; + pkt->flags |= SHARED_LINE; + return supplyTrans(cache, pkt, blk, mshr, new_state); +} + + +bool +CoherenceProtocol::supplyAndGotoOwnedTrans(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, + MSHR *mshr, + CacheBlk::State & new_state) +{ + new_state = (blk->status & ~stateMask) | Owned; + pkt->flags |= SHARED_LINE; + return supplyTrans(cache, pkt, blk, mshr, new_state); +} + + +bool +CoherenceProtocol::supplyAndInvalidateTrans(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, + MSHR *mshr, + CacheBlk::State & new_state) +{ + new_state = (blk->status & ~stateMask) | Invalid; + return supplyTrans(cache, pkt, blk, mshr, new_state); +} + +bool +CoherenceProtocol::assertShared(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, + MSHR *mshr, + CacheBlk::State & new_state) +{ + new_state = (blk->status & ~stateMask) | Shared; + pkt->flags |= SHARED_LINE; + return false; +} + +CoherenceProtocol::CoherenceProtocol(const string &name, + const string &protocol, + const bool doUpgrades) + : SimObject(name) +{ + if ((protocol == "mosi" || protocol == "moesi") && !doUpgrades) { + cerr << "CoherenceProtocol: ownership protocols require upgrade transactions" + << "(write miss on owned block generates ReadExcl, which will clobber dirty block)" + << endl; + fatal(""); + } + + Packet::CommandEnum writeToSharedCmd = doUpgrades ? Upgrade : ReadEx; + +//@todo add in hardware prefetch to this list + if (protocol == "msi") { + // incoming requests: specify outgoing bus request + transitionTable[Invalid][Read].onRequest(Read); + transitionTable[Invalid][Write].onRequest(ReadEx); + transitionTable[Shared][Write].onRequest(writeToSharedCmd); + //Prefetching causes a read + transitionTable[Invalid][Soft_Prefetch].onRequest(Read); + transitionTable[Invalid][Hard_Prefetch].onRequest(Read); + + // on response to given request: specify new state + transitionTable[Invalid][Read].onResponse(Shared); + transitionTable[Invalid][ReadEx].onResponse(Modified); + transitionTable[Shared][writeToSharedCmd].onResponse(Modified); + + // bus snoop transition functions + transitionTable[Invalid][Read].onSnoop(nullTransition); + transitionTable[Invalid][ReadEx].onSnoop(nullTransition); + transitionTable[Shared][Read].onSnoop(nullTransition); + transitionTable[Shared][ReadEx].onSnoop(invalidateTrans); + transitionTable[Modified][ReadEx].onSnoop(supplyAndInvalidateTrans); + transitionTable[Modified][Read].onSnoop(supplyAndGotoSharedTrans); + //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) + transitionTable[Invalid][Invalidate].onSnoop(invalidateTrans); + transitionTable[Shared][Invalidate].onSnoop(invalidateTrans); + transitionTable[Modified][Invalidate].onSnoop(invalidateTrans); + transitionTable[Invalid][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Shared][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Modified][WriteInvalidate].onSnoop(invalidateTrans); + + if (doUpgrades) { + transitionTable[Invalid][Upgrade].onSnoop(nullTransition); + transitionTable[Shared][Upgrade].onSnoop(invalidateTrans); + } + } + + else if(protocol == "mesi") { + // incoming requests: specify outgoing bus request + transitionTable[Invalid][Read].onRequest(Read); + transitionTable[Invalid][Write].onRequest(ReadEx); + transitionTable[Shared][Write].onRequest(writeToSharedCmd); + //Prefetching causes a read + transitionTable[Invalid][Soft_Prefetch].onRequest(Read); + transitionTable[Invalid][Hard_Prefetch].onRequest(Read); + + // on response to given request: specify new state + transitionTable[Invalid][Read].onResponse(Exclusive); + //It will move into shared if the shared line is asserted in the + //getNewState function + transitionTable[Invalid][ReadEx].onResponse(Modified); + transitionTable[Shared][writeToSharedCmd].onResponse(Modified); + + // bus snoop transition functions + transitionTable[Invalid][Read].onSnoop(nullTransition); + transitionTable[Invalid][ReadEx].onSnoop(nullTransition); + transitionTable[Shared][Read].onSnoop(assertShared); + transitionTable[Shared][ReadEx].onSnoop(invalidateTrans); + transitionTable[Exclusive][Read].onSnoop(assertShared); + transitionTable[Exclusive][ReadEx].onSnoop(invalidateTrans); + transitionTable[Modified][ReadEx].onSnoop(supplyAndInvalidateTrans); + transitionTable[Modified][Read].onSnoop(supplyAndGotoSharedTrans); + //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) + transitionTable[Invalid][Invalidate].onSnoop(invalidateTrans); + transitionTable[Shared][Invalidate].onSnoop(invalidateTrans); + transitionTable[Modified][Invalidate].onSnoop(invalidateTrans); + transitionTable[Exclusive][Invalidate].onSnoop(invalidateTrans); + transitionTable[Invalid][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Shared][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Modified][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Exclusive][WriteInvalidate].onSnoop(invalidateTrans); + + if (doUpgrades) { + transitionTable[Invalid][Upgrade].onSnoop(nullTransition); + transitionTable[Shared][Upgrade].onSnoop(invalidateTrans); + } + } + + else if(protocol == "mosi") { + // incoming requests: specify outgoing bus request + transitionTable[Invalid][Read].onRequest(Read); + transitionTable[Invalid][Write].onRequest(ReadEx); + transitionTable[Shared][Write].onRequest(writeToSharedCmd); + transitionTable[Owned][Write].onRequest(writeToSharedCmd); + //Prefetching causes a read + transitionTable[Invalid][Soft_Prefetch].onRequest(Read); + transitionTable[Invalid][Hard_Prefetch].onRequest(Read); + + // on response to given request: specify new state + transitionTable[Invalid][Read].onResponse(Shared); + transitionTable[Invalid][ReadEx].onResponse(Modified); + transitionTable[Shared][writeToSharedCmd].onResponse(Modified); + transitionTable[Owned][writeToSharedCmd].onResponse(Modified); + + // bus snoop transition functions + transitionTable[Invalid][Read].onSnoop(nullTransition); + transitionTable[Invalid][ReadEx].onSnoop(nullTransition); + transitionTable[Invalid][Upgrade].onSnoop(nullTransition); + transitionTable[Shared][Read].onSnoop(assertShared); + transitionTable[Shared][ReadEx].onSnoop(invalidateTrans); + transitionTable[Shared][Upgrade].onSnoop(invalidateTrans); + transitionTable[Modified][ReadEx].onSnoop(supplyAndInvalidateTrans); + transitionTable[Modified][Read].onSnoop(supplyAndGotoOwnedTrans); + transitionTable[Owned][Read].onSnoop(supplyAndGotoOwnedTrans); + transitionTable[Owned][ReadEx].onSnoop(supplyAndInvalidateTrans); + transitionTable[Owned][Upgrade].onSnoop(invalidateTrans); + //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) + transitionTable[Invalid][Invalidate].onSnoop(invalidateTrans); + transitionTable[Shared][Invalidate].onSnoop(invalidateTrans); + transitionTable[Modified][Invalidate].onSnoop(invalidateTrans); + transitionTable[Owned][Invalidate].onSnoop(invalidateTrans); + transitionTable[Invalid][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Shared][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Modified][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Owned][WriteInvalidate].onSnoop(invalidateTrans); + } + + else if(protocol == "moesi") { + // incoming requests: specify outgoing bus request + transitionTable[Invalid][Read].onRequest(Read); + transitionTable[Invalid][Write].onRequest(ReadEx); + transitionTable[Shared][Write].onRequest(writeToSharedCmd); + transitionTable[Owned][Write].onRequest(writeToSharedCmd); + //Prefetching causes a read + transitionTable[Invalid][Soft_Prefetch].onRequest(Read); + transitionTable[Invalid][Hard_Prefetch].onRequest(Read); + + // on response to given request: specify new state + transitionTable[Invalid][Read].onResponse(Exclusive); + //It will move into shared if the shared line is asserted in the + //getNewState function + transitionTable[Invalid][ReadEx].onResponse(Modified); + transitionTable[Shared][writeToSharedCmd].onResponse(Modified); + transitionTable[Owned][writeToSharedCmd].onResponse(Modified); + + // bus snoop transition functions + transitionTable[Invalid][Read].onSnoop(nullTransition); + transitionTable[Invalid][ReadEx].onSnoop(nullTransition); + transitionTable[Invalid][Upgrade].onSnoop(nullTransition); + transitionTable[Shared][Read].onSnoop(assertShared); + transitionTable[Shared][ReadEx].onSnoop(invalidateTrans); + transitionTable[Shared][Upgrade].onSnoop(invalidateTrans); + transitionTable[Exclusive][Read].onSnoop(assertShared); + transitionTable[Exclusive][ReadEx].onSnoop(invalidateTrans); + transitionTable[Modified][Read].onSnoop(supplyAndGotoOwnedTrans); + transitionTable[Modified][ReadEx].onSnoop(supplyAndInvalidateTrans); + transitionTable[Owned][Read].onSnoop(supplyAndGotoOwnedTrans); + transitionTable[Owned][ReadEx].onSnoop(supplyAndInvalidateTrans); + transitionTable[Owned][Upgrade].onSnoop(invalidateTrans); + //Transitions on seeing a DMA (writeInv(samelevel) or DMAInv) + transitionTable[Invalid][Invalidate].onSnoop(invalidateTrans); + transitionTable[Shared][Invalidate].onSnoop(invalidateTrans); + transitionTable[Exclusive][Invalidate].onSnoop(invalidateTrans); + transitionTable[Modified][Invalidate].onSnoop(invalidateTrans); + transitionTable[Owned][Invalidate].onSnoop(invalidateTrans); + transitionTable[Invalid][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Shared][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Exclusive][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Modified][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Owned][WriteInvalidate].onSnoop(invalidateTrans); + } + + else { + cerr << "CoherenceProtocol: unrecognized protocol " << protocol + << endl; + fatal(""); + } +} + + +Packet::Command +CoherenceProtocol::getBusCmd(Packet::Command cmdIn, CacheBlk::State state, + MSHR *mshr) +{ + state &= stateMask; + int cmd_idx = cmdIn.toIndex(); + + assert(0 <= state && state <= stateMax); + assert(0 <= cmd_idx && cmd_idx < NUM_MEM_CMDS); + + Packet::Command cmdOut = transitionTable[state][cmd_idx].busCmd; + + assert(cmdOut != InvalidCmd); + + ++requestCount[state][cmd_idx]; + + return cmdOut; +} + + +CacheBlk::State +CoherenceProtocol::getNewState(const Packet * &pkt, CacheBlk::State oldState) +{ + CacheBlk::State state = oldState & stateMask; + int cmd_idx = pkt->cmd.toIndex(); + + assert(0 <= state && state <= stateMax); + assert(0 <= cmd_idx && cmd_idx < NUM_MEM_CMDS); + + CacheBlk::State newState = transitionTable[state][cmd_idx].newState; + + //Check if it's exclusive and the shared line was asserted, + //then goto shared instead + if (newState == Exclusive && (pkt->flags & SHARED_LINE)) { + newState = Shared; + } + + assert(newState != -1); + + //Make sure not to loose any other state information + newState = (oldState & ~stateMask) | newState; + return newState; +} + + +bool +CoherenceProtocol::handleBusRequest(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, + MSHR *mshr, + CacheBlk::State & new_state) +{ + if (blk == NULL) { + // nothing to do if we don't have a block + return false; + } + + CacheBlk::State state = blk->status & stateMask; + int cmd_idx = pkt->cmd.toIndex(); + + assert(0 <= state && state <= stateMax); + assert(0 <= cmd_idx && cmd_idx < NUM_MEM_CMDS); + +// assert(mshr == NULL); // can't currently handle outstanding requests + //Check first if MSHR, and also insure, if there is one, that it is not in service + assert(!mshr || mshr->inService == 0); + ++snoopCount[state][cmd_idx]; + + bool ret = transitionTable[state][cmd_idx].snoopFunc(cache, pkt, blk, mshr, + new_state); + + + + return ret; +} + +bool +CoherenceProtocol::nullTransition(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, MSHR *mshr, + CacheBlk::State & new_state) +{ + // do nothing + if (blk) + new_state = blk->status; + return false; +} + + +bool +CoherenceProtocol::invalidTransition(BaseCache *cache, Packet * &pkt, + CacheBlk *blk, MSHR *mshr, + CacheBlk::State & new_state) +{ + panic("Invalid transition"); + return false; +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(CoherenceProtocol) + + Param protocol; + Param do_upgrades; + +END_DECLARE_SIM_OBJECT_PARAMS(CoherenceProtocol) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(CoherenceProtocol) + + INIT_PARAM(protocol, "name of coherence protocol"), + INIT_PARAM_DFLT(do_upgrades, "use upgrade transactions?", true) + +END_INIT_SIM_OBJECT_PARAMS(CoherenceProtocol) + + +CREATE_SIM_OBJECT(CoherenceProtocol) +{ + return new CoherenceProtocol(getInstanceName(), protocol, + do_upgrades); +} + +REGISTER_SIM_OBJECT("CoherenceProtocol", CoherenceProtocol) + +#endif // DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/coherence/coherence_protocol.hh b/src/mem/cache/coherence/coherence_protocol.hh new file mode 100644 index 0000000000..4f65205525 --- /dev/null +++ b/src/mem/cache/coherence/coherence_protocol.hh @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Steve Reinhardt + */ + +/** + * @file + * Declaration of CoherenceProcotol a basic coherence policy. + */ +#ifndef __COHERENCE_PROTOCOL_HH__ +#define __COHERENCE_PROTOCOL_HH__ + +#include + +#include "sim/sim_object.hh" +#include "mem/packet.hh" +#include "mem/mem_cmd.hh" +#include "mem/cache/cache_blk.hh" +#include "base/statistics.hh" + +class BaseCache; +class MSHR; + +/** + * A simple coherence policy for the memory hierarchy. Currently implements + * MSI, MESI, and MOESI protocols. + */ +class CoherenceProtocol : public SimObject +{ + public: + /** + * Contruct and initialize this policy. + * @param name The name of this policy. + * @param protocol The string representation of the protocol to use. + * @param doUpgrades True if bus upgrades should be used. + */ + CoherenceProtocol(const std::string &name, const std::string &protocol, + const bool doUpgrades); + + /** + * Destructor. + */ + virtual ~CoherenceProtocol() {}; + + /** + * Register statistics + */ + virtual void regStats(); + + /** + * Get the proper bus command for the given command and status. + * @param cmd The request's command. + * @param status The current state of the cache block. + * @param mshr The MSHR matching the request. + * @return The proper bus command, as determined by the protocol. + */ + Packet::Command getBusCmd(Packet::Command cmd, CacheBlk::State status, + MSHR *mshr = NULL); + + /** + * Return the proper state given the current state and the bus response. + * @param req The bus response. + * @param oldState The current block state. + * @return The new state. + */ + CacheBlk::State getNewState(const Packet * &pkt, + CacheBlk::State oldState); + + /** + * Handle snooped bus requests. + * @param cache The cache that snooped the request. + * @param req The snooped bus request. + * @param blk The cache block corresponding to the request, if any. + * @param mshr The MSHR corresponding to the request, if any. + * @param new_state The new coherence state of the block. + * @return True if the request should be satisfied locally. + */ + bool handleBusRequest(BaseCache *cache, Packet * &pkt, CacheBlk *blk, + MSHR *mshr, CacheBlk::State &new_state); + + protected: + /** Snoop function type. */ + typedef bool (*SnoopFuncType)(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + // + // Standard snoop transition functions + // + + /** + * Do nothing transition. + */ + static bool nullTransition(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Invalid transition, basically panic. + */ + static bool invalidTransition(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Invalidate block, move to Invalid state. + */ + static bool invalidateTrans(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Supply data, no state transition. + */ + static bool supplyTrans(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Supply data and go to Shared state. + */ + static bool supplyAndGotoSharedTrans(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Supply data and go to Owned state. + */ + static bool supplyAndGotoOwnedTrans(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Invalidate block, supply data, and go to Invalid state. + */ + static bool supplyAndInvalidateTrans(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Assert the shared line for a block that is shared/exclusive. + */ + static bool assertShared(BaseCache *, Packet *&, CacheBlk *, + MSHR *, CacheBlk::State&); + + /** + * Definition of protocol state transitions. + */ + class StateTransition + { + friend class CoherenceProtocol; + + /** The bus command of this transition. */ + Packet::Command busCmd; + /** The state to transition to. */ + int newState; + /** The snoop function for this transition. */ + SnoopFuncType snoopFunc; + + /** + * Constructor, defaults to invalid transition. + */ + StateTransition(); + + /** + * Initialize bus command. + * @param cmd The bus command to use. + */ + void onRequest(Packet::Command cmd) + { + busCmd = cmd; + } + + /** + * Set the transition state. + * @param s The new state. + */ + void onResponse(CacheBlk::State s) + { + newState = s; + } + + /** + * Initialize the snoop function. + * @param f The new snoop function. + */ + void onSnoop(SnoopFuncType f) + { + snoopFunc = f; + } + }; + + friend class CoherenceProtocol::StateTransition; + + /** Mask to select status bits relevant to coherence protocol. */ + const static CacheBlk::State + stateMask = BlkValid | BlkWritable | BlkDirty; + + /** The Modified (M) state. */ + const static CacheBlk::State + Modified = BlkValid | BlkWritable | BlkDirty; + /** The Owned (O) state. */ + const static CacheBlk::State + Owned = BlkValid | BlkDirty; + /** The Exclusive (E) state. */ + const static CacheBlk::State + Exclusive = BlkValid | BlkWritable; + /** The Shared (S) state. */ + const static CacheBlk::State + Shared = BlkValid; + /** The Invalid (I) state. */ + const static CacheBlk::State + Invalid = 0; + + /** + * Maximum state encoding value (used to size transition lookup + * table). Could be more than number of states, depends on + * encoding of status bits. + */ + const static int stateMax = stateMask; + + /** + * The table of all possible transitions, organized by starting state and + * request command. + */ + StateTransition transitionTable[stateMax+1][NUM_MEM_CMDS]; + + /** + * @addtogroup CoherenceStatistics + * @{ + */ + /** + * State accesses from parent cache. + */ + Stats::Scalar<> requestCount[stateMax+1][NUM_MEM_CMDS]; + /** + * State accesses from snooped requests. + */ + Stats::Scalar<> snoopCount[stateMax+1][NUM_MEM_CMDS]; + /** + * @} + */ +}; + +#endif // __COHERENCE_PROTOCOL_HH__ diff --git a/src/mem/cache/coherence/simple_coherence.hh b/src/mem/cache/coherence/simple_coherence.hh new file mode 100644 index 0000000000..1956745902 --- /dev/null +++ b/src/mem/cache/coherence/simple_coherence.hh @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Declaration of a simple coherence policy. + */ + +#ifndef __SIMPLE_COHERENCE_HH__ +#define __SIMPLE_COHERENCE_HH__ + +#include + +#include "mem/packet.hh" +#include "mem/mem_cmd.hh" +#include "mem/cache/cache_blk.hh" +#include "mem/cache/miss/mshr_queue.hh" +#include "mem/cache/coherence/coherence_protocol.hh" + +class BaseCache; + +/** + * A simple MP coherence policy. This policy assumes an atomic bus and only one + * level of cache. + */ +class SimpleCoherence +{ + protected: + /** Pointer to the parent cache. */ + BaseCache *cache; + /** Pointer to the coherence protocol. */ + CoherenceProtocol *protocol; + + public: + /** + * Construct and initialize this coherence policy. + * @param _protocol The coherence protocol to use. + */ + SimpleCoherence(CoherenceProtocol *_protocol) + : protocol(_protocol) + { + } + + /** + * Set the pointer to the parent cache. + * @param _cache The parent cache. + */ + void setCache(BaseCache *_cache) + { + cache = _cache; + } + + /** + * Register statistics. + * @param name The name to prepend to stat descriptions. + */ + void regStats(const std::string &name) + { + } + + /** + * This policy does not forward invalidates, return NULL. + * @return NULL. + */ + Packet * getPacket() + { + return NULL; + } + + /** + * Return the proper state given the current state and the bus response. + * @param req The bus response. + * @param current The current block state. + * @return The new state. + */ + CacheBlk::State getNewState(Packet * &pkt, CacheBlk::State current) + { + return protocol->getNewState(pkt, current); + } + + /** + * Handle snooped bus requests. + * @param req The snooped bus request. + * @param blk The cache block corresponding to the request, if any. + * @param mshr The MSHR corresponding to the request, if any. + * @param new_state Return the new state for the block. + */ + bool handleBusRequest(Packet * &pkt, CacheBlk *blk, MSHR *mshr, + CacheBlk::State &new_state) + { +// assert(mshr == NULL); +//Got rid of, there could be an MSHR, but it can't be in service + if (blk != NULL) + { + if (pkt->cmd != Writeback) { + return protocol->handleBusRequest(cache, pkt, blk, mshr, + new_state); + } + else { //It is a writeback, must be ownership protocol, just keep state + new_state = blk->status; + } + } + return false; + } + + /** + * Get the proper bus command for the given command and status. + * @param cmd The request's command. + * @param state The current state of the cache block. + * @return The proper bus command, as determined by the protocol. + */ + Packet::Command getBusCmd(Packet::Command &cmd, CacheBlk::State state) + { + if (cmd == Writeback) return Writeback; + return protocol->getBusCmd(cmd, state); + } + + /** + * Return true if this coherence policy can handle fast cache writes. + */ + bool allowFastWrites() { return false; } + + bool hasProtocol() { return true; } +}; + +#endif //__SIMPLE_COHERENCE_HH__ + + + + + + + + diff --git a/src/mem/cache/coherence/uni_coherence.cc b/src/mem/cache/coherence/uni_coherence.cc new file mode 100644 index 0000000000..68a78e3951 --- /dev/null +++ b/src/mem/cache/coherence/uni_coherence.cc @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +#include "mem/cache/coherence/uni_coherence.hh" +#include "mem/cache/base_cache.hh" + +#include "base/trace.hh" + +using namespace std; + +UniCoherence::UniCoherence() + : cshrs(50) +{ +} + +Packet * +UniCoherence::getPacket() +{ + bool unblock = cshrs.isFull(); + Packet * pkt = cshrs.getPkt(); + cshrs.markInService(pkt->senderState); + if (!cshrs.havePending()) { + cache->clearSlaveRequest(Request_Coherence); + } + if (unblock) { + //since CSHRs are always used as buffers, should always get rid of one + assert(!cshrs.isFull()); + cache->clearBlocked(Blocked_Coherence); + } + return pkt; +} + +/** + * @todo add support for returning slave requests, not doing them here. + */ +bool +UniCoherence::handleBusRequest(Packet * &pkt, CacheBlk *blk, MSHR *mshr, + CacheBlk::State &new_state) +{ + new_state = 0; + if (pkt->cmd.isInvalidate()) { + DPRINTF(Cache, "snoop inval on blk %x (blk ptr %x)\n", + pkt->paddr, blk); + if (!cache->isTopLevel()) { + // Forward to other caches + Packet * tmp = new MemPkt(); + tmp->cmd = Invalidate; + tmp->paddr = pkt->paddr; + tmp->size = pkt->size; + cshrs.allocate(tmp); + cache->setSlaveRequest(Request_Coherence, curTick); + if (cshrs.isFull()) { + cache->setBlockedForSnoop(Blocked_Coherence); + } + } + } else { + if (blk) { + new_state = blk->status; + } + } + return false; +} diff --git a/src/mem/cache/coherence/uni_coherence.hh b/src/mem/cache/coherence/uni_coherence.hh new file mode 100644 index 0000000000..b64f6c931a --- /dev/null +++ b/src/mem/cache/coherence/uni_coherence.hh @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +#ifndef __UNI_COHERENCE_HH__ +#define __UNI_COHERENCE_HH__ + +#include "base/trace.hh" +#include "mem/cache/cache_blk.hh" +#include "mem/cache/miss/mshr_queue.hh" +#include "mem/mem_cmd.hh" +#include "mem/packet.hh" + +class BaseCache; + +class UniCoherence +{ + protected: + /** Buffers to hold forwarded invalidates. */ + MSHRQueue cshrs; + /** Pointer to the parent cache. */ + BaseCache *cache; + + public: + /** + * Construct and initialize this coherence policy. + */ + UniCoherence(); + + /** + * Set the pointer to the parent cache. + * @param _cache The parent cache. + */ + void setCache(BaseCache *_cache) + { + cache = _cache; + } + + /** + * Register statistics. + * @param name The name to prepend to stat descriptions. + */ + void regStats(const std::string &name) + { + } + + /** + * Return Read. + * @param cmd The request's command. + * @param state The current state of the cache block. + * @return The proper bus command, as determined by the protocol. + * @todo Make changes so writebacks don't get here. + */ + Packet::Command getBusCmd(Packet::Command &cmd, CacheBlk::State state) + { + if (cmd == Hard_Prefetch && state) + warn("Trying to issue a prefetch to a block we already have\n"); + if (cmd == Writeback) + return Writeback; + return Read; + } + + /** + * Just return readable and writeable. + * @param req The bus response. + * @param current The current block state. + * @return The new state. + */ + CacheBlk::State getNewState(Packet * &pkt, CacheBlk::State current) + { + if (pkt->senderState) //Blocking Buffers don't get mshrs + { + if (pkt->senderState->originalCmd == Hard_Prefetch) { + DPRINTF(HWPrefetch, "Marking a hardware prefetch as such in the state\n"); + return BlkHWPrefetched | BlkValid | BlkWritable; + } + else { + return BlkValid | BlkWritable; + } + } + //@todo What about prefetching with blocking buffers + else + return BlkValid | BlkWritable; + } + /** + * Return outstanding invalidate to forward. + * @return The next invalidate to forward to lower levels of cache. + */ + Packet * getPacket(); + + /** + * Handle snooped bus requests. + * @param req The snooped bus request. + * @param blk The cache block corresponding to the request, if any. + * @param mshr The MSHR corresponding to the request, if any. + * @param new_state The new coherence state of the block. + * @return True if the request should be satisfied locally. + */ + bool handleBusRequest(Packet * &pkt, CacheBlk *blk, MSHR *mshr, + CacheBlk::State &new_state); + + /** + * Return true if this coherence policy can handle fast cache writes. + */ + bool allowFastWrites() { return true; } + + bool hasProtocol() { return false; } +}; + +#endif //__UNI_COHERENCE_HH__ diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc new file mode 100644 index 0000000000..621855c3d1 --- /dev/null +++ b/src/mem/cache/miss/blocking_buffer.cc @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Definitions of a simple buffer for a blocking cache. + */ + +#include "cpu/exec_context.hh" +#include "cpu/smt.hh" //for maxThreadsPerCPU +#include "mem/cache/base_cache.hh" +#include "mem/cache/miss/blocking_buffer.hh" +#include "mem/cache/prefetch/base_prefetcher.hh" +#include "sim/eventq.hh" // for Event declaration. + +using namespace TheISA; + +/** + * @todo Move writebacks into shared BaseBuffer class. + */ +void +BlockingBuffer::regStats(const std::string &name) +{ + using namespace Stats; + writebacks + .init(maxThreadsPerCPU) + .name(name + ".writebacks") + .desc("number of writebacks") + .flags(total) + ; +} + +void +BlockingBuffer::setCache(BaseCache *_cache) +{ + cache = _cache; + blkSize = cache->getBlockSize(); +} + +void +BlockingBuffer::setPrefetcher(BasePrefetcher *_prefetcher) +{ + prefetcher = _prefetcher; +} +void +BlockingBuffer::handleMiss(Packet * &pkt, int blk_size, Tick time) +{ + Addr blk_addr = pkt->paddr & ~(Addr)(blk_size - 1); + if (pkt->cmd.isWrite() && (pkt->isUncacheable() || !writeAllocate || + pkt->cmd.isNoResponse())) { + if (pkt->cmd.isNoResponse()) { + wb.allocateAsBuffer(pkt); + } else { + wb.allocate(pkt->cmd, blk_addr, pkt->req->asid, blk_size, pkt); + } + if (cache->doData()) { + memcpy(wb.pkt->data, pkt->data, blk_size); + } + cache->setBlocked(Blocked_NoWBBuffers); + cache->setMasterRequest(Request_WB, time); + return; + } + + if (pkt->cmd.isNoResponse()) { + miss.allocateAsBuffer(pkt); + } else { + miss.allocate(pkt->cmd, blk_addr, pkt->req->asid, blk_size, pkt); + } + if (!pkt->isUncacheable()) { + miss.pkt->flags |= CACHE_LINE_FILL; + } + cache->setBlocked(Blocked_NoMSHRs); + cache->setMasterRequest(Request_MSHR, time); +} + +Packet * +BlockingBuffer::getPacket() +{ + if (miss.pkt && !miss.inService) { + return miss.pkt; + } + return wb.pkt; +} + +void +BlockingBuffer::setBusCmd(Packet * &pkt, Packet::Command cmd) +{ + MSHR *mshr = pkt->senderState; + mshr->originalCmd = pkt->cmd; + if (pkt->isCacheFill()) + pkt->cmd = cmd; +} + +void +BlockingBuffer::restoreOrigCmd(Packet * &pkt) +{ + pkt->cmd = pkt->senderState->originalCmd; +} + +void +BlockingBuffer::markInService(Packet * &pkt) +{ + if (!pkt->isCacheFill() && pkt->cmd.isWrite()) { + // Forwarding a write/ writeback, don't need to change + // the command + assert(pkt->senderState == &wb); + cache->clearMasterRequest(Request_WB); + if (pkt->cmd.isNoResponse()) { + assert(wb.getNumTargets() == 0); + wb.deallocate(); + cache->clearBlocked(Blocked_NoWBBuffers); + } else { + wb.inService = true; + } + } else { + assert(pkt->senderState == &miss); + cache->clearMasterRequest(Request_MSHR); + if (pkt->cmd.isNoResponse()) { + assert(miss.getNumTargets() == 0); + miss.deallocate(); + cache->clearBlocked(Blocked_NoMSHRs); + } else { + //mark in service + miss.inService = true; + } + } +} + +void +BlockingBuffer::handleResponse(Packet * &pkt, Tick time) +{ + if (pkt->isCacheFill()) { + // targets were handled in the cache tags + assert(pkt->senderState == &miss); + miss.deallocate(); + cache->clearBlocked(Blocked_NoMSHRs); + } else { + if (pkt->senderState->hasTargets()) { + // Should only have 1 target if we had any + assert(pkt->senderState->getNumTargets() == 1); + Packet * target = pkt->senderState->getTarget(); + pkt->senderState->popTarget(); + if (cache->doData() && pkt->cmd.isRead()) { + memcpy(target->data, pkt->data, target->size); + } + cache->respond(target, time); + assert(!pkt->senderState->hasTargets()); + } + + if (pkt->cmd.isWrite()) { + assert(pkt->senderState == &wb); + wb.deallocate(); + cache->clearBlocked(Blocked_NoWBBuffers); + } else { + miss.deallocate(); + cache->clearBlocked(Blocked_NoMSHRs); + } + } +} + +void +BlockingBuffer::squash(int thread_number) +{ + if (miss.threadNum == thread_number) { + Packet * target = miss.getTarget(); + miss.popTarget(); + assert(target->thread_num == thread_number); + if (target->completionEvent != NULL) { + delete target->completionEvent; + } + target = NULL; + assert(!miss.hasTargets()); + miss.ntargets=0; + if (!miss.inService) { + miss.deallocate(); + cache->clearBlocked(Blocked_NoMSHRs); + cache->clearMasterRequest(Request_MSHR); + } + } +} + +void +BlockingBuffer::doWriteback(Addr addr, int asid, ExecContext *xc, + int size, uint8_t *data, bool compressed) +{ + + // Generate request + Packet * pkt = new Packet(); + pkt->paddr = addr; + pkt->req->asid = asid; + pkt->size = size; + pkt->data = new uint8_t[size]; + if (data) { + memcpy(pkt->data, data, size); + } + /** + * @todo Need to find a way to charge the writeback to the "correct" + * thread. + */ + pkt->xc = xc; + if (xc) + pkt->thread_num = xc->getThreadNum(); + else + pkt->thread_num = 0; + + pkt->cmd = Writeback; + if (compressed) { + pkt->flags |= COMPRESSED; + } + + writebacks[pkt->thread_num]++; + + wb.allocateAsBuffer(pkt); + cache->setMasterRequest(Request_WB, curTick); + cache->setBlocked(Blocked_NoWBBuffers); +} + + + +void +BlockingBuffer::doWriteback(Packet * &pkt) +{ + writebacks[pkt->thread_num]++; + + wb.allocateAsBuffer(pkt); + + // Since allocate as buffer copies the request, + // need to copy data here. + if (cache->doData()) { + memcpy(wb.pkt->data, pkt->data, pkt->size); + } + cache->setBlocked(Blocked_NoWBBuffers); + cache->setMasterRequest(Request_WB, curTick); +} diff --git a/src/mem/cache/miss/blocking_buffer.hh b/src/mem/cache/miss/blocking_buffer.hh new file mode 100644 index 0000000000..52256be742 --- /dev/null +++ b/src/mem/cache/miss/blocking_buffer.hh @@ -0,0 +1,257 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Declaration of a simple buffer for a blocking cache. + */ + +#ifndef __BLOCKING_BUFFER_HH__ +#define __BLOCKING_BUFFER_HH__ + +#include + +#include "mem/cache/miss/mshr.hh" +#include "base/statistics.hh" + +class BaseCache; +class BasePrefetcher; + +/** + * Miss and writeback storage for a blocking cache. + */ +class BlockingBuffer +{ +protected: + /** Miss storage. */ + MSHR miss; + /** WB storage. */ + MSHR wb; + + //Params + + /** Allocate on write misses. */ + const bool writeAllocate; + + /** Pointer to the parent cache. */ + BaseCache* cache; + + BasePrefetcher* prefetcher; + + /** Block size of the parent cache. */ + int blkSize; + + // Statistics + /** + * @addtogroup CacheStatistics + * @{ + */ + /** Number of blocks written back per thread. */ + Stats::Vector<> writebacks; + + /** + * @} + */ + +public: + /** + * Builds and initializes this buffer. + * @param write_allocate If true, treat write misses the same as reads. + */ + BlockingBuffer(bool write_allocate) + : writeAllocate(write_allocate) + { + } + + /** + * Register statistics for this object. + * @param name The name of the parent cache. + */ + void regStats(const std::string &name); + + /** + * Called by the parent cache to set the back pointer. + * @param _cache A pointer to the parent cache. + */ + void setCache(BaseCache *_cache); + + void setPrefetcher(BasePrefetcher *_prefetcher); + + /** + * Handle a cache miss properly. Requests the bus and marks the cache as + * blocked. + * @param req The request that missed in the cache. + * @param blk_size The block size of the cache. + * @param time The time the miss is detected. + */ + void handleMiss(Packet * &pkt, int blk_size, Tick time); + + /** + * Fetch the block for the given address and buffer the given target. + * @param addr The address to fetch. + * @param asid The address space of the address. + * @param blk_size The block size of the cache. + * @param time The time the miss is detected. + * @param target The target for the fetch. + */ + MSHR* fetchBlock(Addr addr, int asid, int blk_size, Tick time, + Packet * &target) + { + fatal("Unimplemented"); + } + + /** + * Selects a outstanding request to service. + * @return The request to service, NULL if none found. + */ + Packet * getPacket(); + + /** + * Set the command to the given bus command. + * @param req The request to update. + * @param cmd The bus command to use. + */ + void setBusCmd(Packet * &pkt, Packet::Command cmd); + + /** + * Restore the original command in case of a bus transmission error. + * @param req The request to reset. + */ + void restoreOrigCmd(Packet * &pkt); + + /** + * Marks a request as in service (sent on the bus). This can have side + * effect since storage for no response commands is deallocated once they + * are successfully sent. + * @param req The request that was sent on the bus. + */ + void markInService(Packet * &pkt); + + /** + * Frees the resources of the request and unblock the cache. + * @param req The request that has been satisfied. + * @param time The time when the request is satisfied. + */ + void handleResponse(Packet * &pkt, Tick time); + + /** + * Removes all outstanding requests for a given thread number. If a request + * has been sent to the bus, this function removes all of its targets. + * @param thread_number The thread number of the requests to squash. + */ + void squash(int thread_number); + + /** + * Return the current number of outstanding misses. + * @return the number of outstanding misses. + */ + int getMisses() + { + return miss.getNumTargets(); + } + + /** + * Searches for the supplied address in the miss "queue". + * @param addr The address to look for. + * @param asid The address space id. + * @return A pointer to miss if it matches. + */ + MSHR* findMSHR(Addr addr, int asid) + { + if (miss.addr == addr && miss.pkt) + return &miss; + return NULL; + } + + /** + * Searches for the supplied address in the write buffer. + * @param addr The address to look for. + * @param asid The address space id. + * @param writes List of pointers to the matching writes. + * @return True if there is a matching write. + */ + bool findWrites(Addr addr, int asid, std::vector& writes) + { + if (wb.addr == addr && wb.pkt) { + writes.push_back(&wb); + return true; + } + return false; + } + + + + /** + * Perform a writeback of dirty data to the given address. + * @param addr The address to write to. + * @param asid The address space id. + * @param xc The execution context of the address space. + * @param size The number of bytes to write. + * @param data The data to write, can be NULL. + * @param compressed True if the data is compressed. + */ + void doWriteback(Addr addr, int asid, ExecContext *xc, + int size, uint8_t *data, bool compressed); + + /** + * Perform a writeback request. + * @param req The writeback request. + */ + void doWriteback(Packet * &pkt); + + /** + * Returns true if there are outstanding requests. + * @return True if there are outstanding requests. + */ + bool havePending() + { + return !miss.inService || !wb.inService; + } + + /** + * Add a target to the given MSHR. This assumes it is in the miss queue. + * @param mshr The mshr to add a target to. + * @param req The target to add. + */ + void addTarget(MSHR *mshr, Packet * &pkt) + { + fatal("Shouldn't call this on a blocking buffer."); + } + + /** + * Dummy implmentation. + */ + MSHR* allocateTargetList(Addr addr, int asid) + { + fatal("Unimplemented"); + } +}; + +#endif // __BLOCKING_BUFFER_HH__ diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc new file mode 100644 index 0000000000..7902fbcee9 --- /dev/null +++ b/src/mem/cache/miss/miss_queue.cc @@ -0,0 +1,736 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Ron Dreslinski + */ + +/** + * @file + * Miss and writeback queue definitions. + */ + +#include "cpu/exec_context.hh" +#include "cpu/smt.hh" //for maxThreadsPerCPU +#include "mem/cache/base_cache.hh" +#include "mem/cache/miss/miss_queue.hh" +#include "mem/cache/prefetch/base_prefetcher.hh" + +using namespace std; + +// simple constructor +/** + * @todo Remove the +16 from the write buffer constructor once we handle + * stalling on writebacks do to compression writes. + */ +MissQueue::MissQueue(int numMSHRs, int numTargets, int write_buffers, + bool write_allocate, bool prefetch_miss) + : mq(numMSHRs, 4), wb(write_buffers,numMSHRs+1000), numMSHR(numMSHRs), + numTarget(numTargets), writeBuffers(write_buffers), + writeAllocate(write_allocate), order(0), prefetchMiss(prefetch_miss) +{ + noTargetMSHR = NULL; +} + +void +MissQueue::regStats(const string &name) +{ + using namespace Stats; + + writebacks + .init(maxThreadsPerCPU) + .name(name + ".writebacks") + .desc("number of writebacks") + .flags(total) + ; + + // MSHR hit statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = cmd.toString(); + + mshr_hits[access_idx] + .init(maxThreadsPerCPU) + .name(name + "." + cstr + "_mshr_hits") + .desc("number of " + cstr + " MSHR hits") + .flags(total | nozero | nonan) + ; + } + + demandMshrHits + .name(name + ".demand_mshr_hits") + .desc("number of demand (read+write) MSHR hits") + .flags(total) + ; + demandMshrHits = mshr_hits[Read] + mshr_hits[Write]; + + overallMshrHits + .name(name + ".overall_mshr_hits") + .desc("number of overall MSHR hits") + .flags(total) + ; + overallMshrHits = demandMshrHits + mshr_hits[Soft_Prefetch] + + mshr_hits[Hard_Prefetch]; + + // MSHR miss statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + mshr_misses[access_idx] + .init(maxThreadsPerCPU) + .name(name + "." + cstr + "_mshr_misses") + .desc("number of " + cstr + " MSHR misses") + .flags(total | nozero | nonan) + ; + } + + demandMshrMisses + .name(name + ".demand_mshr_misses") + .desc("number of demand (read+write) MSHR misses") + .flags(total) + ; + demandMshrMisses = mshr_misses[Read] + mshr_misses[Write]; + + overallMshrMisses + .name(name + ".overall_mshr_misses") + .desc("number of overall MSHR misses") + .flags(total) + ; + overallMshrMisses = demandMshrMisses + mshr_misses[Soft_Prefetch] + + mshr_misses[Hard_Prefetch]; + + // MSHR miss latency statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + mshr_miss_latency[access_idx] + .init(maxThreadsPerCPU) + .name(name + "." + cstr + "_mshr_miss_latency") + .desc("number of " + cstr + " MSHR miss cycles") + .flags(total | nozero | nonan) + ; + } + + demandMshrMissLatency + .name(name + ".demand_mshr_miss_latency") + .desc("number of demand (read+write) MSHR miss cycles") + .flags(total) + ; + demandMshrMissLatency = mshr_miss_latency[Read] + mshr_miss_latency[Write]; + + overallMshrMissLatency + .name(name + ".overall_mshr_miss_latency") + .desc("number of overall MSHR miss cycles") + .flags(total) + ; + overallMshrMissLatency = demandMshrMissLatency + + mshr_miss_latency[Soft_Prefetch] + mshr_miss_latency[Hard_Prefetch]; + + // MSHR uncacheable statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + mshr_uncacheable[access_idx] + .init(maxThreadsPerCPU) + .name(name + "." + cstr + "_mshr_uncacheable") + .desc("number of " + cstr + " MSHR uncacheable") + .flags(total | nozero | nonan) + ; + } + + overallMshrUncacheable + .name(name + ".overall_mshr_uncacheable_misses") + .desc("number of overall MSHR uncacheable misses") + .flags(total) + ; + overallMshrUncacheable = mshr_uncacheable[Read] + mshr_uncacheable[Write] + + mshr_uncacheable[Soft_Prefetch] + mshr_uncacheable[Hard_Prefetch]; + + // MSHR miss latency statistics + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + mshr_uncacheable_lat[access_idx] + .init(maxThreadsPerCPU) + .name(name + "." + cstr + "_mshr_uncacheable_latency") + .desc("number of " + cstr + " MSHR uncacheable cycles") + .flags(total | nozero | nonan) + ; + } + + overallMshrUncacheableLatency + .name(name + ".overall_mshr_uncacheable_latency") + .desc("number of overall MSHR uncacheable cycles") + .flags(total) + ; + overallMshrUncacheableLatency = mshr_uncacheable_lat[Read] + + mshr_uncacheable_lat[Write] + mshr_uncacheable_lat[Soft_Prefetch] + + mshr_uncacheable_lat[Hard_Prefetch]; + +#if 0 + // MSHR access formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + mshrAccesses[access_idx] + .name(name + "." + cstr + "_mshr_accesses") + .desc("number of " + cstr + " mshr accesses(hits+misses)") + .flags(total | nozero | nonan) + ; + mshrAccesses[access_idx] = + mshr_hits[access_idx] + mshr_misses[access_idx] + + mshr_uncacheable[access_idx]; + } + + demandMshrAccesses + .name(name + ".demand_mshr_accesses") + .desc("number of demand (read+write) mshr accesses") + .flags(total | nozero | nonan) + ; + demandMshrAccesses = demandMshrHits + demandMshrMisses; + + overallMshrAccesses + .name(name + ".overall_mshr_accesses") + .desc("number of overall (read+write) mshr accesses") + .flags(total | nozero | nonan) + ; + overallMshrAccesses = overallMshrHits + overallMshrMisses + + overallMshrUncacheable; +#endif + + // MSHR miss rate formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + mshrMissRate[access_idx] + .name(name + "." + cstr + "_mshr_miss_rate") + .desc("mshr miss rate for " + cstr + " accesses") + .flags(total | nozero | nonan) + ; + + mshrMissRate[access_idx] = + mshr_misses[access_idx] / cache->accesses[access_idx]; + } + + demandMshrMissRate + .name(name + ".demand_mshr_miss_rate") + .desc("mshr miss rate for demand accesses") + .flags(total) + ; + demandMshrMissRate = demandMshrMisses / cache->demandAccesses; + + overallMshrMissRate + .name(name + ".overall_mshr_miss_rate") + .desc("mshr miss rate for overall accesses") + .flags(total) + ; + overallMshrMissRate = overallMshrMisses / cache->overallAccesses; + + // mshrMiss latency formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + avgMshrMissLatency[access_idx] + .name(name + "." + cstr + "_avg_mshr_miss_latency") + .desc("average " + cstr + " mshr miss latency") + .flags(total | nozero | nonan) + ; + + avgMshrMissLatency[access_idx] = + mshr_miss_latency[access_idx] / mshr_misses[access_idx]; + } + + demandAvgMshrMissLatency + .name(name + ".demand_avg_mshr_miss_latency") + .desc("average overall mshr miss latency") + .flags(total) + ; + demandAvgMshrMissLatency = demandMshrMissLatency / demandMshrMisses; + + overallAvgMshrMissLatency + .name(name + ".overall_avg_mshr_miss_latency") + .desc("average overall mshr miss latency") + .flags(total) + ; + overallAvgMshrMissLatency = overallMshrMissLatency / overallMshrMisses; + + // mshrUncacheable latency formulas + for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { + Packet::Command cmd = (Packet::CommandEnum)access_idx; + const string &cstr = cmd.toString(); + + avgMshrUncacheableLatency[access_idx] + .name(name + "." + cstr + "_avg_mshr_uncacheable_latency") + .desc("average " + cstr + " mshr uncacheable latency") + .flags(total | nozero | nonan) + ; + + avgMshrUncacheableLatency[access_idx] = + mshr_uncacheable_lat[access_idx] / mshr_uncacheable[access_idx]; + } + + overallAvgMshrUncacheableLatency + .name(name + ".overall_avg_mshr_uncacheable_latency") + .desc("average overall mshr uncacheable latency") + .flags(total) + ; + overallAvgMshrUncacheableLatency = overallMshrUncacheableLatency / overallMshrUncacheable; + + mshr_cap_events + .init(maxThreadsPerCPU) + .name(name + ".mshr_cap_events") + .desc("number of times MSHR cap was activated") + .flags(total) + ; + + //software prefetching stats + soft_prefetch_mshr_full + .init(maxThreadsPerCPU) + .name(name + ".soft_prefetch_mshr_full") + .desc("number of mshr full events for SW prefetching instrutions") + .flags(total) + ; + + mshr_no_allocate_misses + .name(name +".no_allocate_misses") + .desc("Number of misses that were no-allocate") + ; + +} + +void +MissQueue::setCache(BaseCache *_cache) +{ + cache = _cache; + blkSize = cache->getBlockSize(); +} + +void +MissQueue::setPrefetcher(BasePrefetcher *_prefetcher) +{ + prefetcher = _prefetcher; +} + +MSHR* +MissQueue::allocateMiss(Packet * &pkt, int size, Tick time) +{ + MSHR* mshr = mq.allocate(pkt, size); + mshr->order = order++; + if (!pkt->isUncacheable() ){//&& !pkt->isNoAllocate()) { + // Mark this as a cache line fill + mshr->pkt->flags |= CACHE_LINE_FILL; + } + if (mq.isFull()) { + cache->setBlocked(Blocked_NoMSHRs); + } + if (pkt->cmd != Hard_Prefetch) { + //If we need to request the bus (not on HW prefetch), do so + cache->setMasterRequest(Request_MSHR, time); + } + return mshr; +} + + +MSHR* +MissQueue::allocateWrite(Packet * &pkt, int size, Tick time) +{ + MSHR* mshr = wb.allocate(pkt,pkt->size); + mshr->order = order++; + if (cache->doData()){ + if (pkt->isCompressed()) { + delete [] mshr->pkt->data; + mshr->pkt->actualSize = pkt->actualSize; + mshr->pkt->data = new uint8_t[pkt->actualSize]; + memcpy(mshr->pkt->data, pkt->data, pkt->actualSize); + } else { + memcpy(mshr->pkt->data, pkt->data, pkt->size); + } + } + if (wb.isFull()) { + cache->setBlocked(Blocked_NoWBBuffers); + } + + cache->setMasterRequest(Request_WB, time); + + return mshr; +} + + +/** + * @todo Remove SW prefetches on mshr hits. + */ +void +MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time) +{ +// if (!cache->isTopLevel()) + if (prefetchMiss) prefetcher->handleMiss(pkt, time); + + int size = blkSize; + Addr blkAddr = pkt->paddr & ~(Addr)(blkSize-1); + MSHR* mshr = NULL; + if (!pkt->isUncacheable()) { + mshr = mq.findMatch(blkAddr, pkt->req->asid); + if (mshr) { + //@todo remove hw_pf here + mshr_hits[pkt->cmd.toIndex()][pkt->thread_num]++; + if (mshr->threadNum != pkt->thread_num) { + mshr->threadNum = -1; + } + mq.allocateTarget(mshr, pkt); + if (mshr->pkt->isNoAllocate() && !pkt->isNoAllocate()) { + //We are adding an allocate after a no-allocate + mshr->pkt->flags &= ~NO_ALLOCATE; + } + if (mshr->getNumTargets() == numTarget) { + noTargetMSHR = mshr; + cache->setBlocked(Blocked_NoTargets); + mq.moveToFront(mshr); + } + return; + } + if (pkt->isNoAllocate()) { + //Count no-allocate requests differently + mshr_no_allocate_misses++; + } + else { + mshr_misses[pkt->cmd.toIndex()][pkt->thread_num]++; + } + } else { + //Count uncacheable accesses + mshr_uncacheable[pkt->cmd.toIndex()][pkt->thread_num]++; + size = pkt->size; + } + if (pkt->cmd.isWrite() && (pkt->isUncacheable() || !writeAllocate || + pkt->cmd.isNoResponse())) { + /** + * @todo Add write merging here. + */ + mshr = allocateWrite(pkt, pkt->size, time); + return; + } + + mshr = allocateMiss(pkt, size, time); +} + +MSHR* +MissQueue::fetchBlock(Addr addr, int asid, int blk_size, Tick time, + Packet * &target) +{ + Addr blkAddr = addr & ~(Addr)(blk_size - 1); + assert(mq.findMatch(addr, asid) == NULL); + MSHR *mshr = mq.allocateFetch(blkAddr, asid, blk_size, target); + mshr->order = order++; + mshr->pkt->flags |= CACHE_LINE_FILL; + if (mq.isFull()) { + cache->setBlocked(Blocked_NoMSHRs); + } + cache->setMasterRequest(Request_MSHR, time); + return mshr; +} + +Packet * +MissQueue::getPacket() +{ + Packet * pkt = mq.getReq(); + if (((wb.isFull() && wb.inServiceMSHRs == 0) || !pkt || + pkt->time > curTick) && wb.havePending()) { + pkt = wb.getReq(); + // Need to search for earlier miss. + MSHR *mshr = mq.findPending(pkt); + if (mshr && mshr->order < pkt->senderState->order) { + // Service misses in order until conflict is cleared. + return mq.getReq(); + } + } + if (pkt) { + MSHR* mshr = wb.findPending(pkt); + if (mshr /*&& mshr->order < pkt->senderState->order*/) { + // The only way this happens is if we are + // doing a write and we didn't have permissions + // then subsequently saw a writeback(owned got evicted) + // We need to make sure to perform the writeback first + // To preserve the dirty data, then we can issue the write + return wb.getReq(); + } + } + else if (!mq.isFull()){ + //If we have a miss queue slot, we can try a prefetch + pkt = prefetcher->getPacket(); + if (pkt) { + //Update statistic on number of prefetches issued (hwpf_mshr_misses) + mshr_misses[pkt->cmd.toIndex()][pkt->thread_num]++; + //It will request the bus for the future, but should clear that immedieatley + allocateMiss(pkt, pkt->size, curTick); + pkt = mq.getReq(); + assert(pkt); //We should get back a req b/c we just put one in + } + } + return pkt; +} + +void +MissQueue::setBusCmd(Packet * &pkt, Packet::Command cmd) +{ + assert(pkt->senderState != 0); + MSHR * mshr = pkt->senderState; + mshr->originalCmd = pkt->cmd; + if (pkt->isCacheFill() || pkt->isNoAllocate()) + pkt->cmd = cmd; +} + +void +MissQueue::restoreOrigCmd(Packet * &pkt) +{ + pkt->cmd = pkt->senderState->originalCmd; +} + +void +MissQueue::markInService(Packet * &pkt) +{ + assert(pkt->senderState != 0); + bool unblock = false; + BlockedCause cause = NUM_BLOCKED_CAUSES; + + /** + * @todo Should include MSHRQueue pointer in MSHR to select the correct + * one. + */ + if ((!pkt->isCacheFill() && pkt->cmd.isWrite()) || pkt->cmd == Copy) { + // Forwarding a write/ writeback, don't need to change + // the command + unblock = wb.isFull(); + wb.markInService(pkt->senderState); + if (!wb.havePending()){ + cache->clearMasterRequest(Request_WB); + } + if (unblock) { + // Do we really unblock? + unblock = !wb.isFull(); + cause = Blocked_NoWBBuffers; + } + } else { + unblock = mq.isFull(); + mq.markInService(pkt->senderState); + if (!mq.havePending()){ + cache->clearMasterRequest(Request_MSHR); + } + if (pkt->senderState->originalCmd == Hard_Prefetch) { + DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n", + cache->name()); + //Also clear pending if need be + if (!prefetcher->havePending()) + { + cache->clearMasterRequest(Request_PF); + } + } + if (unblock) { + unblock = !mq.isFull(); + cause = Blocked_NoMSHRs; + } + } + if (unblock) { + cache->clearBlocked(cause); + } +} + + +void +MissQueue::handleResponse(Packet * &pkt, Tick time) +{ + MSHR* mshr = pkt->senderState; + if (pkt->senderState->originalCmd == Hard_Prefetch) { + DPRINTF(HWPrefetch, "%s:Handling the response to a HW_PF\n", + cache->name()); + } +#ifndef NDEBUG + int num_targets = mshr->getNumTargets(); +#endif + + bool unblock = false; + bool unblock_target = false; + BlockedCause cause = NUM_BLOCKED_CAUSES; + + if (pkt->isCacheFill() && !pkt->isNoAllocate()) { + mshr_miss_latency[mshr->originalCmd][pkt->thread_num] += + curTick - pkt->time; + // targets were handled in the cache tags + if (mshr == noTargetMSHR) { + // we always clear at least one target + unblock_target = true; + cause = Blocked_NoTargets; + noTargetMSHR = NULL; + } + + if (mshr->hasTargets()) { + // Didn't satisfy all the targets, need to resend + Packet::Command cmd = mshr->getTarget()->cmd; + mq.markPending(mshr, cmd); + mshr->order = order++; + cache->setMasterRequest(Request_MSHR, time); + } + else { + unblock = mq.isFull(); + mq.deallocate(mshr); + if (unblock) { + unblock = !mq.isFull(); + cause = Blocked_NoMSHRs; + } + } + } else { + if (pkt->isUncacheable()) { + mshr_uncacheable_lat[pkt->cmd][pkt->thread_num] += + curTick - pkt->time; + } + if (mshr->hasTargets() && pkt->isUncacheable()) { + // Should only have 1 target if we had any + assert(num_targets == 1); + Packet * target = mshr->getTarget(); + mshr->popTarget(); + if (cache->doData() && pkt->cmd.isRead()) { + memcpy(target->data, pkt->data, target->size); + } + cache->respond(target, time); + assert(!mshr->hasTargets()); + } + else if (mshr->hasTargets()) { + //Must be a no_allocate with possibly more than one target + assert(mshr->pkt->isNoAllocate()); + while (mshr->hasTargets()) { + Packet * target = mshr->getTarget(); + mshr->popTarget(); + if (cache->doData() && pkt->cmd.isRead()) { + memcpy(target->data, pkt->data, target->size); + } + cache->respond(target, time); + } + } + + if (pkt->cmd.isWrite()) { + // If the wrtie buffer is full, we might unblock now + unblock = wb.isFull(); + wb.deallocate(mshr); + if (unblock) { + // Did we really unblock? + unblock = !wb.isFull(); + cause = Blocked_NoWBBuffers; + } + } else { + unblock = mq.isFull(); + mq.deallocate(mshr); + if (unblock) { + unblock = !mq.isFull(); + cause = Blocked_NoMSHRs; + } + } + } + if (unblock || unblock_target) { + cache->clearBlocked(cause); + } +} + +void +MissQueue::squash(int thread_number) +{ + bool unblock = false; + BlockedCause cause = NUM_BLOCKED_CAUSES; + + if (noTargetMSHR && noTargetMSHR->threadNum == thread_number) { + noTargetMSHR = NULL; + unblock = true; + cause = Blocked_NoTargets; + } + if (mq.isFull()) { + unblock = true; + cause = Blocked_NoMSHRs; + } + mq.squash(thread_number); + if (!mq.havePending()) { + cache->clearMasterRequest(Request_MSHR); + } + if (unblock && !mq.isFull()) { + cache->clearBlocked(cause); + } + +} + +MSHR* +MissQueue::findMSHR(Addr addr, int asid) const +{ + return mq.findMatch(addr,asid); +} + +bool +MissQueue::findWrites(Addr addr, int asid, vector &writes) const +{ + return wb.findMatches(addr,asid,writes); +} + +void +MissQueue::doWriteback(Addr addr, int asid, + int size, uint8_t *data, bool compressed) +{ + // Generate request + Packet * pkt = buildWritebackReq(addr, asid, size, data, + compressed); + + writebacks[pkt->thread_num]++; + + allocateWrite(pkt, 0, curTick); +} + + +void +MissQueue::doWriteback(Packet * &pkt) +{ + writebacks[pkt->thread_num]++; + allocateWrite(pkt, 0, curTick); +} + + +MSHR* +MissQueue::allocateTargetList(Addr addr, int asid) +{ + MSHR* mshr = mq.allocateTargetList(addr, asid, blkSize); + mshr->pkt->flags |= CACHE_LINE_FILL; + if (mq.isFull()) { + cache->setBlocked(Blocked_NoMSHRs); + } + return mshr; +} + +bool +MissQueue::havePending() +{ + return mq.havePending() || wb.havePending() || prefetcher->havePending(); +} diff --git a/src/mem/cache/miss/miss_queue.hh b/src/mem/cache/miss/miss_queue.hh new file mode 100644 index 0000000000..ce827fe812 --- /dev/null +++ b/src/mem/cache/miss/miss_queue.hh @@ -0,0 +1,349 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Miss and writeback queue declarations. + */ + +#ifndef __MISS_QUEUE_HH__ +#define __MISS_QUEUE_HH__ + +#include + +#include "mem/cache/miss/mshr.hh" +#include "mem/cache/miss/mshr_queue.hh" +#include "base/statistics.hh" + +class BaseCache; +class BasePrefetcher; +/** + * Manages cache misses and writebacks. Contains MSHRs to store miss data + * and the writebuffer for writes/writebacks. + * @todo need to handle data on writes better (encapsulate). + * @todo need to make replacements/writebacks happen in Cache::access + */ +class MissQueue +{ + protected: + /** The MSHRs. */ + MSHRQueue mq; + /** Write Buffer. */ + MSHRQueue wb; + + // PARAMTERS + + /** The number of MSHRs in the miss queue. */ + const int numMSHR; + /** The number of targets for each MSHR. */ + const int numTarget; + /** The number of write buffers. */ + const int writeBuffers; + /** True if the cache should allocate on a write miss. */ + const bool writeAllocate; + /** Pointer to the parent cache. */ + BaseCache* cache; + + /** The Prefetcher */ + BasePrefetcher *prefetcher; + + /** The block size of the parent cache. */ + int blkSize; + + /** Increasing order number assigned to each incoming request. */ + uint64_t order; + + bool prefetchMiss; + + // Statistics + /** + * @addtogroup CacheStatistics + * @{ + */ + /** Number of blocks written back per thread. */ + Stats::Vector<> writebacks; + + /** Number of misses that hit in the MSHRs per command and thread. */ + Stats::Vector<> mshr_hits[NUM_MEM_CMDS]; + /** Demand misses that hit in the MSHRs. */ + Stats::Formula demandMshrHits; + /** Total number of misses that hit in the MSHRs. */ + Stats::Formula overallMshrHits; + + /** Number of misses that miss in the MSHRs, per command and thread. */ + Stats::Vector<> mshr_misses[NUM_MEM_CMDS]; + /** Demand misses that miss in the MSHRs. */ + Stats::Formula demandMshrMisses; + /** Total number of misses that miss in the MSHRs. */ + Stats::Formula overallMshrMisses; + + /** Number of misses that miss in the MSHRs, per command and thread. */ + Stats::Vector<> mshr_uncacheable[NUM_MEM_CMDS]; + /** Total number of misses that miss in the MSHRs. */ + Stats::Formula overallMshrUncacheable; + + /** Total cycle latency of each MSHR miss, per command and thread. */ + Stats::Vector<> mshr_miss_latency[NUM_MEM_CMDS]; + /** Total cycle latency of demand MSHR misses. */ + Stats::Formula demandMshrMissLatency; + /** Total cycle latency of overall MSHR misses. */ + Stats::Formula overallMshrMissLatency; + + /** Total cycle latency of each MSHR miss, per command and thread. */ + Stats::Vector<> mshr_uncacheable_lat[NUM_MEM_CMDS]; + /** Total cycle latency of overall MSHR misses. */ + Stats::Formula overallMshrUncacheableLatency; + + /** The total number of MSHR accesses per command and thread. */ + Stats::Formula mshrAccesses[NUM_MEM_CMDS]; + /** The total number of demand MSHR accesses. */ + Stats::Formula demandMshrAccesses; + /** The total number of MSHR accesses. */ + Stats::Formula overallMshrAccesses; + + /** The miss rate in the MSHRs pre command and thread. */ + Stats::Formula mshrMissRate[NUM_MEM_CMDS]; + /** The demand miss rate in the MSHRs. */ + Stats::Formula demandMshrMissRate; + /** The overall miss rate in the MSHRs. */ + Stats::Formula overallMshrMissRate; + + /** The average latency of an MSHR miss, per command and thread. */ + Stats::Formula avgMshrMissLatency[NUM_MEM_CMDS]; + /** The average latency of a demand MSHR miss. */ + Stats::Formula demandAvgMshrMissLatency; + /** The average overall latency of an MSHR miss. */ + Stats::Formula overallAvgMshrMissLatency; + + /** The average latency of an MSHR miss, per command and thread. */ + Stats::Formula avgMshrUncacheableLatency[NUM_MEM_CMDS]; + /** The average overall latency of an MSHR miss. */ + Stats::Formula overallAvgMshrUncacheableLatency; + + /** The number of times a thread hit its MSHR cap. */ + Stats::Vector<> mshr_cap_events; + /** The number of times software prefetches caused the MSHR to block. */ + Stats::Vector<> soft_prefetch_mshr_full; + + Stats::Scalar<> mshr_no_allocate_misses; + + /** + * @} + */ + + private: + /** Pointer to the MSHR that has no targets. */ + MSHR* noTargetMSHR; + + /** + * Allocate a new MSHR to handle the provided miss. + * @param req The miss to buffer. + * @param size The number of bytes to fetch. + * @param time The time the miss occurs. + * @return A pointer to the new MSHR. + */ + MSHR* allocateMiss(Packet * &pkt, int size, Tick time); + + /** + * Allocate a new WriteBuffer to handle the provided write. + * @param req The write to handle. + * @param size The number of bytes to write. + * @param time The time the write occurs. + * @return A pointer to the new write buffer. + */ + MSHR* allocateWrite(Packet * &pkt, int size, Tick time); + + public: + /** + * Simple Constructor. Initializes all needed internal storage and sets + * parameters. + * @param numMSHRs The number of outstanding misses to handle. + * @param numTargets The number of outstanding targets to each miss. + * @param write_buffers The number of outstanding writes to handle. + * @param write_allocate If true, treat write misses the same as reads. + */ + MissQueue(int numMSHRs, int numTargets, int write_buffers, + bool write_allocate, bool prefetch_miss); + + /** + * Deletes all allocated internal storage. + */ + ~MissQueue(); + + /** + * Register statistics for this object. + * @param name The name of the parent cache. + */ + void regStats(const std::string &name); + + /** + * Called by the parent cache to set the back pointer. + * @param _cache A pointer to the parent cache. + */ + void setCache(BaseCache *_cache); + + void setPrefetcher(BasePrefetcher *_prefetcher); + + /** + * Handle a cache miss properly. Either allocate an MSHR for the request, + * or forward it through the write buffer. + * @param req The request that missed in the cache. + * @param blk_size The block size of the cache. + * @param time The time the miss is detected. + */ + void handleMiss(Packet * &pkt, int blk_size, Tick time); + + /** + * Fetch the block for the given address and buffer the given target. + * @param addr The address to fetch. + * @param asid The address space of the address. + * @param blk_size The block size of the cache. + * @param time The time the miss is detected. + * @param target The target for the fetch. + */ + MSHR* fetchBlock(Addr addr, int asid, int blk_size, Tick time, + Packet * &target); + + /** + * Selects a outstanding request to service. + * @return The request to service, NULL if none found. + */ + Packet * getPacket(); + + /** + * Set the command to the given bus command. + * @param req The request to update. + * @param cmd The bus command to use. + */ + void setBusCmd(Packet * &pkt, Packet::Command cmd); + + /** + * Restore the original command in case of a bus transmission error. + * @param req The request to reset. + */ + void restoreOrigCmd(Packet * &pkt); + + /** + * Marks a request as in service (sent on the bus). This can have side + * effect since storage for no response commands is deallocated once they + * are successfully sent. + * @param req The request that was sent on the bus. + */ + void markInService(Packet * &pkt); + + /** + * Collect statistics and free resources of a satisfied request. + * @param req The request that has been satisfied. + * @param time The time when the request is satisfied. + */ + void handleResponse(Packet * &pkt, Tick time); + + /** + * Removes all outstanding requests for a given thread number. If a request + * has been sent to the bus, this function removes all of its targets. + * @param thread_number The thread number of the requests to squash. + */ + void squash(int thread_number); + + /** + * Return the current number of outstanding misses. + * @return the number of outstanding misses. + */ + int getMisses() + { + return mq.getAllocatedTargets(); + } + + /** + * Searches for the supplied address in the miss queue. + * @param addr The address to look for. + * @param asid The address space id. + * @return The MSHR that contains the address, NULL if not found. + * @warning Currently only searches the miss queue. If non write allocate + * might need to search the write buffer for coherence. + */ + MSHR* findMSHR(Addr addr, int asid) const; + + /** + * Searches for the supplied address in the write buffer. + * @param addr The address to look for. + * @param asid The address space id. + * @param writes The list of writes that match the address. + * @return True if any writes are found + */ + bool findWrites(Addr addr, int asid, std::vector& writes) const; + + /** + * Perform a writeback of dirty data to the given address. + * @param addr The address to write to. + * @param asid The address space id. + * @param xc The execution context of the address space. + * @param size The number of bytes to write. + * @param data The data to write, can be NULL. + * @param compressed True if the data is compressed. + */ + void doWriteback(Addr addr, int asid, + int size, uint8_t *data, bool compressed); + + /** + * Perform the given writeback request. + * @param req The writeback request. + */ + void doWriteback(Packet * &pkt); + + /** + * Returns true if there are outstanding requests. + * @return True if there are outstanding requests. + */ + bool havePending(); + + /** + * Add a target to the given MSHR. This assumes it is in the miss queue. + * @param mshr The mshr to add a target to. + * @param req The target to add. + */ + void addTarget(MSHR *mshr, Packet * &pkt) + { + mq.allocateTarget(mshr, pkt); + } + + /** + * Allocate a MSHR to hold a list of targets to a block involved in a copy. + * If the block is marked done then the MSHR already holds the data to + * fill the block. Otherwise the block needs to be fetched. + * @param addr The address to buffer. + * @param asid The address space ID. + * @return A pointer to the allocated MSHR. + */ + MSHR* allocateTargetList(Addr addr, int asid); + +}; + +#endif //__MISS_QUEUE_HH__ diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc new file mode 100644 index 0000000000..73aeaf6cae --- /dev/null +++ b/src/mem/cache/miss/mshr.cc @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Dave Greene + */ + +/** + * @file + * Miss Status and Handling Register (MSHR) definitions. + */ + +#include +#include +#include + +#include "mem/cache/miss/mshr.hh" +#include "sim/root.hh" // for curTick +#include "sim/host.hh" +#include "base/misc.hh" +#include "mem/cache/cache.hh" + +using namespace std; + +MSHR::MSHR() +{ + inService = false; + ntargets = 0; + threadNum = -1; +} + +void +MSHR::allocate(Packet::Command cmd, Addr _addr, int _asid, int size, + Packet * &target) +{ + assert(targets.empty()); + addr = _addr; + asid = _asid; + + pkt = new Packet(); // allocate new memory request + pkt->addr = addr; //picked physical address for now + pkt->cmd = cmd; + pkt->size = size; + pkt->data = new uint8_t[size]; + pkt->senderState = this; + //Set the time here for latency calculations + //pkt->time = curTick; + + if (target) { + pkt->req = target->req; + allocateTarget(target); + } +} + +// Since we aren't sure if data is being used, don't copy here. +/** + * @todo When we have a "global" data flag, might want to copy data here. + */ +void +MSHR::allocateAsBuffer(Packet * &target) +{ + addr = target->paddr; + asid = target->req->asid; + threadNum = target->thread_num; + pkt = new Packet(); + pkt->addr = target->addr; + pkt->dest = target->dest; + pkt->cmd = target->cmd; + pkt->size = target->size; + pkt->req = target->req; + pkt->data = new uint8_t[target->size]; + pkt->senderState = this; +} + +void +MSHR::deallocate() +{ + assert(targets.empty()); + assert(ntargets == 0); + pkt = NULL; + inService = false; + allocIter = NULL; + readyIter = NULL; +} + +/* + * Adds a target to an MSHR + */ +void +MSHR::allocateTarget(Packet * &target) +{ + //If we append an invalidate and we issued a read to the bus, + //but now have some pending writes, we need to move + //the invalidate to before the first non-read + if (inService && pkt->cmd.isRead() && target->cmd.isInvalidate()) { + std::list temp; + + while (!targets.empty()) { + if (!targets.front()->cmd.isRead()) break; + //Place on top of temp stack + temp.push_front(targets.front()); + //Remove from targets + targets.pop_front(); + } + + //Now that we have all the reads off until first non-read, we can + //place the invalidate on + targets.push_front(target); + + //Now we pop off the temp_stack and put them back + while (!temp.empty()) { + targets.push_front(temp.front()); + temp.pop_front(); + } + } + else { + targets.push_back(target); + } + + ++ntargets; + assert(targets.size() == ntargets); + /** + * @todo really prioritize the target commands. + */ + + if (!inService && target->cmd.isWrite()) { + pkt->cmd = WriteReq; + } +} + + + +void +MSHR::dump() +{ + ccprintf(cerr, + "inService: %d thread: %d\n" + "Addr: %x asid: %d ntargets %d\n" + "Targets:\n", + inService, threadNum, addr, asid, ntargets); + + TargetListIterator tar_it = targets.begin(); + for (int i = 0; i < ntargets; i++) { + assert(tar_it != targets.end()); + + ccprintf(cerr, "\t%d: Addr: %x cmd: %d\n", + i, (*tar_it)->paddr, (*tar_it)->cmd.toIndex()); + + tar_it++; + } + ccprintf(cerr, "\n"); +} + +MSHR::~MSHR() +{ + if (pkt) + pkt = NULL; +} diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh new file mode 100644 index 0000000000..167aa26cd1 --- /dev/null +++ b/src/mem/cache/miss/mshr.hh @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Miss Status and Handling Register (MSHR) declaration. + */ + +#ifndef __MSHR_HH__ +#define __MSHR_HH__ + +#include "mem/packet.hh" +#include +#include + +class MSHR; + +/** + * Miss Status and handling Register. This class keeps all the information + * needed to handle a cache miss including a list of target requests. + */ +class MSHR { + public: + /** Defines the Data structure of the MSHR targetlist. */ + typedef std::list TargetList; + /** Target list iterator. */ + typedef std::list::iterator TargetListIterator; + /** A list of MSHRs. */ + typedef std::list List; + /** MSHR list iterator. */ + typedef List::iterator Iterator; + /** MSHR list const_iterator. */ + typedef List::const_iterator ConstIterator; + + /** Address of the miss. */ + Addr addr; + /** Adress space id of the miss. */ + short asid; + /** True if the request has been sent to the bus. */ + bool inService; + /** Thread number of the miss. */ + int threadNum; + /** The request that is forwarded to the next level of the hierarchy. */ + Packet * pkt; + /** The number of currently allocated targets. */ + short ntargets; + /** The original requesting command. */ + Packet::Command originalCmd; + /** Order number of assigned by the miss queue. */ + uint64_t order; + + /** + * Pointer to this MSHR on the ready list. + * @sa MissQueue, MSHRQueue::readyList + */ + Iterator readyIter; + /** + * Pointer to this MSHR on the allocated list. + * @sa MissQueue, MSHRQueue::allocatedList + */ + Iterator allocIter; + +private: + /** List of all requests that match the address */ + TargetList targets; + +public: + /** + * Allocate a miss to this MSHR. + * @param cmd The requesting command. + * @param addr The address of the miss. + * @param asid The address space id of the miss. + * @param size The number of bytes to request. + * @param req The original miss. + */ + void allocate(Packet::Command cmd, Addr addr, int asid, int size, + Packet * &pkt); + + /** + * Allocate this MSHR as a buffer for the given request. + * @param target The memory request to buffer. + */ + void allocateAsBuffer(Packet * &target); + + /** + * Mark this MSHR as free. + */ + void deallocate(); + + /** + * Add a request to the list of targets. + * @param target The target. + */ + void allocateTarget(Packet * &target); + + /** A simple constructor. */ + MSHR(); + /** A simple destructor. */ + ~MSHR(); + + /** + * Returns the current number of allocated targets. + * @return The current number of allocated targets. + */ + int getNumTargets() + { + return(ntargets); + } + + /** + * Returns a pointer to the target list. + * @return a pointer to the target list. + */ + TargetList* getTargetList() + { + return &targets; + } + + /** + * Returns a reference to the first target. + * @return A pointer to the first target. + */ + Packet * getTarget() + { + return targets.front(); + } + + /** + * Pop first target. + */ + void popTarget() + { + --ntargets; + targets.pop_front(); + } + + /** + * Returns true if there are targets left. + * @return true if there are targets + */ + bool hasTargets() + { + return !targets.empty(); + } + + /** + * Prints the contents of this MSHR to stderr. + */ + void dump(); +}; + +#endif //__MSHR_HH__ diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc new file mode 100644 index 0000000000..72c8cc4981 --- /dev/null +++ b/src/mem/cache/miss/mshr_queue.cc @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** @file + * Definition of the MSHRQueue. + */ + +#include "mem/cache/miss/mshr_queue.hh" +#include "sim/eventq.hh" + +using namespace std; + +MSHRQueue::MSHRQueue(int num_mshrs, int reserve) + : numMSHRs(num_mshrs + reserve - 1), numReserve(reserve) +{ + allocated = 0; + inServiceMSHRs = 0; + allocatedTargets = 0; + registers = new MSHR[numMSHRs]; + for (int i = 0; i < numMSHRs; ++i) { + freeList.push_back(®isters[i]); + } +} + +MSHRQueue::~MSHRQueue() +{ + delete [] registers; +} + +MSHR* +MSHRQueue::findMatch(Addr addr, int asid) const +{ + MSHR::ConstIterator i = allocatedList.begin(); + MSHR::ConstIterator end = allocatedList.end(); + for (; i != end; ++i) { + MSHR *mshr = *i; + if (mshr->addr == addr) { + return mshr; + } + } + return NULL; +} + +bool +MSHRQueue::findMatches(Addr addr, int asid, vector& matches) const +{ + // Need an empty vector + assert(matches.empty()); + bool retval = false; + MSHR::ConstIterator i = allocatedList.begin(); + MSHR::ConstIterator end = allocatedList.end(); + for (; i != end; ++i) { + MSHR *mshr = *i; + if (mshr->addr == addr) { + retval = true; + matches.push_back(mshr); + } + } + return retval; + +} + +MSHR* +MSHRQueue::findPending(Packet * &pkt) const +{ + MSHR::ConstIterator i = pendingList.begin(); + MSHR::ConstIterator end = pendingList.end(); + for (; i != end; ++i) { + MSHR *mshr = *i; + if (mshr->addr < pkt->addr) { + if (mshr->addr + mshr->pkt->size > pkt->addr) { + return mshr; + } + } else { + if (pkt->addr + pkt->size > mshr->addr) { + return mshr; + } + } + + //need to check destination address for copies. + if (mshr->pkt->cmd == Copy) { + Addr dest = mshr->pkt->dest; + if (dest < pkt->addr) { + if (dest + mshr->pkt->size > pkt->addr) { + return mshr; + } + } else { + if (pkt->addr + pkt->size > dest) { + return mshr; + } + } + } + } + return NULL; +} + +MSHR* +MSHRQueue::allocate(Packet * &pkt, int size) +{ + Addr aligned_addr = pkt->addr & ~((Addr)size - 1); + MSHR *mshr = freeList.front(); + assert(mshr->getNumTargets() == 0); + freeList.pop_front(); + + if (pkt->cmd.isNoResponse()) { + mshr->allocateAsBuffer(pkt); + } else { + assert(size !=0); + mshr->allocate(pkt->cmd, aligned_addr, pkt->req->req->asid, size, pkt); + allocatedTargets += 1; + } + mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr); + mshr->readyIter = pendingList.insert(pendingList.end(), mshr); + + allocated += 1; + return mshr; +} + +MSHR* +MSHRQueue::allocateFetch(Addr addr, int asid, int size, Packet * &target) +{ + MSHR *mshr = freeList.front(); + assert(mshr->getNumTargets() == 0); + freeList.pop_front(); + mshr->allocate(Read, addr, asid, size, target); + mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr); + mshr->readyIter = pendingList.insert(pendingList.end(), mshr); + + allocated += 1; + return mshr; +} + +MSHR* +MSHRQueue::allocateTargetList(Addr addr, int asid, int size) +{ + MSHR *mshr = freeList.front(); + assert(mshr->getNumTargets() == 0); + freeList.pop_front(); + Packet * dummy; + mshr->allocate(Read, addr, asid, size, dummy); + mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr); + mshr->inService = true; + ++inServiceMSHRs; + ++allocated; + return mshr; +} + + +void +MSHRQueue::deallocate(MSHR* mshr) +{ + deallocateOne(mshr); +} + +MSHR::Iterator +MSHRQueue::deallocateOne(MSHR* mshr) +{ + MSHR::Iterator retval = allocatedList.erase(mshr->allocIter); + freeList.push_front(mshr); + allocated--; + allocatedTargets -= mshr->getNumTargets(); + if (mshr->inService) { + inServiceMSHRs--; + } else { + pendingList.erase(mshr->readyIter); + } + mshr->deallocate(); + return retval; +} + +void +MSHRQueue::moveToFront(MSHR *mshr) +{ + if (!mshr->inService) { + assert(mshr == *(mshr->readyIter)); + pendingList.erase(mshr->readyIter); + mshr->readyIter = pendingList.insert(pendingList.begin(), mshr); + } +} + +void +MSHRQueue::markInService(MSHR* mshr) +{ + //assert(mshr == pendingList.front()); + if (mshr->pkt->cmd.isNoResponse()) { + assert(mshr->getNumTargets() == 0); + deallocate(mshr); + return; + } + mshr->inService = true; + pendingList.erase(mshr->readyIter); + mshr->readyIter = NULL; + inServiceMSHRs += 1; + //pendingList.pop_front(); +} + +void +MSHRQueue::markPending(MSHR* mshr, Packet::Command cmd) +{ + assert(mshr->readyIter == NULL); + mshr->pkt->cmd = cmd; + mshr->pkt->flags &= ~SATISFIED; + mshr->inService = false; + --inServiceMSHRs; + /** + * @ todo might want to add rerequests to front of pending list for + * performance. + */ + mshr->readyIter = pendingList.insert(pendingList.end(), mshr); +} + +void +MSHRQueue::squash(int thread_number) +{ + MSHR::Iterator i = allocatedList.begin(); + MSHR::Iterator end = allocatedList.end(); + for (; i != end;) { + MSHR *mshr = *i; + if (mshr->threadNum == thread_number) { + while (mshr->hasTargets()) { + Packet * target = mshr->getTarget(); + mshr->popTarget(); + + assert(target->thread_num == thread_number); + if (target->completionEvent != NULL) { + delete target->completionEvent; + } + target = NULL; + } + assert(!mshr->hasTargets()); + assert(mshr->ntargets==0); + if (!mshr->inService) { + i = deallocateOne(mshr); + } else { + //mshr->pkt->flags &= ~CACHE_LINE_FILL; + ++i; + } + } else { + ++i; + } + } +} diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh new file mode 100644 index 0000000000..3e1d3f39f0 --- /dev/null +++ b/src/mem/cache/miss/mshr_queue.hh @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** @file + * Declaration of a structure to manage MSHRs. + */ + +#ifndef __MSHR_QUEUE_HH__ +#define __MSHR_QUEUE_HH__ + +#include +#include "mem/cache/miss/mshr.hh" + +/** + * A Class for maintaining a list of pending and allocated memory requests. + */ +class MSHRQueue { + private: + /** MSHR storage. */ + MSHR* registers; + /** Holds pointers to all allocated MSHRs. */ + MSHR::List allocatedList; + /** Holds pointers to MSHRs that haven't been sent to the bus. */ + MSHR::List pendingList; + /** Holds non allocated MSHRs. */ + MSHR::List freeList; + + // Parameters + /** + * The total number of MSHRs in this queue. This number is set as the + * number of MSHRs requested plus (numReserve - 1). This allows for + * the same number of effective MSHRs while still maintaining the reserve. + */ + const int numMSHRs; + + /** + * The number of MSHRs to hold in reserve. This is needed because copy + * operations can allocate upto 4 MSHRs at one time. + */ + const int numReserve; + + public: + /** The number of allocated MSHRs. */ + int allocated; + /** The number of MSHRs that have been forwarded to the bus. */ + int inServiceMSHRs; + /** The number of targets waiting for response. */ + int allocatedTargets; + + /** + * Create a queue with a given number of MSHRs. + * @param num_mshrs The number of MSHRs in this queue. + * @param reserve The minimum number of MSHRs needed to satisfy any access. + */ + MSHRQueue(int num_mshrs, int reserve = 1); + + /** Destructor */ + ~MSHRQueue(); + + /** + * Find the first MSHR that matches the provide address and asid. + * @param addr The address to find. + * @param asid The address space id. + * @return Pointer to the matching MSHR, null if not found. + */ + MSHR* findMatch(Addr addr, int asid) const; + + /** + * Find and return all the matching MSHRs in the provided vector. + * @param addr The address to find. + * @param asid The address space ID. + * @param matches The vector to return pointers to the matching MSHRs. + * @return True if any matches are found, false otherwise. + * @todo Typedef the vector?? + */ + bool findMatches(Addr addr, int asid, std::vector& matches) const; + + /** + * Find any pending requests that overlap the given request. + * @param req The request to find. + * @return A pointer to the earliest matching MSHR. + */ + MSHR* findPending(Packet * &pkt) const; + + /** + * Allocates a new MSHR for the request and size. This places the request + * as the first target in the MSHR. + * @param req The request to handle. + * @param size The number in bytes to fetch from memory. + * @return The a pointer to the MSHR allocated. + * + * @pre There are free MSHRs. + */ + MSHR* allocate(Packet * &pkt, int size = 0); + + /** + * Allocate a read request for the given address, and places the given + * target on the target list. + * @param addr The address to fetch. + * @param asid The address space for the fetch. + * @param size The number of bytes to request. + * @param target The first target for the request. + * @return Pointer to the new MSHR. + */ + MSHR* allocateFetch(Addr addr, int asid, int size, Packet * &target); + + /** + * Allocate a target list for the given address. + * @param addr The address to fetch. + * @param asid The address space for the fetch. + * @param size The number of bytes to request. + * @return Pointer to the new MSHR. + */ + MSHR* allocateTargetList(Addr addr, int asid, int size); + + /** + * Removes the given MSHR from the queue. This places the MSHR on the + * free list. + * @param mshr + */ + void deallocate(MSHR* mshr); + + /** + * Allocates a target to the given MSHR. Used to keep track of the number + * of outstanding targets. + * @param mshr The MSHR to allocate the target to. + * @param req The target request. + */ + void allocateTarget(MSHR* mshr, Packet * &pkt) + { + mshr->allocateTarget(pkt); + allocatedTargets += 1; + } + + /** + * Remove a MSHR from the queue. Returns an iterator into the allocatedList + * for faster squash implementation. + * @param mshr The MSHR to remove. + * @return An iterator to the next entry in the allocatedList. + */ + MSHR::Iterator deallocateOne(MSHR* mshr); + + /** + * Moves the MSHR to the front of the pending list if it is not in service. + * @param mshr The mshr to move. + */ + void moveToFront(MSHR *mshr); + + /** + * Mark the given MSHR as in service. This removes the MSHR from the + * pendingList. Deallocates the MSHR if it does not expect a response. + * @param mshr The MSHR to mark in service. + */ + void markInService(MSHR* mshr); + + /** + * Mark an in service mshr as pending, used to resend a request. + * @param mshr The MSHR to resend. + * @param cmd The command to resend. + */ + void markPending(MSHR* mshr, Packet::Command cmd); + + /** + * Squash outstanding requests with the given thread number. If a request + * is in service, just squashes the targets. + * @param thread_number The thread to squash. + */ + void squash(int thread_number); + + /** + * Returns true if the pending list is not empty. + * @return True if there are outstanding requests. + */ + bool havePending() const + { + return !pendingList.empty(); + } + + /** + * Returns true if there are no free MSHRs. + * @return True if this queue is full. + */ + bool isFull() const + { + return (allocated > numMSHRs - numReserve); + } + + /** + * Returns the request at the head of the pendingList. + * @return The next request to service. + */ + Packet * getReq() const + { + if (pendingList.empty()) { + return NULL; + } + MSHR* mshr = pendingList.front(); + return mshr->pkt; + } + + /** + * Returns the number of outstanding targets. + * @return the number of allocated targets. + */ + int getAllocatedTargets() const + { + return allocatedTargets; + } + +}; + +#endif //__MSHR_QUEUE_HH__ diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc new file mode 100644 index 0000000000..14beef2601 --- /dev/null +++ b/src/mem/cache/prefetch/base_prefetcher.cc @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ron Dreslinski + */ + +/** + * @file + * Hardware Prefetcher Definition. + */ + +#include "base/trace.hh" +#include "mem/cache/base_cache.hh" +#include "mem/cache/prefetch/base_prefetcher.hh" +#include + +BasePrefetcher::BasePrefetcher(int size, bool pageStop, bool serialSquash, + bool cacheCheckPush, bool onlyData) + :size(size), pageStop(pageStop), serialSquash(serialSquash), + cacheCheckPush(cacheCheckPush), only_data(onlyData) +{ +} + +void +BasePrefetcher::setCache(BaseCache *_cache) +{ + cache = _cache; + blkSize = cache->getBlockSize(); +} + +void +BasePrefetcher::regStats(const std::string &name) +{ + pfIdentified + .name(name + ".prefetcher.num_hwpf_identified") + .desc("number of hwpf identified") + ; + + pfMSHRHit + .name(name + ".prefetcher.num_hwpf_already_in_mshr") + .desc("number of hwpf that were already in mshr") + ; + + pfCacheHit + .name(name + ".prefetcher.num_hwpf_already_in_cache") + .desc("number of hwpf that were already in the cache") + ; + + pfBufferHit + .name(name + ".prefetcher.num_hwpf_already_in_prefetcher") + .desc("number of hwpf that were already in the prefetch queue") + ; + + pfRemovedFull + .name(name + ".prefetcher.num_hwpf_evicted") + .desc("number of hwpf removed due to no buffer left") + ; + + pfRemovedMSHR + .name(name + ".prefetcher.num_hwpf_removed_MSHR_hit") + .desc("number of hwpf removed because MSHR allocated") + ; + + pfIssued + .name(name + ".prefetcher.num_hwpf_issued") + .desc("number of hwpf issued") + ; + + pfSpanPage + .name(name + ".prefetcher.num_hwpf_span_page") + .desc("number of hwpf spanning a virtual page") + ; + + pfSquashed + .name(name + ".prefetcher.num_hwpf_squashed_from_miss") + .desc("number of hwpf that got squashed due to a miss aborting calculation time") + ; +} + +Packet * +BasePrefetcher::getPacket() +{ + DPRINTF(HWPrefetch, "%s:Requesting a hw_pf to issue\n", cache->name()); + + if (pf.empty()) { + DPRINTF(HWPrefetch, "%s:No HW_PF found\n", cache->name()); + return NULL; + } + + Packet * pkt; + bool keepTrying = false; + do { + pkt = *pf.begin(); + pf.pop_front(); + if (!cacheCheckPush) { + keepTrying = inCache(pkt); + } + if (pf.empty()) { + cache->clearMasterRequest(Request_PF); + if (keepTrying) return NULL; //None left, all were in cache + } + } while (keepTrying); + + pfIssued++; + return pkt; +} + +void +BasePrefetcher::handleMiss(Packet * &pkt, Tick time) +{ + if (!pkt->isUncacheable() && !(pkt->isInstRead() && only_data)) + { + //Calculate the blk address + Addr blkAddr = pkt->paddr & ~(Addr)(blkSize-1); + + //Check if miss is in pfq, if so remove it + std::list::iterator iter = inPrefetch(blkAddr); + if (iter != pf.end()) { + DPRINTF(HWPrefetch, "%s:Saw a miss to a queued prefetch, removing it\n", cache->name()); + pfRemovedMSHR++; + pf.erase(iter); + if (pf.empty()) + cache->clearMasterRequest(Request_PF); + } + + //Remove anything in queue with delay older than time + //since everything is inserted in time order, start from end + //and work until pf.empty() or time is earlier + //This is done to emulate Aborting the previous work on a new miss + //Needed for serial calculators like GHB + if (serialSquash) { + iter = pf.end(); + iter--; + while (!pf.empty() && ((*iter)->time >= time)) { + pfSquashed++; + pf.pop_back(); + iter--; + } + if (pf.empty()) + cache->clearMasterRequest(Request_PF); + } + + + std::list addresses; + std::list delays; + calculatePrefetch(pkt, addresses, delays); + + std::list::iterator addr = addresses.begin(); + std::list::iterator delay = delays.begin(); + while (addr != addresses.end()) + { + DPRINTF(HWPrefetch, "%s:Found a pf canidate, inserting into prefetch queue\n", cache->name()); + //temp calc this here... + pfIdentified++; + //create a prefetch memreq + Packet * prefetch; + prefetch = new Packet(); + prefetch->paddr = (*addr); + prefetch->size = blkSize; + prefetch->cmd = Hard_Prefetch; + prefetch->xc = pkt->xc; + prefetch->data = new uint8_t[blkSize]; + prefetch->req->asid = pkt->req->asid; + prefetch->thread_num = pkt->thread_num; + prefetch->time = time + (*delay); //@todo ADD LATENCY HERE + //... initialize + + //Check if it is already in the cache + if (cacheCheckPush) { + if (inCache(prefetch)) { + addr++; + delay++; + continue; + } + } + + //Check if it is already in the miss_queue + if (inMissQueue(prefetch->paddr, prefetch->req->asid)) { + addr++; + delay++; + continue; + } + + //Check if it is already in the pf buffer + if (inPrefetch(prefetch->paddr) != pf.end()) { + pfBufferHit++; + addr++; + delay++; + continue; + } + + //We just remove the head if we are full + if (pf.size() == size) + { + DPRINTF(HWPrefetch, "%s:Inserting into prefetch queue, it was full removing oldest\n", cache->name()); + pfRemovedFull++; + pf.pop_front(); + } + + pf.push_back(prefetch); + prefetch->flags |= CACHE_LINE_FILL; + + //Make sure to request the bus, with proper delay + cache->setMasterRequest(Request_PF, prefetch->time); + + //Increment through the list + addr++; + delay++; + } + } +} + +std::list::iterator +BasePrefetcher::inPrefetch(Addr address) +{ + //Guaranteed to only be one match, we always check before inserting + std::list::iterator iter; + for (iter=pf.begin(); iter != pf.end(); iter++) { + if (((*iter)->paddr & ~(Addr)(blkSize-1)) == address) { + return iter; + } + } + return pf.end(); +} + + diff --git a/src/mem/cache/prefetch/base_prefetcher.hh b/src/mem/cache/prefetch/base_prefetcher.hh new file mode 100644 index 0000000000..3e4fc89d10 --- /dev/null +++ b/src/mem/cache/prefetch/base_prefetcher.hh @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ron Dreslinski + */ + +/** + * @file + * Miss and writeback queue declarations. + */ + +#ifndef __MEM_CACHE_PREFETCH_BASE_PREFETCHER_HH__ +#define __MEM_CACHE_PREFETCH_BASE_PREFETCHER_HH__ + +#include "mem/packet.hh" +#include + +class BaseCache; +class BasePrefetcher +{ + protected: + + /** The Prefetch Queue. */ + std::list pf; + + // PARAMETERS + + /** The number of MSHRs in the Prefetch Queue. */ + const int size; + + /** Pointr to the parent cache. */ + BaseCache* cache; + + /** The block size of the parent cache. */ + int blkSize; + + /** Do we prefetch across page boundaries. */ + bool pageStop; + + /** Do we remove prefetches with later times than a new miss.*/ + bool serialSquash; + + /** Do we check if it is in the cache when inserting into buffer, + or removing.*/ + bool cacheCheckPush; + + /** Do we prefetch on only data reads, or on inst reads as well. */ + bool only_data; + + public: + + Stats::Scalar<> pfIdentified; + Stats::Scalar<> pfMSHRHit; + Stats::Scalar<> pfCacheHit; + Stats::Scalar<> pfBufferHit; + Stats::Scalar<> pfRemovedFull; + Stats::Scalar<> pfRemovedMSHR; + Stats::Scalar<> pfIssued; + Stats::Scalar<> pfSpanPage; + Stats::Scalar<> pfSquashed; + + void regStats(const std::string &name); + + public: + BasePrefetcher(int numMSHRS, bool pageStop, bool serialSquash, + bool cacheCheckPush, bool onlyData); + + virtual ~BasePrefetcher() {} + + void setCache(BaseCache *_cache); + + void handleMiss(Packet * &pkt, Tick time); + + Packet * getPacket(); + + bool havePending() + { + return !pf.empty(); + } + + virtual void calculatePrefetch(Packet * &pkt, + std::list &addresses, + std::list &delays) = 0; + + virtual bool inCache(Packet * &pkt) = 0; + + virtual bool inMissQueue(Addr address, int asid) = 0; + + std::list::iterator inPrefetch(Addr address); +}; + + +#endif //__MEM_CACHE_PREFETCH_BASE_PREFETCHER_HH__ diff --git a/src/mem/cache/prefetch/ghb_prefetcher.cc b/src/mem/cache/prefetch/ghb_prefetcher.cc new file mode 100644 index 0000000000..247ec6e8bb --- /dev/null +++ b/src/mem/cache/prefetch/ghb_prefetcher.cc @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ron Dreslinski + * Steve Reinhardt + */ + +/** + * @file + * GHB Prefetcher template instantiations. + */ + +#include "mem/cache/tags/cache_tags.hh" + +#include "mem/cache/tags/lru.hh" + +#include "base/compression/null_compression.hh" + +#include "mem/cache/miss/miss_queue.hh" +#include "mem/cache/miss/blocking_buffer.hh" + +#include "mem/cache/prefetch/ghb_prefetcher.hh" + +// Template Instantiations +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +template class GHBPrefetcher, MissQueue>; +template class GHBPrefetcher, BlockingBuffer>; + +#endif //DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/prefetch/ghb_prefetcher.hh b/src/mem/cache/prefetch/ghb_prefetcher.hh new file mode 100644 index 0000000000..f25ebe1664 --- /dev/null +++ b/src/mem/cache/prefetch/ghb_prefetcher.hh @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ron Dreslinski + */ + +/** + * @file + * Describes a ghb prefetcher based on template policies. + */ + +#ifndef __MEM_CACHE_PREFETCH_GHB_PREFETCHER_HH__ +#define __MEM_CACHE_PREFETCH_GHB_PREFETCHER_HH__ + +#include "base/misc.hh" // fatal, panic, and warn + +#include "mem/cache/prefetch/prefetcher.hh" + +/** + * A template-policy based cache. The behavior of the cache can be altered by + * supplying different template policies. TagStore handles all tag and data + * storage @sa TagStore. Buffering handles all misses and writes/writebacks + * @sa MissQueue. Coherence handles all coherence policy details @sa + * UniCoherence, SimpleMultiCoherence. + */ +template +class GHBPrefetcher : public Prefetcher +{ + protected: + + Buffering* mq; + TagStore* tags; + + Addr second_last_miss_addr[64/*MAX_CPUS*/]; + Addr last_miss_addr[64/*MAX_CPUS*/]; + + Tick latency; + int degree; + bool useCPUId; + + public: + + GHBPrefetcher(int size, bool pageStop, bool serialSquash, + bool cacheCheckPush, bool onlyData, + Tick latency, int degree, bool useCPUId) + :Prefetcher(size, pageStop, serialSquash, + cacheCheckPush, onlyData), + latency(latency), degree(degree), useCPUId(useCPUId) + { + } + + ~GHBPrefetcher() {} + + void calculatePrefetch(Packet * &pkt, std::list &addresses, + std::list &delays) + { + Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1); + int cpuID = pkt->cpu_num; + if (!useCPUId) cpuID = 0; + + + int new_stride = blkAddr - last_miss_addr[cpuID]; + int old_stride = last_miss_addr[cpuID] - + second_last_miss_addr[cpuID]; + + second_last_miss_addr[cpuID] = last_miss_addr[cpuID]; + last_miss_addr[cpuID] = blkAddr; + + if (new_stride == old_stride) { + for (int d=1; d <= degree; d++) { + Addr newAddr = blkAddr + d * new_stride; + if (this->pageStop && + (blkAddr & ~(TheISA::VMPageSize - 1)) != + (newAddr & ~(TheISA::VMPageSize - 1))) + { + //Spanned the page, so now stop + this->pfSpanPage += degree - d + 1; + return; + } + else + { + addresses.push_back(newAddr); + delays.push_back(latency); + } + } + } + } +}; + +#endif // __MEM_CACHE_PREFETCH_GHB_PREFETCHER_HH__ diff --git a/src/mem/cache/prefetch/stride_prefetcher.cc b/src/mem/cache/prefetch/stride_prefetcher.cc new file mode 100644 index 0000000000..93a0964689 --- /dev/null +++ b/src/mem/cache/prefetch/stride_prefetcher.cc @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ron Dreslinski + * Steve Reinhardt + */ + +/** + * @file + * Stride Prefetcher template instantiations. + */ + +#include "mem/cache/tags/cache_tags.hh" + +#include "mem/cache/tags/lru.hh" + +#include "base/compression/null_compression.hh" + +#include "mem/cache/miss/miss_queue.hh" +#include "mem/cache/miss/blocking_buffer.hh" + +#include "mem/cache/prefetch/stride_prefetcher.hh" + +// Template Instantiations +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +template class StridePrefetcher, MissQueue>; +template class StridePrefetcher, BlockingBuffer>; + +#endif //DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/prefetch/stride_prefetcher.hh b/src/mem/cache/prefetch/stride_prefetcher.hh new file mode 100644 index 0000000000..f897762151 --- /dev/null +++ b/src/mem/cache/prefetch/stride_prefetcher.hh @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ron Dreslinski + */ + +/** + * @file + * Describes a strided prefetcher based on template policies. + */ + +#ifndef __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__ +#define __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__ + +#include "base/misc.hh" // fatal, panic, and warn + +#include "mem/cache/prefetch/prefetcher.hh" + +/** + * A template-policy based cache. The behavior of the cache can be altered by + * supplying different template policies. TagStore handles all tag and data + * storage @sa TagStore. Buffering handles all misses and writes/writebacks + * @sa MissQueue. Coherence handles all coherence policy details @sa + * UniCoherence, SimpleMultiCoherence. + */ +template +class StridePrefetcher : public Prefetcher +{ + protected: + + Buffering* mq; + TagStore* tags; + + class strideEntry + { + public: + Addr IAddr; + Addr MAddr; + int stride; + int64_t confidence; + +/* bool operator < (strideEntry a,strideEntry b) + { + if (a.confidence == b.confidence) { + return true; //?????? + } + else return a.confidence < b.confidence; + }*/ + }; + Addr* lastMissAddr[64/*MAX_CPUS*/]; + + std::list table[64/*MAX_CPUS*/]; + Tick latency; + int degree; + bool useCPUId; + + + public: + + StridePrefetcher(int size, bool pageStop, bool serialSquash, + bool cacheCheckPush, bool onlyData, + Tick latency, int degree, bool useCPUId) + :Prefetcher(size, pageStop, serialSquash, + cacheCheckPush, onlyData), + latency(latency), degree(degree), useCPUId(useCPUId) + { + } + + ~StridePrefetcher() {} + + void calculatePrefetch(Packet * &pkt, std::list &addresses, + std::list &delays) + { +// Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1); + int cpuID = pkt->cpu_num; + if (!useCPUId) cpuID = 0; + + /* Scan Table for IAddr Match */ +/* std::list::iterator iter; + for (iter=table[cpuID].begin(); + iter !=table[cpuID].end(); + iter++) { + if ((*iter)->IAddr == pkt->pc) break; + } + + if (iter != table[cpuID].end()) { + //Hit in table + + int newStride = blkAddr - (*iter)->MAddr; + if (newStride == (*iter)->stride) { + (*iter)->confidence++; + } + else { + (*iter)->stride = newStride; + (*iter)->confidence--; + } + + (*iter)->MAddr = blkAddr; + + for (int d=1; d <= degree; d++) { + Addr newAddr = blkAddr + d * newStride; + if (this->pageStop && + (blkAddr & ~(TheISA::VMPageSize - 1)) != + (newAddr & ~(TheISA::VMPageSize - 1))) + { + //Spanned the page, so now stop + this->pfSpanPage += degree - d + 1; + return; + } + else + { + addresses.push_back(newAddr); + delays.push_back(latency); + } + } + } + else { + //Miss in table + //Find lowest confidence and replace + + } +*/ } +}; + +#endif // __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__ diff --git a/src/mem/cache/prefetch/tagged_prefetcher.hh b/src/mem/cache/prefetch/tagged_prefetcher.hh new file mode 100644 index 0000000000..17f500dd82 --- /dev/null +++ b/src/mem/cache/prefetch/tagged_prefetcher.hh @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ron Dreslinski + */ + +/** + * @file + * Describes a tagged prefetcher based on template policies. + */ + +#ifndef __MEM_CACHE_PREFETCH_TAGGED_PREFETCHER_HH__ +#define __MEM_CACHE_PREFETCH_TAGGED_PREFETCHER_HH__ + +#include "mem/cache/prefetch/prefetcher.hh" + +/** + * A template-policy based cache. The behavior of the cache can be altered by + * supplying different template policies. TagStore handles all tag and data + * storage @sa TagStore. Buffering handles all misses and writes/writebacks + * @sa MissQueue. Coherence handles all coherence policy details @sa + * UniCoherence, SimpleMultiCoherence. + */ +template +class TaggedPrefetcher : public Prefetcher +{ + protected: + + Buffering* mq; + TagStore* tags; + + Tick latency; + int degree; + + public: + + TaggedPrefetcher(int size, bool pageStop, bool serialSquash, + bool cacheCheckPush, bool onlyData, + Tick latency, int degree); + + ~TaggedPrefetcher() {} + + void calculatePrefetch(Packet * &pkt, std::list &addresses, + std::list &delays); +}; + +#endif // __MEM_CACHE_PREFETCH_TAGGED_PREFETCHER_HH__ diff --git a/src/mem/cache/prefetch/tagged_prefetcher_impl.hh b/src/mem/cache/prefetch/tagged_prefetcher_impl.hh index 7bdabbe142..9e46ba8937 100644 --- a/src/mem/cache/prefetch/tagged_prefetcher_impl.hh +++ b/src/mem/cache/prefetch/tagged_prefetcher_impl.hh @@ -49,10 +49,10 @@ TaggedPrefetcher(int size, bool pageStop, bool serialSquash, template void TaggedPrefetcher:: -calculatePrefetch(MemReqPtr &req, std::list &addresses, +calculatePrefetch(Packet * &pkt, std::list &addresses, std::list &delays) { - Addr blkAddr = req->paddr & ~(Addr)(this->blkSize-1); + Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1); for (int d=1; d <= degree; d++) { Addr newAddr = blkAddr + d*(this->blkSize); diff --git a/src/mem/cache/tags/base_tags.cc b/src/mem/cache/tags/base_tags.cc new file mode 100644 index 0000000000..1537373004 --- /dev/null +++ b/src/mem/cache/tags/base_tags.cc @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Ron Dreslinski + */ + +/** + * @file + * Definitions of BaseTags. + */ + +#include "mem/cache/tags/base_tags.hh" + +#include "mem/cache/base_cache.hh" +#include "cpu/smt.hh" //maxThreadsPerCPU +#include "sim/sim_exit.hh" + +using namespace std; + +void +BaseTags::setCache(BaseCache *_cache) +{ + cache = _cache; + objName = cache->name(); +} + +void +BaseTags::regStats(const string &name) +{ + using namespace Stats; + replacements + .init(maxThreadsPerCPU) + .name(name + ".replacements") + .desc("number of replacements") + .flags(total) + ; + + tagsInUse + .name(name + ".tagsinuse") + .desc("Cycle average of tags in use") + ; + + totalRefs + .name(name + ".total_refs") + .desc("Total number of references to valid blocks.") + ; + + sampledRefs + .name(name + ".sampled_refs") + .desc("Sample count of references to valid blocks.") + ; + + avgRefs + .name(name + ".avg_refs") + .desc("Average number of references to valid blocks.") + ; + + avgRefs = totalRefs/sampledRefs; + + warmupCycle + .name(name + ".warmup_cycle") + .desc("Cycle when the warmup percentage was hit.") + ; + + registerExitCallback(new BaseTagsCallback(this)); +} diff --git a/src/mem/cache/tags/base_tags.hh b/src/mem/cache/tags/base_tags.hh new file mode 100644 index 0000000000..b7b0c7ef03 --- /dev/null +++ b/src/mem/cache/tags/base_tags.hh @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Ron Dreslinski + */ + +/** + * @file + * Declaration of a common base class for cache tagstore objects. + */ + +#ifndef __BASE_TAGS_HH__ +#define __BASE_TAGS_HH__ + +#include +#include "base/statistics.hh" +#include "base/callback.hh" + +class BaseCache; + +/** + * A common base class of Cache tagstore objects. + */ +class BaseTags +{ + protected: + /** Pointer to the parent cache. */ + BaseCache *cache; + + /** Local copy of the parent cache name. Used for DPRINTF. */ + std::string objName; + + /** + * The number of tags that need to be touched to meet the warmup + * percentage. + */ + int warmupBound; + /** Marked true when the cache is warmed up. */ + bool warmedUp; + + // Statistics + /** + * @addtogroup CacheStatistics + * @{ + */ + + /** Number of replacements of valid blocks per thread. */ + Stats::Vector<> replacements; + /** Per cycle average of the number of tags that hold valid data. */ + Stats::Average<> tagsInUse; + + /** The total number of references to a block before it is replaced. */ + Stats::Scalar<> totalRefs; + + /** + * The number of reference counts sampled. This is different from + * replacements because we sample all the valid blocks when the simulator + * exits. + */ + Stats::Scalar<> sampledRefs; + + /** + * Average number of references to a block before is was replaced. + * @todo This should change to an average stat once we have them. + */ + Stats::Formula avgRefs; + + /** The cycle that the warmup percentage was hit. */ + Stats::Scalar<> warmupCycle; + /** + * @} + */ + + public: + + /** + * Destructor. + */ + virtual ~BaseTags() {} + + /** + * Set the parent cache back pointer. Also copies the cache name to + * objName. + * @param _cache Pointer to parent cache. + */ + void setCache(BaseCache *_cache); + + /** + * Return the parent cache name. + * @return the parent cache name. + */ + const std::string &name() const + { + return objName; + } + + /** + * Register local statistics. + * @param name The name to preceed each statistic name. + */ + void regStats(const std::string &name); + + /** + * Average in the reference count for valid blocks when the simulation + * exits. + */ + virtual void cleanupRefs() {} +}; + +class BaseTagsCallback : public Callback +{ + BaseTags *tags; + public: + BaseTagsCallback(BaseTags *t) : tags(t) {} + virtual void process() { tags->cleanupRefs(); }; +}; + +#endif //__BASE_TAGS_HH__ diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc new file mode 100644 index 0000000000..66d91b35b6 --- /dev/null +++ b/src/mem/cache/tags/fa_lru.cc @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Definitions a fully associative LRU tagstore. + */ + +#include + +#include + +#include "mem/cache/tags/fa_lru.hh" +#include "base/intmath.hh" + +using namespace std; + +FALRU::FALRU(int _blkSize, int _size, int hit_latency) + : blkSize(_blkSize), size(_size), + numBlks(size/blkSize), hitLatency(hit_latency) +{ + if (!isPowerOf2(blkSize)) + fatal("cache block size (in bytes) `%d' must be a power of two", + blkSize); + if (!(hitLatency > 0)) + fatal("Access latency in cycles must be at least one cycle"); + if (!isPowerOf2(size)) + fatal("Cache Size must be power of 2 for now"); + + // Track all cache sizes from 128K up by powers of 2 + numCaches = floorLog2(size) - 17; + if (numCaches >0){ + cacheBoundaries = new FALRUBlk *[numCaches]; + cacheMask = (1 << numCaches) - 1; + } else { + cacheMask = 0; + } + + warmedUp = false; + warmupBound = size/blkSize; + + blks = new FALRUBlk[numBlks]; + head = &(blks[0]); + tail = &(blks[numBlks-1]); + + head->prev = NULL; + head->next = &(blks[1]); + head->inCache = cacheMask; + + tail->prev = &(blks[numBlks-2]); + tail->next = NULL; + tail->inCache = 0; + + int index = (1 << 17) / blkSize; + int j = 0; + int flags = cacheMask; + for (int i = 1; i < numBlks-1; i++) { + blks[i].inCache = flags; + if (i == index - 1){ + cacheBoundaries[j] = &(blks[i]); + flags &= ~ (1<tag == blkAddr && blk->isValid(); +} + +void +FALRU::invalidateBlk(int asid, Addr addr) +{ + Addr blkAddr = blkAlign(addr); + FALRUBlk* blk = (*tagHash.find(blkAddr)).second; + if (blk) { + assert(blk->tag == blkAddr); + blk->status = 0; + blk->isTouched = false; + tagsInUse--; + } +} + +FALRUBlk* +FALRU::findBlock(Addr addr, int asid, int &lat, int *inCache) +{ + accesses++; + int tmp_in_cache = 0; + Addr blkAddr = blkAlign(addr); + FALRUBlk* blk = hashLookup(blkAddr); + + if (blk && blk->isValid()) { + assert(blk->tag == blkAddr); + tmp_in_cache = blk->inCache; + for (int i = 0; i < numCaches; i++) { + if (1<inCache) { + hits[i]++; + } else { + misses[i]++; + } + } + hits[numCaches]++; + if (blk != head){ + moveToHead(blk); + } + } else { + blk = NULL; + for (int i = 0; i < numCaches+1; ++i) { + misses[i]++; + } + } + if (inCache) { + *inCache = tmp_in_cache; + } + + lat = hitLatency; + //assert(check()); + return blk; +} + +FALRUBlk* +FALRU::findBlock(Packet * &pkt, int &lat, int *inCache) +{ + Addr addr = pkt->paddr; + + accesses++; + int tmp_in_cache = 0; + Addr blkAddr = blkAlign(addr); + FALRUBlk* blk = hashLookup(blkAddr); + + if (blk && blk->isValid()) { + assert(blk->tag == blkAddr); + tmp_in_cache = blk->inCache; + for (int i = 0; i < numCaches; i++) { + if (1<inCache) { + hits[i]++; + } else { + misses[i]++; + } + } + hits[numCaches]++; + if (blk != head){ + moveToHead(blk); + } + } else { + blk = NULL; + for (int i = 0; i < numCaches+1; ++i) { + misses[i]++; + } + } + if (inCache) { + *inCache = tmp_in_cache; + } + + lat = hitLatency; + //assert(check()); + return blk; +} + +FALRUBlk* +FALRU::findBlock(Addr addr, int asid) const +{ + Addr blkAddr = blkAlign(addr); + FALRUBlk* blk = hashLookup(blkAddr); + + if (blk && blk->isValid()) { + assert(blk->tag == blkAddr); + } else { + blk = NULL; + } + return blk; +} + +FALRUBlk* +FALRU::findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks) +{ + FALRUBlk * blk = tail; + assert(blk->inCache == 0); + moveToHead(blk); + tagHash.erase(blk->tag); + tagHash[blkAlign(pkt->paddr)] = blk; + if (blk->isValid()) { + int thread_num = (blk->xc) ? blk->xc->getThreadNum() : 0; + replacements[thread_num]++; + } else { + tagsInUse++; + blk->isTouched = true; + if (!warmedUp && tagsInUse.value() >= warmupBound) { + warmedUp = true; + warmupCycle = curTick; + } + } + //assert(check()); + return blk; +} + +void +FALRU::moveToHead(FALRUBlk *blk) +{ + int updateMask = blk->inCache ^ cacheMask; + for (int i = 0; i < numCaches; i++){ + if ((1<inCache &= ~(1<prev; + } else if (cacheBoundaries[i] == blk) { + cacheBoundaries[i] = blk->prev; + } + } + blk->inCache = cacheMask; + if (blk != head) { + if (blk == tail){ + assert(blk->next == NULL); + tail = blk->prev; + tail->next = NULL; + } else { + blk->prev->next = blk->next; + blk->next->prev = blk->prev; + } + blk->next = head; + blk->prev = NULL; + head->prev = blk; + head = blk; + } +} + +bool +FALRU::check() +{ + FALRUBlk* blk = head; + int size = 0; + int boundary = 1<<17; + int j = 0; + int flags = cacheMask; + while (blk) { + size += blkSize; + if (blk->inCache != flags) { + return false; + } + if (size == boundary && blk != tail) { + if (cacheBoundaries[j] != blk) { + return false; + } + flags &=~(1 << j); + boundary = boundary<<1; + ++j; + } + blk = blk->next; + } + return true; +} diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh new file mode 100644 index 0000000000..7855f84550 --- /dev/null +++ b/src/mem/cache/tags/fa_lru.hh @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Declaration of a fully associative LRU tag store. + */ + +#ifndef __FA_LRU_HH__ +#define __FA_LRU_HH__ + +#include + +#include "mem/cache/cache_blk.hh" +#include "mem/packet.hh" +#include "base/hashmap.hh" +#include "mem/cache/tags/base_tags.hh" + +/** + * A fully associative cache block. + */ +class FALRUBlk : public CacheBlk +{ +public: + /** The previous block in LRU order. */ + FALRUBlk *prev; + /** The next block in LRU order. */ + FALRUBlk *next; + /** Has this block been touched? */ + bool isTouched; + + /** + * A bit mask of the sizes of cache that this block is resident in. + * Each bit represents a power of 2 in MB size cache. + * If bit 0 is set, this block is in a 1MB cache + * If bit 2 is set, this block is in a 4MB cache, etc. + * There is one bit for each cache smaller than the full size (default + * 16MB). + */ + int inCache; +}; + +/** + * A fully associative LRU cache. Keeps statistics for accesses to a number of + * cache sizes at once. + */ +class FALRU : public BaseTags +{ + public: + /** Typedef the block type used in this class. */ + typedef FALRUBlk BlkType; + /** Typedef a list of pointers to the local block type. */ + typedef std::list BlkList; + protected: + /** The block size of the cache. */ + const int blkSize; + /** The size of the cache. */ + const int size; + /** The number of blocks in the cache. */ + const int numBlks; // calculated internally + /** The hit latency of the cache. */ + const int hitLatency; + + /** Array of pointers to blocks at the cache size boundaries. */ + FALRUBlk **cacheBoundaries; + /** A mask for the FALRUBlk::inCache bits. */ + int cacheMask; + /** The number of different size caches being tracked. */ + int numCaches; + + /** The cache blocks. */ + FALRUBlk *blks; + + /** The MRU block. */ + FALRUBlk *head; + /** The LRU block. */ + FALRUBlk *tail; + + /** Hash table type mapping addresses to cache block pointers. */ + typedef m5::hash_map > hash_t; + /** Iterator into the address hash table. */ + typedef hash_t::const_iterator tagIterator; + + /** The address hash table. */ + hash_t tagHash; + + /** + * Find the cache block for the given address. + * @param addr The address to find. + * @return The cache block of the address, if any. + */ + FALRUBlk * hashLookup(Addr addr) const; + + /** + * Move a cache block to the MRU position. + * @param blk The block to promote. + */ + void moveToHead(FALRUBlk *blk); + + /** + * Check to make sure all the cache boundaries are still where they should + * be. Used for debugging. + * @return True if everything is correct. + */ + bool check(); + + /** + * @defgroup FALRUStats Fully Associative LRU specific statistics + * The FA lru stack lets us track multiple cache sizes at once. These + * statistics track the hits and misses for different cache sizes. + * @{ + */ + + /** Hits in each cache size >= 128K. */ + Stats::Vector<> hits; + /** Misses in each cache size >= 128K. */ + Stats::Vector<> misses; + /** Total number of accesses. */ + Stats::Scalar<> accesses; + + /** + * @} + */ + +public: + /** + * Construct and initialize this cache tagstore. + * @param blkSize The block size of the cache. + * @param size The size of the cache. + * @param hit_latency The hit latency of the cache. + */ + FALRU(int blkSize, int size, int hit_latency); + + /** + * Register the stats for this object. + * @param name The name to prepend to the stats name. + */ + void regStats(const std::string &name); + + /** + * Return true if the address is found in the cache. + * @param asid The address space ID. + * @param addr The address to look for. + * @return True if the address is in the cache. + */ + bool probe(int asid, Addr addr) const; + + /** + * Invalidate the cache block that contains the given addr. + * @param asid The address space ID. + * @param addr The address to invalidate. + */ + void invalidateBlk(int asid, Addr addr); + + /** + * Find the block in the cache and update the replacement data. Returns + * the access latency and the in cache flags as a side effect + * @param addr The address to look for. + * @param asid The address space ID. + * @param lat The latency of the access. + * @param inCache The FALRUBlk::inCache flags. + * @return Pointer to the cache block. + */ + FALRUBlk* findBlock(Addr addr, int asid, int &lat, int *inCache = 0); + + /** + * Find the block in the cache and update the replacement data. Returns + * the access latency and the in cache flags as a side effect + * @param req The req whose block to find + * @param lat The latency of the access. + * @param inCache The FALRUBlk::inCache flags. + * @return Pointer to the cache block. + */ + FALRUBlk* findBlock(Packet * &pkt, int &lat, int *inCache = 0); + + /** + * Find the block in the cache, do not update the replacement data. + * @param addr The address to look for. + * @param asid The address space ID. + * @return Pointer to the cache block. + */ + FALRUBlk* findBlock(Addr addr, int asid) const; + + /** + * Find a replacement block for the address provided. + * @param req The request to a find a replacement candidate for. + * @param writebacks List for any writebacks to be performed. + * @param compress_blocks List of blocks to compress, for adaptive comp. + * @return The block to place the replacement in. + */ + FALRUBlk* findReplacement(Packet * &pkt, PacketList* & writebacks, + BlkList &compress_blocks); + + /** + * Return the hit latency of this cache. + * @return The hit latency. + */ + int getHitLatency() const + { + return hitLatency; + } + + /** + * Return the block size of this cache. + * @return The block size. + */ + int getBlockSize() + { + return blkSize; + } + + /** + * Return the subblock size of this cache, always the block size. + * @return The block size. + */ + int getSubBlockSize() + { + return blkSize; + } + + /** + * Align an address to the block size. + * @param addr the address to align. + * @return The aligned address. + */ + Addr blkAlign(Addr addr) const + { + return (addr & ~(Addr)(blkSize-1)); + } + + /** + * Generate the tag from the addres. For fully associative this is just the + * block address. + * @param addr The address to get the tag from. + * @param blk ignored here + * @return The tag. + */ + Addr extractTag(Addr addr, FALRUBlk *blk) const + { + return blkAlign(addr); + } + + /** + * Return the set of an address. Only one set in a fully associative cache. + * @param addr The address to get the set from. + * @return 0. + */ + int extractSet(Addr addr) const + { + return 0; + } + + /** + * Calculate the block offset of an address. + * @param addr the address to get the offset of. + * @return the block offset. + */ + int extractBlkOffset(Addr addr) const + { + return (addr & (Addr)(blkSize-1)); + } + + /** + * Regenerate the block address from the tag and the set. + * @param tag The tag of the block. + * @param set The set the block belongs to. + * @return the block address. + */ + Addr regenerateBlkAddr(Addr tag, int set) const + { + return (tag); + } + + /** + * Read the data out of the internal storage of a cache block. FALRU + * currently doesn't support data storage. + * @param blk The cache block to read. + * @param data The buffer to read the data into. + * @return The data from the cache block. + */ + void readData(FALRUBlk *blk, uint8_t *data) + { + } + + /** + * Write data into the internal storage of a cache block. FALRU + * currently doesn't support data storage. + * @param blk The cache block to be written. + * @param data The data to write. + * @param size The number of bytes to write. + * @param writebacks A list for any writebacks to be performed. May be + * needed when writing to a compressed block. + */ + void writeData(FALRUBlk *blk, uint8_t *data, int size, + PacketList* &writebacks) + { + } + + /** + * Unimplemented. Perform a cache block copy from block aligned addresses. + * @param source The block aligned source address. + * @param dest The block aligned destination adddress. + * @param asid The address space ID. + * @param writebacks List for any generated writeback requests. + */ + void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) + { + } + + /** + * Unimplemented. + */ + void fixCopy(Packet * &pkt, PacketList* &writebacks) + { + } + +}; + +#endif diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc new file mode 100644 index 0000000000..a574adaa32 --- /dev/null +++ b/src/mem/cache/tags/iic.cc @@ -0,0 +1,869 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Definitions of the Indirect Index Cache tagstore. + */ + +#include +#include +#include + +#include + +#include "mem/cache/base_cache.hh" +#include "mem/cache/tags/iic.hh" +#include "base/intmath.hh" +#include "sim/root.hh" // for curTick + +#include "base/trace.hh" // for DPRINTF + + +using namespace std; + +/** Track the number of accesses to each cache set. */ +#define PROFILE_IIC 1 + +IIC::IIC(IIC::Params ¶ms) : + hashSets(params.numSets), blkSize(params.blkSize), assoc(params.assoc), + hitLatency(params.hitLatency), subSize(params.subblockSize), + numSub(blkSize/subSize), + trivialSize((floorLog2(params.size/subSize)*numSub)/8), + tagShift(floorLog2(blkSize)), blkMask(blkSize - 1), + subShift(floorLog2(subSize)), subMask(numSub - 1), + hashDelay(params.hashDelay), + numBlocks(params.size/subSize), + numTags(hashSets * assoc + params.size/blkSize -1), + numSecondary(params.size/blkSize), + tagNull(numTags), + primaryBound(hashSets * assoc) +{ + int i; + + // Check parameters + if (blkSize < 4 || !isPowerOf2(blkSize)) { + fatal("Block size must be at least 4 and a power of 2"); + } + if (hashSets <= 0 || !isPowerOf2(hashSets)) { + fatal("# of hashsets must be non-zero and a power of 2"); + } + if (assoc <= 0) { + fatal("associativity must be greater than zero"); + } + if (hitLatency <= 0) { + fatal("access latency must be greater than zero"); + } + if (numSub*subSize != blkSize) { + fatal("blocksize must be evenly divisible by subblock size"); + } + + // debug stuff + freeSecond = numSecondary; + + warmedUp = false; + warmupBound = params.size/blkSize; + + // Replacement Policy Initialization + repl = params.rp; + repl->setIIC(this); + + //last_miss_time = 0 + + // allocate data reference counters + dataReferenceCount = new int[numBlocks]; + memset(dataReferenceCount, 0, numBlocks*sizeof(int)); + + // Allocate storage for both internal data and block fast access data. + // We allocate it as one large chunk to reduce overhead and to make + // deletion easier. + int data_index = 0; + dataStore = new uint8_t[(numBlocks + numTags) * blkSize]; + dataBlks = new uint8_t*[numBlocks]; + for (i = 0; i < numBlocks; ++i) { + dataBlks[i] = &dataStore[data_index]; + freeDataBlock(i); + data_index += subSize; + } + + assert(data_index == numBlocks * subSize); + + // allocate and init tag store + tagStore = new IICTag[numTags]; + + int blkIndex = 0; + // allocate and init sets + sets = new IICSet[hashSets]; + for (i = 0; i < hashSets; ++i) { + sets[i].assoc = assoc; + sets[i].tags = new IICTag*[assoc]; + sets[i].chain_ptr = tagNull; + + for (int j = 0; j < assoc; ++j) { + IICTag *tag = &tagStore[blkIndex++]; + tag->chain_ptr = tagNull; + tag->data_ptr.resize(numSub); + tag->size = blkSize; + tag->trivialData = new uint8_t[trivialSize]; + tag->numData = 0; + sets[i].tags[j] = tag; + tag->set = i; + tag->data = &dataStore[data_index]; + data_index += blkSize; + } + } + + assert(blkIndex == primaryBound); + + for (i = primaryBound; i < tagNull; i++) { + tagStore[i].chain_ptr = i+1; + //setup data ptrs to subblocks + tagStore[i].data_ptr.resize(numSub); + tagStore[i].size = blkSize; + tagStore[i].trivialData = new uint8_t[trivialSize]; + tagStore[i].numData = 0; + tagStore[i].set = 0; + tagStore[i].data = &dataStore[data_index]; + data_index += blkSize; + } + freelist = primaryBound; +} + +IIC::~IIC() +{ + delete [] dataReferenceCount; + delete [] dataStore; + delete [] tagStore; + delete [] sets; +} + +/* register cache stats */ +void +IIC::regStats(const string &name) +{ + using namespace Stats; + + BaseTags::regStats(name); + + hitHashDepth.init(0, 20, 1); + missHashDepth.init(0, 20, 1); + setAccess.init(0, hashSets, 1); + + /** IIC Statistics */ + hitHashDepth + .name(name + ".hit_hash_depth_dist") + .desc("Dist. of Hash lookup depths") + .flags(pdf) + ; + + missHashDepth + .name(name + ".miss_hash_depth_dist") + .desc("Dist. of Hash lookup depths") + .flags(pdf) + ; + + repl->regStats(name); + + if (PROFILE_IIC) + setAccess + .name(name + ".set_access_dist") + .desc("Dist. of Accesses across sets") + .flags(pdf) + ; + + missDepthTotal + .name(name + ".miss_depth_total") + .desc("Total of miss depths") + ; + + hashMiss + .name(name + ".hash_miss") + .desc("Total of misses in hash table") + ; + + hitDepthTotal + .name(name + ".hit_depth_total") + .desc("Total of hit depths") + ; + + hashHit + .name(name + ".hash_hit") + .desc("Total of hites in hash table") + ; +} + +// probe cache for presence of given block. +bool +IIC::probe(int asid, Addr addr) const +{ + return (findBlock(addr,asid) != NULL); +} + +IICTag* +IIC::findBlock(Addr addr, int asid, int &lat) +{ + Addr tag = extractTag(addr); + unsigned set = hash(addr); + int set_lat; + + unsigned long chain_ptr; + + if (PROFILE_IIC) + setAccess.sample(set); + + IICTag *tag_ptr = sets[set].findTag(asid, tag, chain_ptr); + set_lat = 1; + if (tag_ptr == NULL && chain_ptr != tagNull) { + int secondary_depth; + tag_ptr = secondaryChain(asid, tag, chain_ptr, &secondary_depth); + set_lat += secondary_depth; + // set depth for statistics fix this later!!! egh + sets[set].depth = set_lat; + + if (tag_ptr != NULL) { + /* need to move tag into primary table */ + // need to preserve chain: fix this egh + sets[set].tags[assoc-1]->chain_ptr = tag_ptr->chain_ptr; + tagSwap(tag_ptr - tagStore, sets[set].tags[assoc-1] - tagStore); + tag_ptr = sets[set].findTag(asid, tag, chain_ptr); + assert(tag_ptr!=NULL); + } + + } + set_lat = set_lat * hashDelay + hitLatency; + if (tag_ptr != NULL) { + // IIC replacement: if this is not the first element of + // list, reorder + sets[set].moveToHead(tag_ptr); + + hitHashDepth.sample(sets[set].depth); + hashHit++; + hitDepthTotal += sets[set].depth; + tag_ptr->status |= BlkReferenced; + lat = set_lat; + if (tag_ptr->whenReady > curTick && tag_ptr->whenReady - curTick > set_lat) { + lat = tag_ptr->whenReady - curTick; + } + + tag_ptr->refCount += 1; + } + else { + // fall through: cache block not found, not a hit... + missHashDepth.sample(sets[set].depth); + hashMiss++; + missDepthTotal += sets[set].depth; + lat = set_lat; + } + return tag_ptr; +} + +IICTag* +IIC::findBlock(Packet * &pkt, int &lat) +{ + Addr addr = pkt->paddr; + int asid = pkt->req->asid; + + Addr tag = extractTag(addr); + unsigned set = hash(addr); + int set_lat; + + unsigned long chain_ptr; + + if (PROFILE_IIC) + setAccess.sample(set); + + IICTag *tag_ptr = sets[set].findTag(asid, tag, chain_ptr); + set_lat = 1; + if (tag_ptr == NULL && chain_ptr != tagNull) { + int secondary_depth; + tag_ptr = secondaryChain(asid, tag, chain_ptr, &secondary_depth); + set_lat += secondary_depth; + // set depth for statistics fix this later!!! egh + sets[set].depth = set_lat; + + if (tag_ptr != NULL) { + /* need to move tag into primary table */ + // need to preserve chain: fix this egh + sets[set].tags[assoc-1]->chain_ptr = tag_ptr->chain_ptr; + tagSwap(tag_ptr - tagStore, sets[set].tags[assoc-1] - tagStore); + tag_ptr = sets[set].findTag(asid, tag, chain_ptr); + assert(tag_ptr!=NULL); + } + + } + set_lat = set_lat * hashDelay + hitLatency; + if (tag_ptr != NULL) { + // IIC replacement: if this is not the first element of + // list, reorder + sets[set].moveToHead(tag_ptr); + + hitHashDepth.sample(sets[set].depth); + hashHit++; + hitDepthTotal += sets[set].depth; + tag_ptr->status |= BlkReferenced; + lat = set_lat; + if (tag_ptr->whenReady > curTick && tag_ptr->whenReady - curTick > set_lat) { + lat = tag_ptr->whenReady - curTick; + } + + tag_ptr->refCount += 1; + } + else { + // fall through: cache block not found, not a hit... + missHashDepth.sample(sets[set].depth); + hashMiss++; + missDepthTotal += sets[set].depth; + lat = set_lat; + } + return tag_ptr; +} + +IICTag* +IIC::findBlock(Addr addr, int asid) const +{ + Addr tag = extractTag(addr); + unsigned set = hash(addr); + + unsigned long chain_ptr; + + IICTag *tag_ptr = sets[set].findTag(asid, tag, chain_ptr); + if (tag_ptr == NULL && chain_ptr != tagNull) { + int secondary_depth; + tag_ptr = secondaryChain(asid, tag, chain_ptr, &secondary_depth); + } + return tag_ptr; +} + + +IICTag* +IIC::findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks) +{ + DPRINTF(IIC, "Finding Replacement for %x\n", pkt->paddr); + unsigned set = hash(pkt->paddr); + IICTag *tag_ptr; + unsigned long *tmp_data = new unsigned long[numSub]; + + // Get a enough subblocks for a full cache line + for (int i = 0; i < numSub; ++i){ + tmp_data[i] = getFreeDataBlock(writebacks); + assert(dataReferenceCount[tmp_data[i]]==0); + } + + tag_ptr = getFreeTag(set, writebacks); + + tag_ptr->set = set; + for (int i=0; i< numSub; ++i) { + tag_ptr->data_ptr[i] = tmp_data[i]; + dataReferenceCount[tag_ptr->data_ptr[i]]++; + } + tag_ptr->numData = numSub; + assert(tag_ptr - tagStore < primaryBound); // make sure it is in primary + tag_ptr->chain_ptr = tagNull; + sets[set].moveToHead(tag_ptr); + delete [] tmp_data; + + list tag_indexes; + repl->doAdvance(tag_indexes); + while (!tag_indexes.empty()) { + if (!tagStore[tag_indexes.front()].isCompressed()) { + compress_blocks.push_back(&tagStore[tag_indexes.front()]); + } + tag_indexes.pop_front(); + } + + tag_ptr->re = (void*)repl->add(tag_ptr-tagStore); + + return tag_ptr; +} + +void +IIC::freeReplacementBlock(PacketList* & writebacks) +{ + IICTag *tag_ptr; + unsigned long data_ptr; + /* consult replacement policy */ + tag_ptr = &tagStore[repl->getRepl()]; + assert(tag_ptr->isValid()); + + DPRINTF(Cache, "Replacing %x in IIC: %s\n", + regenerateBlkAddr(tag_ptr->tag,0), + tag_ptr->isModified() ? "writeback" : "clean"); + /* write back replaced block data */ + if (tag_ptr && (tag_ptr->isValid())) { + int thread_num = (tag_ptr->xc) ? tag_ptr->xc->getThreadNum() : 0; + replacements[thread_num]++; + totalRefs += tag_ptr->refCount; + ++sampledRefs; + tag_ptr->refCount = 0; + + if (tag_ptr->isModified()) { + Packet * writeback = + buildWritebackReq(regenerateBlkAddr(tag_ptr->tag, 0), + tag_ptr->req->asid, tag_ptr->xc, blkSize, + (cache->doData())?tag_ptr->data:0, + tag_ptr->size); + writebacks.push_back(writeback); + } + } + + // free the data blocks + for (int i = 0; i < tag_ptr->numData; ++i) { + data_ptr = tag_ptr->data_ptr[i]; + assert(dataReferenceCount[data_ptr]>0); + if (--dataReferenceCount[data_ptr] == 0) { + freeDataBlock(data_ptr); + } + } + freeTag(tag_ptr); +} + +unsigned long +IIC::getFreeDataBlock(PacketList* & writebacks) +{ + struct IICTag *tag_ptr; + unsigned long data_ptr; + + tag_ptr = NULL; + /* find data block */ + while (blkFreelist.empty()) { + freeReplacementBlock(writebacks); + } + + data_ptr = blkFreelist.front(); + blkFreelist.pop_front(); + DPRINTF(IICMore,"Found free data at %d\n",data_ptr); + return data_ptr; +} + + + +IICTag* +IIC::getFreeTag(int set, PacketList* & writebacks) +{ + unsigned long tag_index; + IICTag *tag_ptr; + // Add new tag + tag_ptr = sets[set].findFree(); + // if no free in primary, and secondary exists + if (!tag_ptr && numSecondary) { + // need to spill a tag into secondary storage + while (freelist == tagNull) { + // get replacements until one is in secondary + freeReplacementBlock(writebacks); + } + + tag_index = freelist; + freelist = tagStore[freelist].chain_ptr; + freeSecond--; + + assert(tag_index != tagNull); + tagSwap(tag_index, sets[set].tags[assoc-1] - tagStore); + tagStore[tag_index].chain_ptr = sets[set].chain_ptr; + sets[set].chain_ptr = tag_index; + + tag_ptr = sets[set].tags[assoc-1]; + } + DPRINTF(IICMore,"Found free tag at %d\n",tag_ptr - tagStore); + tagsInUse++; + if (!warmedUp && tagsInUse.value() >= warmupBound) { + warmedUp = true; + warmupCycle = curTick; + } + + return tag_ptr; +} + +void +IIC::freeTag(IICTag *tag_ptr) +{ + unsigned long tag_index, tmp_index; + // Fix tag_ptr + if (tag_ptr) { + // we have a tag to clear + DPRINTF(IICMore,"Freeing Tag for %x\n", + regenerateBlkAddr(tag_ptr->tag,0)); + tagsInUse--; + tag_ptr->status = 0; + tag_ptr->numData = 0; + tag_ptr->re = NULL; + tag_index = tag_ptr - tagStore; + if (tag_index >= primaryBound) { + // tag_ptr points to secondary store + assert(tag_index < tagNull); // remove this?? egh + if (tag_ptr->chain_ptr == tagNull) { + // need to fix chain list + unsigned tmp_set = hash(tag_ptr->tag << tagShift); + if (sets[tmp_set].chain_ptr == tag_index) { + sets[tmp_set].chain_ptr = tagNull; + } else { + tmp_index = sets[tmp_set].chain_ptr; + while (tmp_index != tagNull + && tagStore[tmp_index].chain_ptr != tag_index) { + tmp_index = tagStore[tmp_index].chain_ptr; + } + assert(tmp_index != tagNull); + tagStore[tmp_index].chain_ptr = tagNull; + } + tag_ptr->chain_ptr = freelist; + freelist = tag_index; + freeSecond++; + } else { + // copy next chained entry to this tag location + tmp_index = tag_ptr->chain_ptr; + tagSwap(tmp_index, tag_index); + tagStore[tmp_index].chain_ptr = freelist; + freelist = tmp_index; + freeSecond++; + } + } else { + // tag_ptr in primary hash table + assert(tag_index < primaryBound); + tag_ptr->status = 0; + unsigned tmp_set = hash(tag_ptr->tag << tagShift); + if (sets[tmp_set].chain_ptr != tagNull) { // collapse chain + tmp_index = sets[tmp_set].chain_ptr; + tagSwap(tag_index, tmp_index); + tagStore[tmp_index].chain_ptr = freelist; + freelist = tmp_index; + freeSecond++; + sets[tmp_set].chain_ptr = tag_ptr->chain_ptr; + sets[tmp_set].moveToTail(tag_ptr); + } + } + } +} + +void +IIC::freeDataBlock(unsigned long data_ptr) +{ + assert(dataReferenceCount[data_ptr] == 0); + DPRINTF(IICMore, "Freeing data at %d\n", data_ptr); + blkFreelist.push_front(data_ptr); +} + +/** Use a simple modulo hash. */ +#define SIMPLE_HASH 0 + +unsigned +IIC::hash(Addr addr) const { +#if SIMPLE_HASH + return extractTag(addr) % iic_hash_size; +#else + Addr tag, mask, x, y; + tag = extractTag(addr); + mask = hashSets-1; /* assumes iic_hash_size is a power of 2 */ + x = tag & mask; + y = (tag >> (int)(::log(hashSets)/::log(2))) & mask; + assert (x < hashSets && y < hashSets); + return x ^ y; +#endif +} + + +void +IICSet::moveToHead(IICTag *tag) +{ + if (tags[0] == tag) + return; + + // write 'next' block into blks[i], moving up from MRU toward LRU + // until we overwrite the block we moved to head. + + // start by setting up to write 'blk' into blks[0] + int i = 0; + IICTag *next = tag; + + do { + assert(i < assoc); + // swap blks[i] and next + IICTag *tmp = tags[i]; + tags[i] = next; + next = tmp; + ++i; + } while (next != tag); +} + +void +IICSet::moveToTail(IICTag *tag) +{ + if (tags[assoc-1] == tag) + return; + + // write 'next' block into blks[i], moving up from MRU toward LRU + // until we overwrite the block we moved to head. + + // start by setting up to write 'blk' into blks[0] + int i = assoc - 1; + IICTag *next = tag; + + do { + assert(i >= 0); + // swap blks[i] and next + IICTag *tmp = tags[i]; + tags[i] = next; + next = tmp; + --i; + } while (next != tag); +} + +void +IIC::tagSwap(unsigned long index1, unsigned long index2) +{ + DPRINTF(IIC,"Swapping tag[%d]=%x for tag[%d]=%x\n",index1, + tagStore[index1].tag<fixTag(tagStore[index1].re, index2, index1); + if (tagStore[index2].isValid()) + repl->fixTag(tagStore[index2].re, index1, index2); +} + + +IICTag * +IIC::secondaryChain(int asid, Addr tag, unsigned long chain_ptr, + int *_depth) const +{ + int depth = 0; + while (chain_ptr != tagNull) { + DPRINTF(IIC,"Searching secondary at %d for %x\n", chain_ptr, + tag<isCompressed()) { + // decompress the data here. + } +} + +void +IIC::compressBlock(unsigned long index) +{ + IICTag *tag_ptr = &tagStore[index]; + if (!tag_ptr->isCompressed()) { + // Compress the data here. + } +} + +void +IIC::invalidateBlk(int asid, Addr addr) +{ + IICTag* tag_ptr = findBlock(addr, asid); + if (tag_ptr) { + for (int i = 0; i < tag_ptr->numData; ++i) { + dataReferenceCount[tag_ptr->data_ptr[i]]--; + if (dataReferenceCount[tag_ptr->data_ptr[i]] == 0) { + freeDataBlock(tag_ptr->data_ptr[i]); + } + } + repl->removeEntry(tag_ptr->re); + freeTag(tag_ptr); + } +} + +void +IIC::readData(IICTag *blk, uint8_t *data){ + assert(cache->doData()); + assert(blk->size <= trivialSize || blk->numData > 0); + int data_size = blk->size; + if (data_size > trivialSize) { + for (int i = 0; i < blk->numData; ++i){ + memcpy(data+i*subSize, + &(dataBlks[blk->data_ptr[i]][0]), + (data_size>subSize)?subSize:data_size); + data_size -= subSize; + } + } else { + memcpy(data,blk->trivialData,data_size); + } +} + +void +IIC::writeData(IICTag *blk, uint8_t *write_data, int size, + PacketList* & writebacks){ + assert(cache->doData()); + assert(size < blkSize || !blk->isCompressed()); + DPRINTF(IIC, "Writing %d bytes to %x\n", size, + blk->tag< blk->numData) { + // need to allocate more data blocks + for (int i = blk->numData; i < num_subs; ++i){ + blk->data_ptr[i] = getFreeDataBlock(writebacks); + dataReferenceCount[blk->data_ptr[i]] += 1; + } + } else if (num_subs < blk->numData){ + // can free data blocks + for (int i=num_subs; i < blk->numData; ++i){ + // decrement reference count and compare to zero + /** + * @todo + * Make this work with copying. + */ + if (--dataReferenceCount[blk->data_ptr[i]] == 0) { + freeDataBlock(blk->data_ptr[i]); + } + } + } + + blk->numData = num_subs; + blk->size = size; + assert(size <= trivialSize || blk->numData > 0); + if (size > trivialSize){ + for (int i = 0; i < blk->numData; ++i){ + memcpy(&dataBlks[blk->data_ptr[i]][0], write_data + i*subSize, + (size>subSize)?subSize:size); + size -= subSize; + } + } else { + memcpy(blk->trivialData,write_data,size); + } +} + + +/** + * @todo This code can break if the src is evicted to get a tag for the dest. + */ +void +IIC::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) +{ + IICTag *dest_tag = findBlock(dest, asid); + + if (dest_tag) { + for (int i = 0; i < dest_tag->numData; ++i) { + if (--dataReferenceCount[dest_tag->data_ptr[i]] == 0) { + freeDataBlock(dest_tag->data_ptr[i]); + } + } + // Reset replacement entry + } else { + dest_tag = getFreeTag(hash(dest), writebacks); + dest_tag->re = (void*) repl->add(dest_tag - tagStore); + dest_tag->set = hash(dest); + dest_tag->tag = extractTag(dest); + dest_tag->req->asid = asid; + dest_tag->status = BlkValid | BlkWritable; + } + // Find the source tag here since it might move if we need to find a + // tag for the destination. + IICTag *src_tag = findBlock(source, asid); + assert(src_tag); + assert(!cache->doData() || src_tag->size <= trivialSize + || src_tag->numData > 0); + // point dest to source data and inc counter + for (int i = 0; i < src_tag->numData; ++i) { + dest_tag->data_ptr[i] = src_tag->data_ptr[i]; + ++dataReferenceCount[dest_tag->data_ptr[i]]; + } + + // Maintain fast access data. + memcpy(dest_tag->data, src_tag->data, blkSize); + + dest_tag->xc = src_tag->xc; + dest_tag->size = src_tag->size; + dest_tag->numData = src_tag->numData; + if (src_tag->numData == 0) { + // Data is stored in the trivial data, just copy it. + memcpy(dest_tag->trivialData, src_tag->trivialData, src_tag->size); + } + + dest_tag->status |= BlkDirty; + if (dest_tag->size < blkSize) { + dest_tag->status |= BlkCompressed; + } else { + dest_tag->status &= ~BlkCompressed; + } +} + +void +IIC::fixCopy(Packet * &pkt, PacketList* &writebacks) +{ + // if reference counter is greater than 1, do copy + // else do write + Addr blk_addr = blkAlign(pkt->paddr); + IICTag* blk = findBlock(blk_addr, pkt->req->asid); + + if (blk->numData > 0 && dataReferenceCount[blk->data_ptr[0]] != 1) { + // copy the data + // Mark the block as referenced so it doesn't get replaced. + blk->status |= BlkReferenced; + for (int i = 0; i < blk->numData; ++i){ + unsigned long new_data = getFreeDataBlock(writebacks); + // Need to refresh pointer + /** + * @todo Remove this refetch once we change IIC to pointer based + */ + blk = findBlock(blk_addr, pkt->req->asid); + assert(blk); + if (cache->doData()) { + memcpy(&(dataBlks[new_data][0]), + &(dataBlks[blk->data_ptr[i]][0]), + subSize); + } + dataReferenceCount[blk->data_ptr[i]]--; + dataReferenceCount[new_data]++; + blk->data_ptr[i] = new_data; + } + } +} + +void +IIC::cleanupRefs() +{ + for (int i = 0; i < numTags; ++i) { + if (tagStore[i].isValid()) { + totalRefs += tagStore[i].refCount; + ++sampledRefs; + } + } +} diff --git a/src/mem/cache/tags/iic.hh b/src/mem/cache/tags/iic.hh new file mode 100644 index 0000000000..ef3f03c534 --- /dev/null +++ b/src/mem/cache/tags/iic.hh @@ -0,0 +1,574 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Declaration of the Indirect Index Cache (IIC) tags store. + */ + +#ifndef __IIC_HH__ +#define __IIC_HH__ + +#include +#include + +#include "mem/cache/cache_blk.hh" +#include "mem/cache/tags/repl/repl.hh" +#include "mem/packet.hh" +#include "base/statistics.hh" +#include "mem/cache/tags/base_tags.hh" + +class BaseCache; // Forward declaration + +/** + * IIC cache blk. + */ +class IICTag : public CacheBlk +{ + public: + /** + * Copy the contents of the given IICTag into this one. + * @param rhs The tag to copy. + * @return const reference to this tag. + */ + const IICTag& operator=(const IICTag& rhs) + { + CacheBlk::operator=(rhs); + chain_ptr = rhs.chain_ptr; + re = rhs.re; + set = rhs.set; + trivialData = rhs.trivialData; + numData = rhs.numData; + data_ptr.clear(); + for (int i = 0; i < rhs.numData; ++i) { + data_ptr.push_back(rhs.data_ptr[i]); + } + return *this; + } + + /** Hash chain pointer into secondary store. */ + unsigned long chain_ptr; + /** Data array pointers for each subblock. */ + std::vector data_ptr; + /** Replacement Entry pointer. */ + void *re; + /** + * An array to store small compressed data. Conceputally the same size + * as the unsused data array pointers. + */ + uint8_t *trivialData; + /** + * The number of allocated subblocks. + */ + int numData; +}; + +/** + * A hash set for the IIC primary lookup table. + */ +class IICSet{ + public: + /** The associativity of the primary table. */ + int assoc; + + /** The number of hash chains followed when finding the last block. */ + int depth; + /** The current number of blocks on the chain. */ + int size; + + /** Tag pointer into the secondary tag storage. */ + unsigned long chain_ptr; + + /** The LRU list of the primary table. MRU is at 0 index. */ + IICTag ** tags; + + /** + * Find the addr in this set, return the chain pointer to the secondary if + * it isn't found. + * @param asid The address space ID. + * @param tag The address to find. + * @param chain_ptr The chain pointer to start the search of the secondary + * @return Pointer to the tag, NULL if not found. + */ + IICTag* findTag(int asid, Addr tag, unsigned long &chain_ptr) + { + depth = 1; + for (int i = 0; i < assoc; ++i) { + if (tags[i]->tag == tag && tags[i]->isValid()) { + return tags[i]; + } + } + chain_ptr = this->chain_ptr; + return 0; + } + + /** + * Find an usused tag in this set. + * @return Pointer to the unused tag, NULL if none are free. + */ + IICTag* findFree() + { + for (int i = 0; i < assoc; ++i) { + if (!tags[i]->isValid()) { + return tags[i]; + } + } + return 0; + } + + /** + * Move a tag to the head of the LRU list + * @param tag The tag to move. + */ + void moveToHead(IICTag *tag); + + /** + * Move a tag to the tail (LRU) of the LRU list + * @param tag The tag to move. + */ + void moveToTail(IICTag *tag); +}; + +/** + * The IIC tag store. This is a hardware-realizable, fully-associative tag + * store that uses software replacement, e.g. Gen. + */ +class IIC : public BaseTags +{ + public: + /** Typedef of the block type used in this class. */ + typedef IICTag BlkType; + /** Typedef for list of pointers to the local block type. */ + typedef std::list BlkList; + protected: + /** The number of set in the primary table. */ + const int hashSets; + /** The block size in bytes. */ + const int blkSize; + /** The associativity of the primary table. */ + const int assoc; + /** The base hit latency. */ + const int hitLatency; + /** The subblock size, used for compression. */ + const int subSize; + + /** The number of subblocks */ + const int numSub; + /** The number of bytes used by data pointers */ + const int trivialSize; + + /** The amount to shift address to get the tag. */ + const int tagShift; + /** The mask to get block offset bits. */ + const unsigned blkMask; + + /** The amount to shift to get the subblock number. */ + const int subShift; + /** The mask to get the correct subblock number. */ + const unsigned subMask; + + /** The latency of a hash lookup. */ + const int hashDelay; + /** The number of data blocks. */ + const int numBlocks; + /** The total number of tags in primary and secondary. */ + const int numTags; + /** The number of tags in the secondary tag store. */ + const int numSecondary; + + /** The Null tag pointer. */ + const int tagNull; + /** The last tag in the primary table. */ + const int primaryBound; + + /** All of the tags */ + IICTag *tagStore; + /** + * Pointer to the head of the secondary freelist (maintained with chain + * pointers. + */ + unsigned long freelist; + /** + * The data block freelist. + */ + std::list blkFreelist; + + /** The primary table. */ + IICSet *sets; + + /** The replacement policy. */ + Repl *repl; + + /** An array of data reference counters. */ + int *dataReferenceCount; + + /** The data blocks. */ + uint8_t *dataStore; + + /** Storage for the fast access data of each cache block. */ + uint8_t **dataBlks; + + /** + * Count of the current number of free secondary tags. + * Used for debugging. + */ + int freeSecond; + + // IIC Statistics + /** + * @addtogroup IICStatistics IIC Statistics + * @{ + */ + + /** Hash hit depth of cache hits. */ + Stats::Distribution<> hitHashDepth; + /** Hash depth for cache misses. */ + Stats::Distribution<> missHashDepth; + /** Count of accesses to each hash set. */ + Stats::Distribution<> setAccess; + + /** The total hash depth for every miss. */ + Stats::Scalar<> missDepthTotal; + /** The total hash depth for all hits. */ + Stats::Scalar<> hitDepthTotal; + /** The number of hash misses. */ + Stats::Scalar<> hashMiss; + /** The number of hash hits. */ + Stats::Scalar<> hashHit; + /** @} */ + + public: + /** + * Collection of parameters for the IIC. + */ + class Params { + public: + /** The size in bytes of the cache. */ + int size; + /** The number of sets in the primary table. */ + int numSets; + /** The block size in bytes. */ + int blkSize; + /** The associativity of the primary table. */ + int assoc; + /** The number of cycles for each hash lookup. */ + int hashDelay; + /** The number of cycles to read the data. */ + int hitLatency; + /** The replacement policy. */ + Repl *rp; + /** The subblock size in bytes. */ + int subblockSize; + }; + + /** + * Construct and initialize this tag store. + * @param params The IIC parameters. + * @todo + * Should make a way to have less tags in the primary than blks in the + * cache. Also should be able to specify number of secondary blks. + */ + IIC(Params ¶ms); + + /** + * Destructor. + */ + virtual ~IIC(); + + /** + * Register the statistics. + * @param name The name to prepend to the statistic descriptions. + */ + void regStats(const std::string &name); + + /** + * Regenerate the block address from the tag. + * @param tag The tag of the block. + * @param set Not needed for the iic. + * @return The block address. + */ + Addr regenerateBlkAddr(Addr tag, int set) { + return (((Addr)tag << tagShift)); + } + + /** + * Return the block size. + * @return The block size. + */ + int getBlockSize() + { + return blkSize; + } + + /** + * Return the subblock size. + * @return The subblock size. + */ + int getSubBlockSize() + { + return subSize; + } + + /** + * Return the hit latency. + * @return the hit latency. + */ + int getHitLatency() const + { + return hitLatency; + } + + /** + * Generate the tag from the address. + * @param addr The address to a get a tag for. + * @param blk Ignored here. + * @return the tag. + */ + Addr extractTag(Addr addr, IICTag *blk) const + { + return (addr >> tagShift); + } + + /** + * Generate the tag from the address. + * @param addr The address to a get a tag for. + * @return the tag. + */ + Addr extractTag(Addr addr) const + { + return (addr >> tagShift); + } + + /** + * Return the set, always 0 for IIC. + * @return 0. + */ + int extractSet(Addr addr) const + { + return 0; + } + + /** + * Get the block offset of an address. + * @param addr The address to get the offset of. + * @return the block offset of the address. + */ + int extractBlkOffset(Addr addr) const + { + return (addr & blkMask); + } + + /** + * Align an address to the block size. + * @param addr the address to align. + * @return The block address. + */ + Addr blkAlign(Addr addr) const + { + return (addr & ~(Addr)blkMask); + } + + /** + * Check for the address in the tagstore. + * @param asid The address space ID. + * @param addr The address to find. + * @return true if it is found. + */ + bool probe(int asid, Addr addr) const; + + /** + * Swap the position of two tags. + * @param index1 The first tag location. + * @param index2 The second tag location. + */ + void tagSwap(unsigned long index1, unsigned long index2); + + /** + * Clear the reference bit of the tag and return its old value. + * @param index The pointer of the tag to manipulate. + * @return The previous state of the reference bit. + */ + bool clearRef(unsigned long index) + { + bool tmp = tagStore[index].isReferenced(); + tagStore[index].status &= ~BlkReferenced; + return tmp; + } + + /** + * Decompress a block if it is compressed. + * @param index The tag store index for the block to uncompress. + */ + void decompressBlock(unsigned long index); + + /** + * Try and compress a block if it is not already compressed. + * @param index The tag store index for the block to compress. + */ + void compressBlock(unsigned long index); + + /** + * Invalidate the block containing the address. + * @param asid The address space ID. + * @param addr The address to invalidate. + */ + void invalidateBlk(int asid, Addr addr); + + /** + * Find the block and update the replacement data. This call also returns + * the access latency as a side effect. + * @param addr The address to find. + * @param asid The address space ID. + * @param lat The access latency. + * @return A pointer to the block found, if any. + */ + IICTag* findBlock(Addr addr, int asid, int &lat); + + /** + * Find the block and update the replacement data. This call also returns + * the access latency as a side effect. + * @param req The req whose block to find + * @param lat The access latency. + * @return A pointer to the block found, if any. + */ + IICTag* findBlock(Packet * &pkt, int &lat); + + /** + * Find the block, do not update the replacement data. + * @param addr The address to find. + * @param asid The address space ID. + * @return A pointer to the block found, if any. + */ + IICTag* findBlock(Addr addr, int asid) const; + + /** + * Find a replacement block for the address provided. + * @param req The request to a find a replacement candidate for. + * @param writebacks List for any writebacks to be performed. + * @param compress_blocks List of blocks to compress, for adaptive comp. + * @return The block to place the replacement in. + */ + IICTag* findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks); + + /** + * Read the data from the internal storage of the given cache block. + * @param blk The block to read the data from. + * @param data The buffer to read the data into. + * @return The cache block's data. + */ + void readData(IICTag *blk, uint8_t *data); + + /** + * Write the data into the internal storage of the given cache block. + * @param blk The block to write to. + * @param data The data to write. + * @param size The number of bytes to write. + * @param writebacks A list for any writebacks to be performed. May be + * needed when writing to a compressed block. + */ + void writeData(IICTag *blk, uint8_t *data, int size, + PacketList* & writebacks); + + /** + * Perform a block aligned copy from the source address to the destination. + * @param source The block-aligned source address. + * @param dest The block-aligned destination address. + * @param asid The address space DI. + * @param writebacks List for any generated writeback requests. + */ + void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks); + + /** + * If a block is currently marked copy on write, copy it before writing. + * @param req The write request. + * @param writebacks List for any generated writeback requests. + */ + void fixCopy(Packet * &pkt, PacketList* &writebacks); + + /** + * Called at end of simulation to complete average block reference stats. + */ + virtual void cleanupRefs(); +private: + /** + * Return the hash of the address. + * @param addr The address to hash. + * @return the hash of the address. + */ + unsigned hash(Addr addr) const; + + /** + * Search for a block in the secondary tag store. Returns the number of + * hash lookups as a side effect. + * @param asid The address space ID. + * @param tag The tag to match. + * @param chain_ptr The first entry to search. + * @param depth The number of hash lookups made while searching. + * @return A pointer to the block if found. + */ + IICTag *secondaryChain(int asid, Addr tag, unsigned long chain_ptr, + int *depth) const; + + /** + * Free the resources associated with the next replacement block. + * @param writebacks A list of any writebacks to perform. + */ + void freeReplacementBlock(PacketList* & writebacks); + + /** + * Return the pointer to a free data block. + * @param writebacks A list of any writebacks to perform. + * @return A pointer to a free data block. + */ + unsigned long getFreeDataBlock(PacketList* & writebacks); + + /** + * Get a free tag in the given hash set. + * @param set The hash set to search. + * @param writebacks A list of any writebacks to perform. + * @return a pointer to a free tag. + */ + IICTag* getFreeTag(int set, PacketList* & writebacks); + + /** + * Free the resources associated with the given tag. + * @param tag_ptr The tag to free. + */ + void freeTag(IICTag *tag_ptr); + + /** + * Mark the given data block as being available. + * @param data_ptr The data block to free. + */ + void freeDataBlock(unsigned long data_ptr); +}; +#endif // __IIC_HH__ + diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc new file mode 100644 index 0000000000..0fe88fd087 --- /dev/null +++ b/src/mem/cache/tags/lru.cc @@ -0,0 +1,310 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Definitions of LRU tag store. + */ + +#include + +#include "mem/cache/base_cache.hh" +#include "base/intmath.hh" +#include "mem/cache/tags/lru.hh" +#include "sim/root.hh" + +using namespace std; + +LRUBlk* +CacheSet::findBlk(int asid, Addr tag) const +{ + for (int i = 0; i < assoc; ++i) { + if (blks[i]->tag == tag && blks[i]->isValid()) { + return blks[i]; + } + } + return 0; +} + + +void +CacheSet::moveToHead(LRUBlk *blk) +{ + // nothing to do if blk is already head + if (blks[0] == blk) + return; + + // write 'next' block into blks[i], moving up from MRU toward LRU + // until we overwrite the block we moved to head. + + // start by setting up to write 'blk' into blks[0] + int i = 0; + LRUBlk *next = blk; + + do { + assert(i < assoc); + // swap blks[i] and next + LRUBlk *tmp = blks[i]; + blks[i] = next; + next = tmp; + ++i; + } while (next != blk); +} + + +// create and initialize a LRU/MRU cache structure +LRU::LRU(int _numSets, int _blkSize, int _assoc, int _hit_latency) : + numSets(_numSets), blkSize(_blkSize), assoc(_assoc), hitLatency(_hit_latency) +{ + // Check parameters + if (blkSize < 4 || !isPowerOf2(blkSize)) { + fatal("Block size must be at least 4 and a power of 2"); + } + if (numSets <= 0 || !isPowerOf2(numSets)) { + fatal("# of sets must be non-zero and a power of 2"); + } + if (assoc <= 0) { + fatal("associativity must be greater than zero"); + } + if (hitLatency <= 0) { + fatal("access latency must be greater than zero"); + } + + LRUBlk *blk; + int i, j, blkIndex; + + blkMask = blkSize - 1; + setShift = floorLog2(blkSize); + setMask = numSets - 1; + tagShift = setShift + floorLog2(numSets); + warmedUp = false; + /** @todo Make warmup percentage a parameter. */ + warmupBound = numSets * assoc; + + sets = new CacheSet[numSets]; + blks = new LRUBlk[numSets * assoc]; + // allocate data storage in one big chunk + dataBlks = new uint8_t[numSets*assoc*blkSize]; + + blkIndex = 0; // index into blks array + for (i = 0; i < numSets; ++i) { + sets[i].assoc = assoc; + + sets[i].blks = new LRUBlk*[assoc]; + + // link in the data blocks + for (j = 0; j < assoc; ++j) { + // locate next cache block + blk = &blks[blkIndex]; + blk->data = &dataBlks[blkSize*blkIndex]; + ++blkIndex; + + // invalidate new cache block + blk->status = 0; + + //EGH Fix Me : do we need to initialize blk? + + // Setting the tag to j is just to prevent long chains in the hash + // table; won't matter because the block is invalid + blk->tag = j; + blk->whenReady = 0; + blk->req->asid = -1; + blk->isTouched = false; + blk->size = blkSize; + sets[i].blks[j]=blk; + blk->set = i; + } + } +} + +LRU::~LRU() +{ + delete [] dataBlks; + delete [] blks; + delete [] sets; +} + +// probe cache for presence of given block. +bool +LRU::probe(int asid, Addr addr) const +{ + // return(findBlock(Read, addr, asid) != 0); + Addr tag = extractTag(addr); + unsigned myset = extractSet(addr); + + LRUBlk *blk = sets[myset].findBlk(asid, tag); + + return (blk != NULL); // true if in cache +} + +LRUBlk* +LRU::findBlock(Addr addr, int asid, int &lat) +{ + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + LRUBlk *blk = sets[set].findBlk(asid, tag); + lat = hitLatency; + if (blk != NULL) { + // move this block to head of the MRU list + sets[set].moveToHead(blk); + if (blk->whenReady > curTick + && blk->whenReady - curTick > hitLatency) { + lat = blk->whenReady - curTick; + } + blk->refCount += 1; + } + + return blk; +} + +LRUBlk* +LRU::findBlock(Packet * &pkt, int &lat) +{ + Addr addr = pkt->paddr; + int asid = pkt->req->asid; + + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + LRUBlk *blk = sets[set].findBlk(asid, tag); + lat = hitLatency; + if (blk != NULL) { + // move this block to head of the MRU list + sets[set].moveToHead(blk); + if (blk->whenReady > curTick + && blk->whenReady - curTick > hitLatency) { + lat = blk->whenReady - curTick; + } + blk->refCount += 1; + } + + return blk; +} + +LRUBlk* +LRU::findBlock(Addr addr, int asid) const +{ + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + LRUBlk *blk = sets[set].findBlk(asid, tag); + return blk; +} + +LRUBlk* +LRU::findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks) +{ + unsigned set = extractSet(pkt->paddr); + // grab a replacement candidate + LRUBlk *blk = sets[set].blks[assoc-1]; + sets[set].moveToHead(blk); + if (blk->isValid()) { + int thread_num = (blk->xc) ? blk->xc->getThreadNum() : 0; + replacements[thread_num]++; + totalRefs += blk->refCount; + ++sampledRefs; + blk->refCount = 0; + } else if (!blk->isTouched) { + tagsInUse++; + blk->isTouched = true; + if (!warmedUp && tagsInUse.value() >= warmupBound) { + warmedUp = true; + warmupCycle = curTick; + } + } + + return blk; +} + +void +LRU::invalidateBlk(int asid, Addr addr) +{ + LRUBlk *blk = findBlock(addr, asid); + if (blk) { + blk->status = 0; + blk->isTouched = false; + tagsInUse--; + } +} + +void +LRU::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) +{ + assert(source == blkAlign(source)); + assert(dest == blkAlign(dest)); + LRUBlk *source_blk = findBlock(source, asid); + assert(source_blk); + LRUBlk *dest_blk = findBlock(dest, asid); + if (dest_blk == NULL) { + // Need to do a replacement + Packet * pkt = new Packet(); + pkt->paddr = dest; + BlkList dummy_list; + dest_blk = findReplacement(pkt, writebacks, dummy_list); + if (dest_blk->isValid() && dest_blk->isModified()) { + // Need to writeback data. + pkt = buildWritebackReq(regenerateBlkAddr(dest_blk->tag, + dest_blk->set), + dest_blk->req->asid, + dest_blk->xc, + blkSize, + (cache->doData())?dest_blk->data:0, + dest_blk->size); + writebacks.push_back(pkt); + } + dest_blk->tag = extractTag(dest); + dest_blk->req->asid = asid; + /** + * @todo Do we need to pass in the execution context, or can we + * assume its the same? + */ + assert(source_blk->xc); + dest_blk->xc = source_blk->xc; + } + /** + * @todo Can't assume the status once we have coherence on copies. + */ + + // Set this block as readable, writeable, and dirty. + dest_blk->status = 7; + if (cache->doData()) { + memcpy(dest_blk->data, source_blk->data, blkSize); + } +} + +void +LRU::cleanupRefs() +{ + for (int i = 0; i < numSets*assoc; ++i) { + if (blks[i].isValid()) { + totalRefs += blks[i].refCount; + ++sampledRefs; + } + } +} diff --git a/src/mem/cache/tags/lru.hh b/src/mem/cache/tags/lru.hh new file mode 100644 index 0000000000..9b4a557772 --- /dev/null +++ b/src/mem/cache/tags/lru.hh @@ -0,0 +1,327 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Declaration of a LRU tag store. + */ + +#ifndef __LRU_HH__ +#define __LRU_HH__ + +#include + +#include "mem/cache/cache_blk.hh" // base class +#include "mem/packet.hh" // for inlined functions +#include +#include "mem/cache/tags/base_tags.hh" + +class BaseCache; + +/** + * LRU cache block. + */ +class LRUBlk : public CacheBlk { + public: + /** Has this block been touched? Used to aid calculation of warmup time. */ + bool isTouched; +}; + +/** + * An associative set of cache blocks. + */ +class CacheSet +{ + public: + /** The associativity of this set. */ + int assoc; + + /** Cache blocks in this set, maintained in LRU order 0 = MRU. */ + LRUBlk **blks; + + /** + * Find a block matching the tag in this set. + * @param asid The address space ID. + * @param tag The Tag to find. + * @return Pointer to the block if found. + */ + LRUBlk* findBlk(int asid, Addr tag) const; + + /** + * Move the given block to the head of the list. + * @param blk The block to move. + */ + void moveToHead(LRUBlk *blk); +}; + +/** + * A LRU cache tag store. + */ +class LRU : public BaseTags +{ + public: + /** Typedef the block type used in this tag store. */ + typedef LRUBlk BlkType; + /** Typedef for a list of pointers to the local block class. */ + typedef std::list BlkList; + protected: + /** The number of sets in the cache. */ + const int numSets; + /** The number of bytes in a block. */ + const int blkSize; + /** The associativity of the cache. */ + const int assoc; + /** The hit latency. */ + const int hitLatency; + + /** The cache sets. */ + CacheSet *sets; + + /** The cache blocks. */ + LRUBlk *blks; + /** The data blocks, 1 per cache block. */ + uint8_t *dataBlks; + + /** The amount to shift the address to get the set. */ + int setShift; + /** The amount to shift the address to get the tag. */ + int tagShift; + /** Mask out all bits that aren't part of the set index. */ + unsigned setMask; + /** Mask out all bits that aren't part of the block offset. */ + unsigned blkMask; + +public: + /** + * Construct and initialize this tag store. + * @param _numSets The number of sets in the cache. + * @param _blkSize The number of bytes in a block. + * @param _assoc The associativity of the cache. + * @param _hit_latency The latency in cycles for a hit. + */ + LRU(int _numSets, int _blkSize, int _assoc, int _hit_latency); + + /** + * Destructor + */ + virtual ~LRU(); + + /** + * Return the block size. + * @return the block size. + */ + int getBlockSize() + { + return blkSize; + } + + /** + * Return the subblock size. In the case of LRU it is always the block + * size. + * @return The block size. + */ + int getSubBlockSize() + { + return blkSize; + } + + /** + * Search for the address in the cache. + * @param asid The address space ID. + * @param addr The address to find. + * @return True if the address is in the cache. + */ + bool probe(int asid, Addr addr) const; + + /** + * Invalidate the block containing the given address. + * @param asid The address space ID. + * @param addr The address to invalidate. + */ + void invalidateBlk(int asid, Addr addr); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param req The request whose block to find. + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + LRUBlk* findBlock(Packet * &pkt, int &lat); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param addr The address to find. + * @param asid The address space ID. + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + LRUBlk* findBlock(Addr addr, int asid, int &lat); + + /** + * Finds the given address in the cache, do not update replacement data. + * @param addr The address to find. + * @param asid The address space ID. + * @return Pointer to the cache block if found. + */ + LRUBlk* findBlock(Addr addr, int asid) const; + + /** + * Find a replacement block for the address provided. + * @param req The request to a find a replacement candidate for. + * @param writebacks List for any writebacks to be performed. + * @param compress_blocks List of blocks to compress, for adaptive comp. + * @return The block to place the replacement in. + */ + LRUBlk* findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks); + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @return The tag of the address. + */ + Addr extractTag(Addr addr) const + { + return (addr >> tagShift); + } + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @param blk Ignored. + * @return The tag of the address. + */ + Addr extractTag(Addr addr, LRUBlk *blk) const + { + return (addr >> tagShift); + } + + /** + * Calculate the set index from the address. + * @param addr The address to get the set from. + * @return The set index of the address. + */ + int extractSet(Addr addr) const + { + return ((addr >> setShift) & setMask); + } + + /** + * Get the block offset from an address. + * @param addr The address to get the offset of. + * @return The block offset. + */ + int extractBlkOffset(Addr addr) const + { + return (addr & blkMask); + } + + /** + * Align an address to the block size. + * @param addr the address to align. + * @return The block address. + */ + Addr blkAlign(Addr addr) const + { + return (addr & ~(Addr)blkMask); + } + + /** + * Regenerate the block address from the tag. + * @param tag The tag of the block. + * @param set The set of the block. + * @return The block address. + */ + Addr regenerateBlkAddr(Addr tag, unsigned set) const + { + return ((tag << tagShift) | ((Addr)set << setShift)); + } + + /** + * Return the hit latency. + * @return the hit latency. + */ + int getHitLatency() const + { + return hitLatency; + } + + /** + * Read the data out of the internal storage of the given cache block. + * @param blk The cache block to read. + * @param data The buffer to read the data into. + * @return The cache block's data. + */ + void readData(LRUBlk *blk, uint8_t *data) + { + memcpy(data, blk->data, blk->size); + } + + /** + * Write data into the internal storage of the given cache block. Since in + * LRU does not store data differently this just needs to update the size. + * @param blk The cache block to write. + * @param data The data to write. + * @param size The number of bytes to write. + * @param writebacks A list for any writebacks to be performed. May be + * needed when writing to a compressed block. + */ + void writeData(LRUBlk *blk, uint8_t *data, int size, + PacketList* & writebacks) + { + assert(size <= blkSize); + blk->size = size; + } + + /** + * Perform a block aligned copy from the source address to the destination. + * @param source The block-aligned source address. + * @param dest The block-aligned destination address. + * @param asid The address space DI. + * @param writebacks List for any generated writeback requests. + */ + void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks); + + /** + * No impl. + */ + void fixCopy(Packet * &pkt, PacketList* &writebacks) + { + } + + /** + * Called at end of simulation to complete average block reference stats. + */ + virtual void cleanupRefs(); +}; + +#endif diff --git a/src/mem/cache/tags/repl/gen.cc b/src/mem/cache/tags/repl/gen.cc new file mode 100644 index 0000000000..ec1c2aaf39 --- /dev/null +++ b/src/mem/cache/tags/repl/gen.cc @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Steve Reinhardt + */ + +/** + * @file + * Definitions of the Generational replacement policy. + */ + +#include + +#include "base/misc.hh" +#include "mem/cache/tags/iic.hh" +#include "mem/cache/tags/repl/gen.hh" +#include "sim/builder.hh" +#include "sim/host.hh" + +using namespace std; + +GenRepl::GenRepl(const string &_name, + int _num_pools, + int _fresh_res, + int _pool_res) // fix this, should be set by cache + : Repl(_name) +{ + num_pools = _num_pools; + fresh_res = _fresh_res; + pool_res = _pool_res; + num_entries = 0; + num_pool_entries = 0; + misses = 0; + pools = new GenPool[num_pools+1]; +} + +GenRepl::~GenRepl() +{ + delete [] pools; +} + +unsigned long +GenRepl::getRepl() +{ + unsigned long tmp; + GenReplEntry *re; + int i; + int num_seen = 0; + if (!(num_pool_entries>0)) { + fatal("No blks available to replace"); + } + num_entries--; + num_pool_entries--; + for (i = 0; i < num_pools; i++) { + while ((re = pools[i].pop())) { + num_seen++; + // Remove invalidated entries + if (!re->valid) { + delete re; + continue; + } + if (iic->clearRef(re->tag_ptr)) { + pools[(((i+1)== num_pools)? i :i+1)].push(re, misses); + } + else { + tmp = re->tag_ptr; + delete re; + + repl_pool.sample(i); + + return tmp; + } + } + } + fatal("No replacement found"); + return 0xffffffff; +} + +unsigned long * +GenRepl::getNRepl(int n) +{ + unsigned long *tmp; + GenReplEntry *re; + int i; + if (!(num_pool_entries>(n-1))) { + fatal("Not enough blks available to replace"); + } + num_entries -= n; + num_pool_entries -= n; + tmp = new unsigned long[n]; /* array of cache_blk pointers */ + int blk_index = 0; + for (i = 0; i < num_pools && blk_index < n; i++) { + while (blk_index < n && (re = pools[i].pop())) { + // Remove invalidated entries + if (!re->valid) { + delete re; + continue; + } + if (iic->clearRef(re->tag_ptr)) { + pools[(((i+1)== num_pools)? i :i+1)].push(re, misses); + } + else { + tmp[blk_index] = re->tag_ptr; + blk_index++; + delete re; + repl_pool.sample(i); + } + } + } + if (blk_index >= n) + return tmp; + /* search the fresh pool */ + + fatal("No N replacements found"); + return NULL; +} + +void +GenRepl::doAdvance(std::list &demoted) +{ + int i; + int num_seen = 0; + GenReplEntry *re; + misses++; + for (i=0; i pool_res && (re = pools[i].pop())!=NULL) { + if (iic->clearRef(re->tag_ptr)) { + pools[(((i+1)== num_pools)? i :i+1)].push(re, misses); + /** @todo Not really demoted, but use it for now. */ + demoted.push_back(re->tag_ptr); + advance_pool.sample(i); + } + else { + pools[(((i-1)<0)?i:i-1)].push(re, misses); + demoted.push_back(re->tag_ptr); + demote_pool.sample(i); + } + } + num_seen += pools[i].size; + } + while (misses-pools[num_pools].oldest > fresh_res + && (re = pools[num_pools].pop())!=NULL) { + num_pool_entries++; + if (iic->clearRef(re->tag_ptr)) { + pools[num_pools/2].push(re, misses); + /** @todo Not really demoted, but use it for now. */ + demoted.push_back(re->tag_ptr); + advance_pool.sample(num_pools); + } + else { + pools[num_pools/2-1].push(re, misses); + demoted.push_back(re->tag_ptr); + demote_pool.sample(num_pools); + } + } +} + +void* +GenRepl::add(unsigned long tag_index) +{ + GenReplEntry *re = new GenReplEntry; + re->tag_ptr = tag_index; + re->valid = true; + pools[num_pools].push(re, misses); + num_entries++; + return (void*)re; +} + +void +GenRepl::regStats(const string name) +{ + using namespace Stats; + + /** GEN statistics */ + repl_pool + .init(0, 16, 1) + .name(name + ".repl_pool_dist") + .desc("Dist. of Repl. across pools") + .flags(pdf) + ; + + advance_pool + .init(0, 16, 1) + .name(name + ".advance_pool_dist") + .desc("Dist. of Repl. across pools") + .flags(pdf) + ; + + demote_pool + .init(0, 16, 1) + .name(name + ".demote_pool_dist") + .desc("Dist. of Repl. across pools") + .flags(pdf) + ; +} + +int +GenRepl::fixTag(void* _re, unsigned long old_index, unsigned long new_index) +{ + GenReplEntry *re = (GenReplEntry*)_re; + assert(re->valid); + if (re->tag_ptr == old_index) { + re->tag_ptr = new_index; + return 1; + } + fatal("Repl entry: tag ptrs do not match"); + return 0; +} + +bool +GenRepl::findTagPtr(unsigned long index) +{ + for (int i = 0; i < num_pools + 1; ++i) { + list::const_iterator iter = pools[i].entries.begin(); + list::const_iterator end = pools[i].entries.end(); + for (; iter != end; ++iter) { + if ((*iter)->valid && (*iter)->tag_ptr == index) { + return true; + } + } + } + return false; +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(GenRepl) + + Param num_pools; + Param fresh_res; + Param pool_res; + +END_DECLARE_SIM_OBJECT_PARAMS(GenRepl) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(GenRepl) + + INIT_PARAM(num_pools, "capacity in bytes"), + INIT_PARAM(fresh_res, "associativity"), + INIT_PARAM(pool_res, "block size in bytes") + +END_INIT_SIM_OBJECT_PARAMS(GenRepl) + + +CREATE_SIM_OBJECT(GenRepl) +{ + return new GenRepl(getInstanceName(), num_pools, fresh_res, pool_res); +} + +REGISTER_SIM_OBJECT("GenRepl", GenRepl) + +#endif // DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/tags/repl/gen.hh b/src/mem/cache/tags/repl/gen.hh new file mode 100644 index 0000000000..c1ceb3f4eb --- /dev/null +++ b/src/mem/cache/tags/repl/gen.hh @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + */ + +/** + * @file + * Declarations of generational replacement policy + */ + +#ifndef ___GEN_HH__ +#define __GEN_HH__ + +#include + +#include "base/statistics.hh" +#include "mem/cache/tags/repl/repl.hh" + +/** + * Generational Replacement entry. + */ +class GenReplEntry +{ + public: + /** Valid flag, used to quickly invalidate bogus entries. */ + bool valid; + /** The difference between this entry and the previous in the pool. */ + int delta; + /** Pointer to the corresponding tag in the IIC. */ + unsigned long tag_ptr; +}; + +/** + * Generational replacement pool + */ +class GenPool +{ + public: + /** The time the last entry was added. */ + Tick newest; + /** The time the oldest entry was added. */ + Tick oldest; + /** List of the replacement entries in this pool. */ + std::list entries; + + /** The number of entries in this pool. */ + int size; + + /** + * Simple constructor. + */ + GenPool() { + newest = 0; + oldest = 0; + size = 0; + } + + /** + * Add an entry to this pool. + * @param re The entry to add. + * @param now The current time. + */ + void push(GenReplEntry *re, Tick now) { + ++size; + if (!entries.empty()) { + re->delta = now - newest; + newest = now; + } else { + re->delta = 0; + newest = oldest = now; + } + entries.push_back(re); + } + + /** + * Remove an entry from the pool. + * @return The entry at the front of the list. + */ + GenReplEntry* pop() { + GenReplEntry *tmp = NULL; + if (!entries.empty()) { + --size; + tmp = entries.front(); + entries.pop_front(); + oldest += tmp->delta; + } + return tmp; + } + + /** + * Return the entry at the front of the list. + * @return the entry at the front of the list. + */ + GenReplEntry* top() { + return entries.front(); + } + + /** + * Destructor. + */ + ~GenPool() { + while (!entries.empty()) { + GenReplEntry *tmp = entries.front(); + entries.pop_front(); + delete tmp; + } + } +}; + +/** + * Generational replacement policy for use with the IIC. + * @todo update to use STL and for efficiency + */ +class GenRepl : public Repl +{ + public: + /** The array of pools. */ + GenPool *pools; + /** The number of pools. */ + int num_pools; + /** The amount of time to stay in the fresh pool. */ + int fresh_res; + /** The amount of time to stay in the normal pools. */ + int pool_res; + /** The maximum number of entries */ + int num_entries; + /** The number of entries currently in the pools. */ + int num_pool_entries; + /** The number of misses. Used as the internal time. */ + Tick misses; + + // Statistics + + /** + * @addtogroup CacheStatistics + * @{ + */ + /** The number of replacements from each pool. */ + Stats::Distribution<> repl_pool; + /** The number of advances out of each pool. */ + Stats::Distribution<> advance_pool; + /** The number of demotions from each pool. */ + Stats::Distribution<> demote_pool; + /** + * @} + */ + + /** + * Constructs and initializes this replacement policy. + * @param name The name of the policy. + * @param num_pools The number of pools to use. + * @param fresh_res The amount of time to wait in the fresh pool. + * @param pool_res The amount of time to wait in the normal pools. + */ + GenRepl(const std::string &name, int num_pools, + int fresh_res, int pool_res); + + /** + * Destructor. + */ + ~GenRepl(); + + /** + * Returns the tag pointer of the cache block to replace. + * @return The tag to replace. + */ + virtual unsigned long getRepl(); + + /** + * Return an array of N tag pointers to replace. + * @param n The number of tag pointer to return. + * @return An array of tag pointers to replace. + */ + virtual unsigned long *getNRepl(int n); + + /** + * Update replacement data + */ + virtual void doAdvance(std::list &demoted); + + /** + * Add a tag to the replacement policy and return a pointer to the + * replacement entry. + * @param tag_index The tag to add. + * @return The replacement entry. + */ + virtual void* add(unsigned long tag_index); + + /** + * Register statistics. + * @param name The name to prepend to statistic descriptions. + */ + virtual void regStats(const std::string name); + + /** + * Update the tag pointer to when the tag moves. + * @param re The replacement entry of the tag. + * @param old_index The old tag pointer. + * @param new_index The new tag pointer. + * @return 1 if successful, 0 otherwise. + */ + virtual int fixTag(void *re, unsigned long old_index, + unsigned long new_index); + + /** + * Remove this entry from the replacement policy. + * @param re The replacement entry to remove + */ + virtual void removeEntry(void *re) + { + ((GenReplEntry*)re)->valid = false; + } + + protected: + /** + * Debug function to verify that there is only one repl entry per tag. + * @param index The tag index to check. + */ + bool findTagPtr(unsigned long index); +}; + +#endif /* __GEN_HH__ */ diff --git a/src/mem/cache/tags/repl/repl.cc b/src/mem/cache/tags/repl/repl.cc new file mode 100644 index 0000000000..ce781eb9f7 --- /dev/null +++ b/src/mem/cache/tags/repl/repl.cc @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Nathan Binkert + */ + +/** + * Definitions of the base replacement class. + */ + +#include "sim/param.hh" +#include "mem/cache/tags/repl/repl.hh" + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +DEFINE_SIM_OBJECT_CLASS_NAME("Repl", Repl) + +#endif //DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/mem/cache/tags/repl/repl.hh b/src/mem/cache/tags/repl/repl.hh new file mode 100644 index 0000000000..7c289a5c1d --- /dev/null +++ b/src/mem/cache/tags/repl/repl.hh @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Erik Hallnor + * Steve Reinhardt + * Nathan Binkert + */ + +/** + * @file + * Declaration of a base replacement policy class. + */ + +#ifndef __REPL_HH__ +#define __REPL_HH__ + +#include +#include + +#include "cpu/smt.hh" +#include "sim/host.hh" +#include "sim/sim_object.hh" + + +class IIC; + +/** + * A pure virtual base class that defines the interface of a replacement + * policy. + */ +class Repl : public SimObject +{ + public: + /** Pointer to the IIC using this policy. */ + IIC *iic; + + /** + * Construct and initialize this polixy. + * @param name The instance name of this policy. + */ + Repl (const std::string &name) + : SimObject(name) + { + iic = NULL; + } + + /** + * Set the back pointer to the IIC. + * @param iic_ptr Pointer to the IIC. + */ + void setIIC(IIC *iic_ptr) + { + iic = iic_ptr; + } + + /** + * Returns the tag pointer of the cache block to replace. + * @return The tag to replace. + */ + virtual unsigned long getRepl() = 0; + + /** + * Return an array of N tag pointers to replace. + * @param n The number of tag pointer to return. + * @return An array of tag pointers to replace. + */ + virtual unsigned long *getNRepl(int n) = 0; + + /** + * Update replacement data + */ + virtual void doAdvance(std::list &demoted) = 0; + + /** + * Add a tag to the replacement policy and return a pointer to the + * replacement entry. + * @param tag_index The tag to add. + * @return The replacement entry. + */ + virtual void* add(unsigned long tag_index) = 0; + + /** + * Register statistics. + * @param name The name to prepend to statistic descriptions. + */ + virtual void regStats(const std::string name) = 0; + + /** + * Update the tag pointer to when the tag moves. + * @param re The replacement entry of the tag. + * @param old_index The old tag pointer. + * @param new_index The new tag pointer. + * @return 1 if successful, 0 otherwise. + */ + virtual int fixTag(void *re, unsigned long old_index, + unsigned long new_index) = 0; + + /** + * Remove this entry from the replacement policy. + * @param re The replacement entry to remove + */ + virtual void removeEntry(void *re) = 0; +}; + +#endif /* SMT_REPL_HH */ diff --git a/src/mem/cache/tags/split.cc b/src/mem/cache/tags/split.cc new file mode 100644 index 0000000000..9d9036abb2 --- /dev/null +++ b/src/mem/cache/tags/split.cc @@ -0,0 +1,478 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Lisa Hsu + */ + +/** + * @file + * Definitions of split cache tag store. + */ + +#include +#include +#include + +#include "base/cprintf.hh" +#include "base/intmath.hh" +#include "base/output.hh" +#include "base/trace.hh" +#include "mem/cache/base_cache.hh" +#include "mem/cache/tags/split.hh" +#include "mem/cache/tags/split_lifo.hh" +#include "mem/cache/tags/split_lru.hh" + + +using namespace std; +using namespace TheISA; + +// create and initialize a partitioned cache structure +Split::Split(int _numSets, int _blkSize, int total_ways, int LRU1_assoc, + bool _lifo, bool _two_queue, int _hit_latency) : + numSets(_numSets), blkSize(_blkSize), lifo(_lifo), hitLatency(_hit_latency) +{ + DPRINTF(Split, "new split cache!!\n"); + + DPRINTF(Split, "lru has %d numSets, %d blkSize, %d assoc, and %d hit_latency\n", + numSets, blkSize, LRU1_assoc, hitLatency); + + lru = new SplitLRU(_numSets, _blkSize, LRU1_assoc, _hit_latency, 1); + + if (total_ways - LRU1_assoc == 0) { + lifo_net = NULL; + lru_net = NULL; + } else { + if (lifo) { + DPRINTF(Split, "Other partition is a LIFO with size %d in bytes. it gets %d ways\n", + (total_ways - LRU1_assoc)*_numSets*_blkSize, (total_ways - LRU1_assoc)); + lifo_net = new SplitLIFO(_blkSize, (total_ways - LRU1_assoc)*_numSets*_blkSize, + (total_ways - LRU1_assoc), _hit_latency, _two_queue, 2); + lru_net = NULL; + } + else { + DPRINTF(Split, "other LRU gets %d ways\n", total_ways - LRU1_assoc); + lru_net = new SplitLRU(_numSets, _blkSize, total_ways - LRU1_assoc, _hit_latency, 2); + lifo_net = NULL; + } + } + + blkMask = blkSize - 1; + + if (!isPowerOf2(total_ways)) + warn("total cache ways/columns %d should be power of 2", + total_ways); + + warmedUp = false; + /** @todo Make warmup percentage a parameter. */ + warmupBound = numSets * total_ways; + +} + +Split::~Split() +{ + delete lru; + if (lifo) + delete lifo_net; + else + delete lru_net; +} + +void +Split::regStats(const string &name) +{ + using namespace Stats; + + BaseTags::regStats(name); + + usedEvictDist.init(0,3000,40); + unusedEvictDist.init(0,3000,40); + useByCPUCycleDist.init(0,35,1); + + nic_repl + .name(name + ".nic_repl") + .desc("number of replacements in the nic partition") + .precision(0) + ; + + cpu_repl + .name(name + ".cpu_repl") + .desc("number of replacements in the cpu partition") + .precision(0) + ; + + lru->regStats(name + ".lru"); + + if (lifo && lifo_net) { + lifo_net->regStats(name + ".lifo_net"); + } else if (lru_net) { + lru_net->regStats(name + ".lru_net"); + } + + nicUsedWhenEvicted + .name(name + ".nicUsedWhenEvicted") + .desc("number of NIC blks that were used before evicted") + ; + + nicUsedTotLatency + .name(name + ".nicUsedTotLatency") + .desc("total cycles before eviction of used NIC blks") + ; + + nicUsedTotEvicted + .name(name + ".nicUsedTotEvicted") + .desc("total number of used NIC blks evicted") + ; + + nicUsedAvgLatency + .name(name + ".nicUsedAvgLatency") + .desc("avg number of cycles a used NIC blk is in cache") + .precision(0) + ; + nicUsedAvgLatency = nicUsedTotLatency / nicUsedTotEvicted; + + usedEvictDist + .name(name + ".usedEvictDist") + .desc("distribution of used NIC blk eviction times") + .flags(pdf | cdf) + ; + + nicUnusedWhenEvicted + .name(name + ".nicUnusedWhenEvicted") + .desc("number of NIC blks that were unused when evicted") + ; + + nicUnusedTotLatency + .name(name + ".nicUnusedTotLatency") + .desc("total cycles before eviction of unused NIC blks") + ; + + nicUnusedTotEvicted + .name(name + ".nicUnusedTotEvicted") + .desc("total number of unused NIC blks evicted") + ; + + nicUnusedAvgLatency + .name(name + ".nicUnusedAvgLatency") + .desc("avg number of cycles an unused NIC blk is in cache") + .precision(0) + ; + nicUnusedAvgLatency = nicUnusedTotLatency / nicUnusedTotEvicted; + + unusedEvictDist + .name(name + ".unusedEvictDist") + .desc("distribution of unused NIC blk eviction times") + .flags(pdf | cdf) + ; + + nicUseByCPUCycleTotal + .name(name + ".nicUseByCPUCycleTotal") + .desc("total latency of NIC blks til usage time") + ; + + nicBlksUsedByCPU + .name(name + ".nicBlksUsedByCPU") + .desc("total number of NIC blks used") + ; + + nicAvgUsageByCPULatency + .name(name + ".nicAvgUsageByCPULatency") + .desc("average number of cycles before a NIC blk that is used gets used") + .precision(0) + ; + nicAvgUsageByCPULatency = nicUseByCPUCycleTotal / nicBlksUsedByCPU; + + useByCPUCycleDist + .name(name + ".useByCPUCycleDist") + .desc("the distribution of cycle time in cache before NIC blk is used") + .flags(pdf | cdf) + ; + + cpuUsedBlks + .name(name + ".cpuUsedBlks") + .desc("number of cpu blks that were used before evicted") + ; + + cpuUnusedBlks + .name(name + ".cpuUnusedBlks") + .desc("number of cpu blks that were unused before evicted") + ; + + nicAvgLatency + .name(name + ".nicAvgLatency") + .desc("avg number of cycles a NIC blk is in cache before evicted") + .precision(0) + ; + nicAvgLatency = (nicUnusedTotLatency + nicUsedTotLatency) / + (nicUnusedTotEvicted + nicUsedTotEvicted); + + NR_CP_hits + .name(name + ".NR_CP_hits") + .desc("NIC requests hitting in CPU Partition") + ; + + NR_NP_hits + .name(name + ".NR_NP_hits") + .desc("NIC requests hitting in NIC Partition") + ; + + CR_CP_hits + .name(name + ".CR_CP_hits") + .desc("CPU requests hitting in CPU partition") + ; + + CR_NP_hits + .name(name + ".CR_NP_hits") + .desc("CPU requests hitting in NIC partition") + ; + +} + +// probe cache for presence of given block. +bool +Split::probe(int asid, Addr addr) const +{ + bool success = lru->probe(asid, addr); + if (!success) { + if (lifo && lifo_net) + success = lifo_net->probe(asid, addr); + else if (lru_net) + success = lru_net->probe(asid, addr); + } + + return success; +} + +SplitBlk* +Split::findBlock(Packet * &pkt, int &lat) +{ + + Addr aligned = blkAlign(pkt->paddr); + + if (memHash.count(aligned)) { + memHash[aligned]++; + } else if (pkt->nic_pkt) { + memHash[aligned] = 1; + } + + SplitBlk *blk = lru->findBlock(pkt->paddr, pkt->req->asid, lat); + if (blk) { + if (pkt->nic_pkt) { + NR_CP_hits++; + } else { + CR_CP_hits++; + } + } else { + if (lifo && lifo_net) { + blk = lifo_net->findBlock(pkt->paddr, pkt->req->asid, lat); + + } else if (lru_net) { + blk = lru_net->findBlock(pkt->paddr, pkt->req->asid, lat); + } + if (blk) { + if (pkt->nic_pkt) { + NR_NP_hits++; + } else { + CR_NP_hits++; + } + } + } + + if (blk) { + Tick latency = curTick - blk->ts; + if (blk->isNIC) { + if (!blk->isUsed && !pkt->nic_pkt) { + useByCPUCycleDist.sample(latency); + nicUseByCPUCycleTotal += latency; + nicBlksUsedByCPU++; + } + } + blk->isUsed = true; + + if (pkt->nic_pkt) { + DPRINTF(Split, "found block in partition %d\n", blk->part); + } + } + return blk; +} + +SplitBlk* +Split::findBlock(Addr addr, int asid, int &lat) +{ + SplitBlk *blk = lru->findBlock(addr, asid, lat); + if (!blk) { + if (lifo && lifo_net) { + blk = lifo_net->findBlock(addr, asid, lat); + } else if (lru_net) { + blk = lru_net->findBlock(addr, asid, lat); + } + } + + return blk; +} + +SplitBlk* +Split::findBlock(Addr addr, int asid) const +{ + SplitBlk *blk = lru->findBlock(addr, asid); + if (!blk) { + if (lifo && lifo_net) { + blk = lifo_net->findBlock(addr, asid); + } else if (lru_net) { + blk = lru_net->findBlock(addr, asid); + } + } + + return blk; +} + +SplitBlk* +Split::findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks) +{ + SplitBlk *blk; + + if (pkt->nic_pkt) { + DPRINTF(Split, "finding a replacement for nic_req\n"); + nic_repl++; + if (lifo && lifo_net) + blk = lifo_net->findReplacement(pkt, writebacks, + compress_blocks); + else if (lru_net) + blk = lru_net->findReplacement(pkt, writebacks, + compress_blocks); + // in this case, this is an LRU only cache, it's non partitioned + else + blk = lru->findReplacement(pkt, writebacks, compress_blocks); + } else { + DPRINTF(Split, "finding replacement for cpu_req\n"); + blk = lru->findReplacement(pkt, writebacks, + compress_blocks); + cpu_repl++; + } + + Tick latency = curTick - blk->ts; + if (blk->isNIC) { + if (blk->isUsed) { + nicUsedWhenEvicted++; + usedEvictDist.sample(latency); + nicUsedTotLatency += latency; + nicUsedTotEvicted++; + } else { + nicUnusedWhenEvicted++; + unusedEvictDist.sample(latency); + nicUnusedTotLatency += latency; + nicUnusedTotEvicted++; + } + } else { + if (blk->isUsed) { + cpuUsedBlks++; + } else { + cpuUnusedBlks++; + } + } + + // blk attributes for the new blk coming IN + blk->ts = curTick; + blk->isNIC = (pkt->nic_pkt) ? true : false; + + return blk; +} + +void +Split::invalidateBlk(int asid, Addr addr) +{ + SplitBlk *blk = lru->findBlock(addr, asid); + if (!blk) { + if (lifo && lifo_net) + blk = lifo_net->findBlock(addr, asid); + else if (lru_net) + blk = lru_net->findBlock(addr, asid); + + if (!blk) + return; + } + + blk->status = 0; + blk->isTouched = false; + tagsInUse--; +} + +void +Split::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) +{ + if (lru->probe(asid, source)) + lru->doCopy(source, dest, asid, writebacks); + else { + if (lifo && lifo_net) + lifo_net->doCopy(source, dest, asid, writebacks); + else if (lru_net) + lru_net->doCopy(source, dest, asid, writebacks); + } +} + +void +Split::cleanupRefs() +{ + lru->cleanupRefs(); + if (lifo && lifo_net) + lifo_net->cleanupRefs(); + else if (lru_net) + lru_net->cleanupRefs(); + + ofstream memPrint(simout.resolve("memory_footprint.txt").c_str(), + ios::trunc); + + // this shouldn't be here but it happens at the end, which is what i want + memIter end = memHash.end(); + for (memIter iter = memHash.begin(); iter != end; ++iter) { + ccprintf(memPrint, "%8x\t%d\n", (*iter).first, (*iter).second); + } +} + +Addr +Split::regenerateBlkAddr(Addr tag, int set) const +{ + if (lifo_net) + return lifo_net->regenerateBlkAddr(tag, set); + else + return lru->regenerateBlkAddr(tag, set); +} + +Addr +Split::extractTag(Addr addr, SplitBlk *blk) const +{ + if (blk->part == 2) { + if (lifo_net) + return lifo_net->extractTag(addr); + else if (lru_net) + return lru_net->extractTag(addr); + else + panic("this shouldn't happen"); + } else + return lru->extractTag(addr); +} + diff --git a/src/mem/cache/tags/split.hh b/src/mem/cache/tags/split.hh new file mode 100644 index 0000000000..6f2441597d --- /dev/null +++ b/src/mem/cache/tags/split.hh @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Lisa Hsu + */ + +/** + * @file + * Declaration of a split/partitioned tag store. + */ + +#ifndef __SPLIT_HH__ +#define __SPLIT_HH__ + +#include + +#include "mem/cache/cache_blk.hh" // base class +#include "mem/cache/tags/split_blk.hh" +#include "mem/packet.hh" // for inlined functions +#include +#include "mem/cache/tags/base_tags.hh" +#include "base/hashmap.hh" + +class BaseCache; +class SplitLRU; +class SplitLIFO; + +/** + * A cache tag store. + */ +class Split : public BaseTags +{ + public: + /** Typedef the block type used in this tag store. */ + typedef SplitBlk BlkType; + /** Typedef for a list of pointers to the local block class. */ + typedef std::list BlkList; + protected: + /** The number of sets in the cache. */ + const int numSets; + /** The number of bytes in a block. */ + const int blkSize; + /** Whether the 2nd partition (for the nic) is LIFO or not */ + const bool lifo; + /** The hit latency. */ + const int hitLatency; + + Addr blkMask; + + /** Number of NIC requests that hit in the NIC partition */ + Stats::Scalar<> NR_NP_hits; + /** Number of NIC requests that hit in the CPU partition */ + Stats::Scalar<> NR_CP_hits; + /** Number of CPU requests that hit in the NIC partition */ + Stats::Scalar<> CR_NP_hits; + /** Number of CPU requests that hit in the CPU partition */ + Stats::Scalar<> CR_CP_hits; + /** The number of nic replacements (i.e. misses) */ + Stats::Scalar<> nic_repl; + /** The number of cpu replacements (i.e. misses) */ + Stats::Scalar<> cpu_repl; + + //For latency studies + /** the number of NIC blks that were used before evicted */ + Stats::Scalar<> nicUsedWhenEvicted; + /** the total latency of used NIC blocks in the cache */ + Stats::Scalar<> nicUsedTotLatency; + /** the total number of used NIC blocks evicted */ + Stats::Scalar<> nicUsedTotEvicted; + /** the average number of cycles a used NIC blk is in the cache */ + Stats::Formula nicUsedAvgLatency; + /** the Distribution of used NIC blk eviction times */ + Stats::Distribution<> usedEvictDist; + + /** the number of NIC blks that were unused before evicted */ + Stats::Scalar<> nicUnusedWhenEvicted; + /** the total latency of unused NIC blks in the cache */ + Stats::Scalar<> nicUnusedTotLatency; + /** the total number of unused NIC blocks evicted */ + Stats::Scalar<> nicUnusedTotEvicted; + /** the average number of cycles an unused NIC blk is in the cache */ + Stats::Formula nicUnusedAvgLatency; + /** the Distribution of unused NIC blk eviction times */ + Stats::Distribution<> unusedEvictDist; + + /** The total latency of NIC blocks to 1st usage time by CPU */ + Stats::Scalar<> nicUseByCPUCycleTotal; + /** The total number of NIC blocks used */ + Stats::Scalar<> nicBlksUsedByCPU; + /** the average number of cycles before a NIC blk that is used gets used by CPU */ + Stats::Formula nicAvgUsageByCPULatency; + /** the Distribution of cycles time before a NIC blk is used by CPU*/ + Stats::Distribution<> useByCPUCycleDist; + + /** the number of CPU blks that were used before evicted */ + Stats::Scalar<> cpuUsedBlks; + /** the number of CPU blks that were unused before evicted */ + Stats::Scalar<> cpuUnusedBlks; + + /** the avg number of cycles before a NIC blk is evicted */ + Stats::Formula nicAvgLatency; + + typedef m5::hash_map > hash_t; + typedef hash_t::const_iterator memIter; + hash_t memHash; + + + private: + SplitLRU *lru; + SplitLRU *lru_net; + SplitLIFO *lifo_net; + + public: + /** + * Construct and initialize this tag store. + * @param _numSets The number of sets in the cache. + * @param _blkSize The number of bytes in a block. + * @param _assoc The associativity of the cache. + * @param _hit_latency The latency in cycles for a hit. + */ + Split(int _numSets, int _blkSize, int total_ways, int LRU1_assoc, + bool _lifo, bool _two_queue, int _hit_latency); + + /** + * Destructor + */ + virtual ~Split(); + + /** + * Register the stats for this object + * @param name The name to prepend to the stats name. + */ + void regStats(const std::string &name); + + /** + * Return the block size. + * @return the block size. + */ + int getBlockSize() + { + return blkSize; + } + + /** + * Return the subblock size. In the case of Split it is always the block + * size. + * @return The block size. + */ + int getSubBlockSize() + { + return blkSize; + } + + /** + * Search for the address in the cache. + * @param asid The address space ID. + * @param addr The address to find. + * @return True if the address is in the cache. + */ + bool probe(int asid, Addr addr) const; + + /** + * Invalidate the block containing the given address. + * @param asid The address space ID. + * @param addr The address to invalidate. + */ + void invalidateBlk(int asid, Addr addr); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param addr The address to find. + * @param asid The address space ID. + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Addr addr, int asid, int &lat); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param req The memory request whose block to find + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Packet * &pkt, int &lat); + + /** + * Finds the given address in the cache, do not update replacement data. + * @param addr The address to find. + * @param asid The address space ID. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Addr addr, int asid) const; + + /** + * Find a replacement block for the address provided. + * @param req The request to a find a replacement candidate for. + * @param writebacks List for any writebacks to be performed. + * @param compress_blocks List of blocks to compress, for adaptive comp. + * @return The block to place the replacement in. + */ + SplitBlk* findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks); + + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @param blk The block to find the partition it's in + * @return The tag of the address. + */ + Addr extractTag(Addr addr, SplitBlk *blk) const; + + /** + * Calculate the set index from the address. + * @param addr The address to get the set from. + * @return The set index of the address. + */ + int extractSet(Addr addr) const + { + panic("should never call this!\n"); + } + + /** + * Get the block offset from an address. + * @param addr The address to get the offset of. + * @return The block offset. + */ + int extractBlkOffset(Addr addr) const + { + return (addr & blkMask); + } + + /** + * Align an address to the block size. + * @param addr the address to align. + * @return The block address. + */ + Addr blkAlign(Addr addr) const + { + return (addr & ~(Addr) (blkMask)); + } + + /** + * Regenerate the block address from the tag. + * @param tag The tag of the block. + * @param set The set of the block. + * @return The block address. + */ + Addr regenerateBlkAddr(Addr tag, int set) const; + + /** + * Return the hit latency. + * @return the hit latency. + */ + int getHitLatency() const + { + return hitLatency; + } + + /** + * Read the data out of the internal storage of the given cache block. + * @param blk The cache block to read. + * @param data The buffer to read the data into. + * @return The cache block's data. + */ + void readData(SplitBlk *blk, uint8_t *data) + { + memcpy(data, blk->data, blk->size); + } + + /** + * Write data into the internal storage of the given cache block. Since in + * Split does not store data differently this just needs to update the size. + * @param blk The cache block to write. + * @param data The data to write. + * @param size The number of bytes to write. + * @param writebacks A list for any writebacks to be performed. May be + * needed when writing to a compressed block. + */ + void writeData(SplitBlk *blk, uint8_t *data, int size, + PacketList* & writebacks) + { + assert(size <= blkSize); + blk->size = size; + } + + /** + * Perform a block aligned copy from the source address to the destination. + * @param source The block-aligned source address. + * @param dest The block-aligned destination address. + * @param asid The address space DI. + * @param writebacks List for any generated writeback requests. + */ + void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks); + + /** + * No impl. + */ + void fixCopy(Packet * &pkt, PacketList* &writebacks) + { + } + + /** + * Called at end of simulation to complete average block reference stats. + */ + virtual void cleanupRefs(); +}; + +#endif diff --git a/src/mem/cache/tags/split_blk.hh b/src/mem/cache/tags/split_blk.hh new file mode 100644 index 0000000000..f38516180a --- /dev/null +++ b/src/mem/cache/tags/split_blk.hh @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Lisa Hsu + */ + +/** + * @file + * Declaration of partitioned tag store cache block class. + */ + +#ifndef __SPLIT_BLK_HH__ +#define __SPLIT_BLK_HH__ + +#include "mem/cache/cache_blk.hh" // base class + +/** + * Split cache block. + */ +class SplitBlk : public CacheBlk { + public: + /** Has this block been touched? Used to aid calculation of warmup time. */ + bool isTouched; + /** Has this block been used after being brought in? (for LIFO partition) */ + bool isUsed; + /** is this blk a NIC block? (i.e. requested by the NIC) */ + bool isNIC; + /** timestamp of the arrival of this block into the cache */ + Tick ts; + /** the previous block in the LIFO partition (brought in before than me) */ + SplitBlk *prev; + /** the next block in the LIFO partition (brought in later than me) */ + SplitBlk *next; + /** which partition this block is in */ + int part; + + SplitBlk() + : isTouched(false), isUsed(false), isNIC(false), ts(0), prev(NULL), next(NULL), + part(0) + {} +}; + +#endif + diff --git a/src/mem/cache/tags/split_lifo.cc b/src/mem/cache/tags/split_lifo.cc new file mode 100644 index 0000000000..f2c37c80d3 --- /dev/null +++ b/src/mem/cache/tags/split_lifo.cc @@ -0,0 +1,405 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Lisa Hsu + */ + +/** + * @file + * Definitions of LIFO tag store usable in a partitioned cache. + */ + +#include + +#include "mem/cache/base_cache.hh" +#include "base/intmath.hh" +#include "mem/cache/tags/split_lifo.hh" +#include "sim/root.hh" +#include "base/trace.hh" + +using namespace std; + +SplitBlk* +LIFOSet::findBlk(int asid, Addr tag) const +{ + for (SplitBlk *blk = firstIn; blk != NULL; blk = blk->next) { + if (blk->tag == tag && blk->isValid()) { + return blk; + } + } + return NULL; +} + +void +LIFOSet::moveToLastIn(SplitBlk *blk) +{ + if (blk == lastIn) + return; + + if (blk == firstIn) { + blk->next->prev = NULL; + } else { + blk->prev->next = blk->next; + blk->next->prev = blk->prev; + } + blk->next = NULL; + blk->prev = lastIn; + lastIn->next = blk; + + lastIn = blk; +} + +void +LIFOSet::moveToFirstIn(SplitBlk *blk) +{ + if (blk == firstIn) + return; + + if (blk == lastIn) { + blk->prev->next = NULL; + } else { + blk->next->prev = blk->prev; + blk->prev->next = blk->next; + } + + blk->prev = NULL; + blk->next = firstIn; + firstIn->prev = blk; + + firstIn = blk; +} + +// create and initialize a LIFO cache structure +SplitLIFO::SplitLIFO(int _blkSize, int _size, int _ways, int _hit_latency, bool two_Queue, int _part) : + blkSize(_blkSize), size(_size), numBlks(_size/_blkSize), numSets((_size/_ways)/_blkSize), ways(_ways), + hitLatency(_hit_latency), twoQueue(two_Queue), part(_part) +{ + if (!isPowerOf2(blkSize)) + fatal("cache block size (in bytes) must be a power of 2"); + if (!(hitLatency > 0)) + fatal("access latency in cycles must be at least on cycle"); + if (_ways == 0) + fatal("if instantiating a splitLIFO, needs non-zero size!"); + + + SplitBlk *blk; + int i, j, blkIndex; + + setShift = floorLog2(blkSize); + blkMask = blkSize - 1; + setMask = numSets - 1; + tagShift = setShift + floorLog2(numSets); + + warmedUp = false; + /** @todo Make warmup percentage a parameter. */ + warmupBound = size/blkSize; + + // allocate data blocks + blks = new SplitBlk[numBlks]; + sets = new LIFOSet[numSets]; + dataBlks = new uint8_t[size]; + +/* + // these start off point to same blk + top = &(blks[0]); + head = top; +*/ + + blkIndex = 0; + for (i=0; i < numSets; ++i) { + sets[i].ways = ways; + sets[i].lastIn = &blks[blkIndex]; + sets[i].firstIn = &blks[blkIndex + ways - 1]; + + /* 3 cases: if there is 1 way, if there are 2 ways, or if there are 3+. + in the case of 1 way, last in and first out point to the same blocks, + and the next and prev pointers need to be assigned specially. and so on + */ + /* deal with the first way */ + blk = &blks[blkIndex]; + blk->prev = &blks[blkIndex + 1]; + blk->next = NULL; + blk->data = &dataBlks[blkSize*blkIndex]; + blk->size = blkSize; + blk->part = part; + blk->set = i; + ++blkIndex; + + /* if there are "middle" ways, do them here */ + if (ways > 2) { + for (j=1; j < ways-1; ++j) { + blk = &blks[blkIndex]; + blk->data = &dataBlks[blkSize*blkIndex]; + blk->prev = &blks[blkIndex+1]; + blk->next = &blks[blkIndex-1]; + blk->data = &(dataBlks[blkSize*blkIndex]); + blk->size = blkSize; + blk->part = part; + blk->set = i; + ++blkIndex; + } + } + + /* do the final way here, depending on whether the final way is the only + way or not + */ + if (ways > 1) { + blk = &blks[blkIndex]; + blk->prev = NULL; + blk->next = &blks[blkIndex - 1]; + blk->data = &dataBlks[blkSize*blkIndex]; + blk->size = blkSize; + blk->part = part; + blk->set = i; + ++blkIndex; + } else { + blk->prev = NULL; + } + } + assert(blkIndex == numBlks); +} + +SplitLIFO::~SplitLIFO() +{ + delete [] blks; + delete [] sets; + delete [] dataBlks; +} + +void +SplitLIFO::regStats(const std::string &name) +{ + BaseTags::regStats(name); + + hits + .name(name + ".hits") + .desc("number of hits on this partition") + .precision(0) + ; + + misses + .name(name + ".misses") + .desc("number of misses in this partition") + .precision(0) + ; + + invalidations + .name(name + ".invalidations") + .desc("number of invalidations in this partition") + .precision(0) + ; +} + +// probe cache for presence of given block. +bool +SplitLIFO::probe(int asid, Addr addr) const +{ + Addr tag = extractTag(addr); + unsigned myset = extractSet(addr); + + SplitBlk* blk = sets[myset].findBlk(asid, tag); + return (blk != NULL); +} + +SplitBlk* +SplitLIFO::findBlock(Addr addr, int asid, int &lat) +{ + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + SplitBlk *blk = sets[set].findBlk(asid, tag); + + lat = hitLatency; + + if (blk) { + DPRINTF(Split, "Found LIFO blk %#x in set %d, with tag %#x\n", + addr, set, tag); + hits++; + + if (blk->whenReady > curTick && blk->whenReady - curTick > hitLatency) + lat = blk->whenReady - curTick; + blk->refCount +=1; + + if (twoQueue) { + blk->isUsed = true; + sets[set].moveToFirstIn(blk); + } else { + sets[set].moveToLastIn(blk); + } + } + + return blk; +} + +SplitBlk* +SplitLIFO::findBlock(Packet * &pkt, int &lat) +{ + Addr addr = pkt->paddr; + int asid = pkt->req->asid; + + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + SplitBlk *blk = sets[set].findBlk(asid, tag); + + if (blk) { + DPRINTF(Split, "Found LIFO blk %#x in set %d, with tag %#x\n", + addr, set, tag); + hits++; + + if (twoQueue) { + blk->isUsed = true; + sets[set].moveToFirstIn(blk); + } else { + sets[set].moveToLastIn(blk); + } + } + lat = hitLatency; + + return blk; +} + +SplitBlk* +SplitLIFO::findBlock(Addr addr, int asid) const +{ + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + SplitBlk *blk = sets[set].findBlk(asid, tag); + + return blk; +} + +SplitBlk* +SplitLIFO::findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks) +{ + unsigned set = extractSet(pkt->paddr); + + SplitBlk *firstIn = sets[set].firstIn; + SplitBlk *lastIn = sets[set].lastIn; + + SplitBlk *blk; + if (twoQueue && firstIn->isUsed) { + blk = firstIn; + blk->isUsed = false; + sets[set].moveToLastIn(blk); + } else { + int withValue = sets[set].withValue; + if (withValue == ways) { + blk = lastIn; + } else { + blk = &(sets[set].firstIn[ways - ++withValue]); + } + } + + DPRINTF(Split, "just assigned %#x addr into LIFO, replacing %#x status %#x\n", + pkt->paddr, regenerateBlkAddr(blk->tag, set), blk->status); + if (blk->isValid()) { + int thread_num = (blk->xc) ? blk->xc->getThreadNum() : 0; + replacements[thread_num]++; + totalRefs += blk->refCount; + ++sampledRefs; + blk->refCount = 0; + } else { + tagsInUse++; + blk->isTouched = true; + if (!warmedUp && tagsInUse.value() >= warmupBound) { + warmedUp = true; + warmupCycle = curTick; + } + } + + misses++; + + return blk; +} + +void +SplitLIFO::invalidateBlk(int asid, Addr addr) +{ + SplitBlk *blk = findBlock(addr, asid); + if (blk) { + blk->status = 0; + blk->isTouched = false; + tagsInUse--; + invalidations++; + } +} + +void +SplitLIFO::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) +{ + assert(source == blkAlign(source)); + assert(dest == blkAlign(dest)); + SplitBlk *source_blk = findBlock(source, asid); + assert(source_blk); + SplitBlk *dest_blk = findBlock(dest, asid); + if (dest_blk == NULL) { + // Need to do a replacement + Packet * pkt = new Packet(); + pkt->paddr = dest; + BlkList dummy_list; + dest_blk = findReplacement(pkt, writebacks, dummy_list); + if (dest_blk->isValid() && dest_blk->isModified()) { + // Need to writeback data. + pkt = buildWritebackReq(regenerateBlkAddr(dest_blk->tag, + dest_blk->set), + dest_blk->req->asid, + dest_blk->xc, + blkSize, + (cache->doData())?dest_blk->data:0, + dest_blk->size); + writebacks.push_back(pkt); + } + dest_blk->tag = extractTag(dest); + dest_blk->req->asid = asid; + /** + * @todo Do we need to pass in the execution context, or can we + * assume its the same? + */ + assert(source_blk->xc); + dest_blk->xc = source_blk->xc; + } + /** + * @todo Can't assume the status once we have coherence on copies. + */ + + // Set this block as readable, writeable, and dirty. + dest_blk->status = 7; + if (cache->doData()) { + memcpy(dest_blk->data, source_blk->data, blkSize); + } +} + +void +SplitLIFO::cleanupRefs() +{ + for (int i = 0; i < numBlks; ++i) { + if (blks[i].isValid()) { + totalRefs += blks[i].refCount; + ++sampledRefs; + } + } +} diff --git a/src/mem/cache/tags/split_lifo.hh b/src/mem/cache/tags/split_lifo.hh new file mode 100644 index 0000000000..c50eaa53db --- /dev/null +++ b/src/mem/cache/tags/split_lifo.hh @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Lisa Hsu + */ + +/** + * @file + * Declaration of a LIFO tag store usable in a partitioned cache. + */ + +#ifndef __SPLIT_LIFO_HH__ +#define __SPLIT_LIFO_HH__ + +#include + +#include "mem/cache/cache_blk.hh" // base class +#include "mem/cache/tags/split_blk.hh" +#include "mem/packet.hh" // for inlined functions +#include "base/hashmap.hh" +#include +#include "mem/cache/tags/base_tags.hh" + +class BaseCache; + +/** + * A LIFO set of cache blks + */ +class LIFOSet { + public: + /** the number of blocks in this set */ + int ways; + + /** Cache blocks in this set, maintained in LIFO order where + 0 = Last in (head) */ + SplitBlk *lastIn; + SplitBlk *firstIn; + + /** has the initial "filling" of this set finished? i.e., have you had + * 'ways' number of compulsory misses in this set yet? if withValue == ways, + * then yes. withValue is meant to be the number of blocks in the set that have + * gone through their first compulsory miss. + */ + int withValue; + + /** + * Find a block matching the tag in this set. + * @param asid The address space ID. + * @param tag the Tag you are looking for + * @return Pointer to the block, if found, NULL otherwise + */ + SplitBlk* findBlk(int asid, Addr tag) const; + + void moveToLastIn(SplitBlk *blk); + void moveToFirstIn(SplitBlk *blk); + + LIFOSet() + : ways(-1), lastIn(NULL), firstIn(NULL), withValue(0) + {} +}; + +/** + * A LIFO cache tag store. + */ +class SplitLIFO : public BaseTags +{ + public: + /** Typedef the block type used in this tag store. */ + typedef SplitBlk BlkType; + /** Typedef for a list of pointers to the local block class. */ + typedef std::list BlkList; + protected: + /** The number of bytes in a block. */ + const int blkSize; + /** the size of the cache in bytes */ + const int size; + /** the number of blocks in the cache */ + const int numBlks; + /** the number of sets in the cache */ + const int numSets; + /** the number of ways in the cache */ + const int ways; + /** The hit latency. */ + const int hitLatency; + /** whether this is a "2 queue" replacement @sa moveToLastIn @sa moveToFirstIn */ + const bool twoQueue; + /** indicator for which partition this is */ + const int part; + + /** The cache blocks. */ + SplitBlk *blks; + /** The Cache sets */ + LIFOSet *sets; + /** The data blocks, 1 per cache block. */ + uint8_t *dataBlks; + + /** The amount to shift the address to get the set. */ + int setShift; + /** The amount to shift the address to get the tag. */ + int tagShift; + /** Mask out all bits that aren't part of the set index. */ + unsigned setMask; + /** Mask out all bits that aren't part of the block offset. */ + unsigned blkMask; + + + /** the number of hit in this partition */ + Stats::Scalar<> hits; + /** the number of blocks brought into this partition (i.e. misses) */ + Stats::Scalar<> misses; + /** the number of invalidations in this partition */ + Stats::Scalar<> invalidations; + +public: + /** + * Construct and initialize this tag store. + * @param _numSets The number of sets in the cache. + * @param _blkSize The number of bytes in a block. + * @param _assoc The associativity of the cache. + * @param _hit_latency The latency in cycles for a hit. + */ + SplitLIFO(int _blkSize, int _size, int _ways, int _hit_latency, bool twoQueue, int _part); + + /** + * Destructor + */ + virtual ~SplitLIFO(); + + /** + * Register the statistics for this object + * @param name The name to precede the stat + */ + void regStats(const std::string &name); + + /** + * Return the block size. + * @return the block size. + */ + int getBlockSize() + { + return blkSize; + } + + /** + * Return the subblock size. In the case of LIFO it is always the block + * size. + * @return The block size. + */ + int getSubBlockSize() + { + return blkSize; + } + + /** + * Search for the address in the cache. + * @param asid The address space ID. + * @param addr The address to find. + * @return True if the address is in the cache. + */ + bool probe(int asid, Addr addr) const; + + /** + * Invalidate the block containing the given address. + * @param asid The address space ID. + * @param addr The address to invalidate. + */ + void invalidateBlk(int asid, Addr addr); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param addr The address to find. + * @param asid The address space ID. + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Addr addr, int asid, int &lat); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param req The req whose block to find + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Packet * &pkt, int &lat); + + /** + * Finds the given address in the cache, do not update replacement data. + * @param addr The address to find. + * @param asid The address space ID. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Addr addr, int asid) const; + + /** + * Find a replacement block for the address provided. + * @param req The request to a find a replacement candidate for. + * @param writebacks List for any writebacks to be performed. + * @param compress_blocks List of blocks to compress, for adaptive comp. + * @return The block to place the replacement in. + */ + SplitBlk* findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks); + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @return The tag of the address. + */ + Addr extractTag(Addr addr) const + { + return (addr >> tagShift); + } + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @param blk Ignored + * @return The tag of the address. + */ + Addr extractTag(Addr addr, SplitBlk *blk) const + { + return (addr >> tagShift); + } + + /** + * Calculate the set index from the address. + * @param addr The address to get the set from. + * @return The set index of the address. + */ + int extractSet(Addr addr) const + { + return ((addr >> setShift) & setMask); + } + + /** + * Get the block offset from an address. + * @param addr The address to get the offset of. + * @return The block offset. + */ + int extractBlkOffset(Addr addr) const + { + return (addr & blkMask); + } + + /** + * Align an address to the block size. + * @param addr the address to align. + * @return The block address. + */ + Addr blkAlign(Addr addr) const + { + return (addr & ~(Addr)blkMask); + } + + /** + * Regenerate the block address from the tag. + * @param tag The tag of the block. + * @param set The set of the block. + * @return The block address. + */ + Addr regenerateBlkAddr(Addr tag, unsigned set) const + { + return ((tag << tagShift) | ((Addr)set << setShift)); + } + + /** + * Return the hit latency. + * @return the hit latency. + */ + int getHitLatency() const + { + return hitLatency; + } + + /** + * Read the data out of the internal storage of the given cache block. + * @param blk The cache block to read. + * @param data The buffer to read the data into. + * @return The cache block's data. + */ + void readData(SplitBlk *blk, uint8_t *data) + { + memcpy(data, blk->data, blk->size); + } + + /** + * Write data into the internal storage of the given cache block. Since in + * LIFO does not store data differently this just needs to update the size. + * @param blk The cache block to write. + * @param data The data to write. + * @param size The number of bytes to write. + * @param writebacks A list for any writebacks to be performed. May be + * needed when writing to a compressed block. + */ + void writeData(SplitBlk *blk, uint8_t *data, int size, + PacketList* & writebacks) + { + assert(size <= blkSize); + blk->size = size; + } + + /** + * Perform a block aligned copy from the source address to the destination. + * @param source The block-aligned source address. + * @param dest The block-aligned destination address. + * @param asid The address space DI. + * @param writebacks List for any generated writeback requests. + */ + void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks); + + /** + * No impl. + */ + void fixCopy(Packet * &pkt, PacketList* &writebacks) + { + } + + /** + * Called at end of simulation to complete average block reference stats. + */ + virtual void cleanupRefs(); +}; + +#endif diff --git a/src/mem/cache/tags/split_lru.cc b/src/mem/cache/tags/split_lru.cc new file mode 100644 index 0000000000..ea5b92d6f2 --- /dev/null +++ b/src/mem/cache/tags/split_lru.cc @@ -0,0 +1,331 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Lisa Hsu + */ + +/** + * @file + * Definitions of LRU tag store for a partitioned cache. + */ + +#include + +#include "mem/cache/base_cache.hh" +#include "base/intmath.hh" +#include "mem/cache/tags/split_lru.hh" +#include "sim/root.hh" + +using namespace std; + +SplitBlk* +SplitCacheSet::findBlk(int asid, Addr tag) const +{ + for (int i = 0; i < assoc; ++i) { + if (blks[i]->tag == tag && blks[i]->isValid()) { + return blks[i]; + } + } + return 0; +} + + +void +SplitCacheSet::moveToHead(SplitBlk *blk) +{ + // nothing to do if blk is already head + if (blks[0] == blk) + return; + + // write 'next' block into blks[i], moving up from MRU toward LRU + // until we overwrite the block we moved to head. + + // start by setting up to write 'blk' into blks[0] + int i = 0; + SplitBlk *next = blk; + + do { + assert(i < assoc); + // swap blks[i] and next + SplitBlk *tmp = blks[i]; + blks[i] = next; + next = tmp; + ++i; + } while (next != blk); +} + + +// create and initialize a LRU/MRU cache structure +SplitLRU::SplitLRU(int _numSets, int _blkSize, int _assoc, int _hit_latency, int _part) : + numSets(_numSets), blkSize(_blkSize), assoc(_assoc), hitLatency(_hit_latency), part(_part) +{ + // Check parameters + if (blkSize < 4 || !isPowerOf2(blkSize)) { + fatal("Block size must be at least 4 and a power of 2"); + } + if (numSets <= 0 || !isPowerOf2(numSets)) { + fatal("# of sets must be non-zero and a power of 2"); + } + if (assoc <= 0) { + fatal("associativity must be greater than zero"); + } + if (hitLatency <= 0) { + fatal("access latency must be greater than zero"); + } + + SplitBlk *blk; + int i, j, blkIndex; + + blkMask = blkSize - 1; + setShift = floorLog2(blkSize); + setMask = numSets - 1; + tagShift = setShift + floorLog2(numSets); + warmedUp = false; + /** @todo Make warmup percentage a parameter. */ + warmupBound = numSets * assoc; + + sets = new SplitCacheSet[numSets]; + blks = new SplitBlk[numSets * assoc]; + // allocate data storage in one big chunk + dataBlks = new uint8_t[numSets*assoc*blkSize]; + + blkIndex = 0; // index into blks array + for (i = 0; i < numSets; ++i) { + sets[i].assoc = assoc; + + sets[i].blks = new SplitBlk*[assoc]; + + // link in the data blocks + for (j = 0; j < assoc; ++j) { + // locate next cache block + blk = &blks[blkIndex]; + blk->data = &dataBlks[blkSize*blkIndex]; + ++blkIndex; + + // invalidate new cache block + blk->status = 0; + + //EGH Fix Me : do we need to initialize blk? + + // Setting the tag to j is just to prevent long chains in the hash + // table; won't matter because the block is invalid + blk->tag = j; + blk->whenReady = 0; + blk->req->asid = -1; + blk->isTouched = false; + blk->size = blkSize; + sets[i].blks[j]=blk; + blk->set = i; + blk->part = part; + } + } +} + +SplitLRU::~SplitLRU() +{ + delete [] dataBlks; + delete [] blks; + delete [] sets; +} + +void +SplitLRU::regStats(const std::string &name) +{ + BaseTags::regStats(name); + + hits + .name(name + ".hits") + .desc("number of hits on this partition") + .precision(0) + ; + + misses + .name(name + ".misses") + .desc("number of misses in this partition") + .precision(0) + ; +} + +// probe cache for presence of given block. +bool +SplitLRU::probe(int asid, Addr addr) const +{ + // return(findBlock(Read, addr, asid) != 0); + Addr tag = extractTag(addr); + unsigned myset = extractSet(addr); + + SplitBlk *blk = sets[myset].findBlk(asid, tag); + + return (blk != NULL); // true if in cache +} + +SplitBlk* +SplitLRU::findBlock(Addr addr, int asid, int &lat) +{ + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + SplitBlk *blk = sets[set].findBlk(asid, tag); + lat = hitLatency; + if (blk != NULL) { + // move this block to head of the MRU list + sets[set].moveToHead(blk); + if (blk->whenReady > curTick && blk->whenReady - curTick > hitLatency){ + lat = blk->whenReady - curTick; + } + blk->refCount += 1; + hits++; + } + + return blk; +} + +SplitBlk* +SplitLRU::findBlock(Packet * &pkt, int &lat) +{ + Addr addr = pkt->paddr; + int asid = pkt->req->asid; + + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + SplitBlk *blk = sets[set].findBlk(asid, tag); + lat = hitLatency; + if (blk != NULL) { + // move this block to head of the MRU list + sets[set].moveToHead(blk); + if (blk->whenReady > curTick && blk->whenReady - curTick > hitLatency){ + lat = blk->whenReady - curTick; + } + blk->refCount += 1; + hits++; + } + + return blk; +} + +SplitBlk* +SplitLRU::findBlock(Addr addr, int asid) const +{ + Addr tag = extractTag(addr); + unsigned set = extractSet(addr); + SplitBlk *blk = sets[set].findBlk(asid, tag); + return blk; +} + +SplitBlk* +SplitLRU::findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks) +{ + unsigned set = extractSet(pkt->paddr); + // grab a replacement candidate + SplitBlk *blk = sets[set].blks[assoc-1]; + sets[set].moveToHead(blk); + if (blk->isValid()) { + int thread_num = (blk->xc) ? blk->xc->getThreadNum() : 0; + replacements[thread_num]++; + totalRefs += blk->refCount; + ++sampledRefs; + blk->refCount = 0; + } else if (!blk->isTouched) { + tagsInUse++; + blk->isTouched = true; + if (!warmedUp && tagsInUse.value() >= warmupBound) { + warmedUp = true; + warmupCycle = curTick; + } + } + + misses++; + + return blk; +} + +void +SplitLRU::invalidateBlk(int asid, Addr addr) +{ + SplitBlk *blk = findBlock(addr, asid); + if (blk) { + blk->status = 0; + blk->isTouched = false; + tagsInUse--; + } +} + +void +SplitLRU::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) +{ + assert(source == blkAlign(source)); + assert(dest == blkAlign(dest)); + SplitBlk *source_blk = findBlock(source, asid); + assert(source_blk); + SplitBlk *dest_blk = findBlock(dest, asid); + if (dest_blk == NULL) { + // Need to do a replacement + Packet * pkt = new Packet(); + pkt->paddr = dest; + BlkList dummy_list; + dest_blk = findReplacement(pkt, writebacks, dummy_list); + if (dest_blk->isValid() && dest_blk->isModified()) { + // Need to writeback data. + pkt = buildWritebackReq(regenerateBlkAddr(dest_blk->tag, + dest_blk->set), + dest_blk->req->asid, + dest_blk->xc, + blkSize, + (cache->doData())?dest_blk->data:0, + dest_blk->size); + writebacks.push_back(pkt); + } + dest_blk->tag = extractTag(dest); + dest_blk->req->asid = asid; + /** + * @todo Do we need to pass in the execution context, or can we + * assume its the same? + */ + assert(source_blk->xc); + dest_blk->xc = source_blk->xc; + } + /** + * @todo Can't assume the status once we have coherence on copies. + */ + + // Set this block as readable, writeable, and dirty. + dest_blk->status = 7; + if (cache->doData()) { + memcpy(dest_blk->data, source_blk->data, blkSize); + } +} + +void +SplitLRU::cleanupRefs() +{ + for (int i = 0; i < numSets*assoc; ++i) { + if (blks[i].isValid()) { + totalRefs += blks[i].refCount; + ++sampledRefs; + } + } +} diff --git a/src/mem/cache/tags/split_lru.hh b/src/mem/cache/tags/split_lru.hh new file mode 100644 index 0000000000..1c0fc8600a --- /dev/null +++ b/src/mem/cache/tags/split_lru.hh @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Lisa Hsu + */ + +/** + * @file + * Declaration of a LRU tag store for a partitioned cache. + */ + +#ifndef __SPLIT_LRU_HH__ +#define __SPLIT_LRU_HH__ + +#include + +#include "mem/cache/cache_blk.hh" // base class +#include "mem/cache/tags/split_blk.hh" +#include "mem/packet.hh" // for inlined functions +#include +#include "mem/cache/tags/base_tags.hh" + +class BaseCache; + +/** + * An associative set of cache blocks. + */ + +class SplitCacheSet +{ + public: + /** The associativity of this set. */ + int assoc; + + /** Cache blocks in this set, maintained in LRU order 0 = MRU. */ + SplitBlk **blks; + + /** + * Find a block matching the tag in this set. + * @param asid The address space ID. + * @param tag The Tag to find. + * @return Pointer to the block if found. + */ + SplitBlk* findBlk(int asid, Addr tag) const; + + /** + * Move the given block to the head of the list. + * @param blk The block to move. + */ + void moveToHead(SplitBlk *blk); +}; + +/** + * A LRU cache tag store. + */ +class SplitLRU : public BaseTags +{ + public: + /** Typedef the block type used in this tag store. */ + typedef SplitBlk BlkType; + /** Typedef for a list of pointers to the local block class. */ + typedef std::list BlkList; + protected: + /** The number of sets in the cache. */ + const int numSets; + /** The number of bytes in a block. */ + const int blkSize; + /** The associativity of the cache. */ + const int assoc; + /** The hit latency. */ + const int hitLatency; + /** indicator for which partition this is */ + const int part; + + /** The cache sets. */ + SplitCacheSet *sets; + + /** The cache blocks. */ + SplitBlk *blks; + /** The data blocks, 1 per cache block. */ + uint8_t *dataBlks; + + /** The amount to shift the address to get the set. */ + int setShift; + /** The amount to shift the address to get the tag. */ + int tagShift; + /** Mask out all bits that aren't part of the set index. */ + unsigned setMask; + /** Mask out all bits that aren't part of the block offset. */ + unsigned blkMask; + + /** number of hits in this partition */ + Stats::Scalar<> hits; + /** number of blocks brought into this partition (i.e. misses) */ + Stats::Scalar<> misses; + +public: + /** + * Construct and initialize this tag store. + * @param _numSets The number of sets in the cache. + * @param _blkSize The number of bytes in a block. + * @param _assoc The associativity of the cache. + * @param _hit_latency The latency in cycles for a hit. + */ + SplitLRU(int _numSets, int _blkSize, int _assoc, int _hit_latency, int _part); + + /** + * Destructor + */ + virtual ~SplitLRU(); + + /** + * Register the statistics for this object + * @param name The name to precede the stat + */ + void regStats(const std::string &name); + + /** + * Return the block size. + * @return the block size. + */ + int getBlockSize() + { + return blkSize; + } + + /** + * Return the subblock size. In the case of LRU it is always the block + * size. + * @return The block size. + */ + int getSubBlockSize() + { + return blkSize; + } + + /** + * Search for the address in the cache. + * @param asid The address space ID. + * @param addr The address to find. + * @return True if the address is in the cache. + */ + bool probe(int asid, Addr addr) const; + + /** + * Invalidate the block containing the given address. + * @param asid The address space ID. + * @param addr The address to invalidate. + */ + void invalidateBlk(int asid, Addr addr); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param addr The address to find. + * @param asid The address space ID. + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Addr addr, int asid, int &lat); + + /** + * Finds the given address in the cache and update replacement data. + * Returns the access latency as a side effect. + * @param req The req whose block to find. + * @param lat The access latency. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Packet * &pkt, int &lat); + + /** + * Finds the given address in the cache, do not update replacement data. + * @param addr The address to find. + * @param asid The address space ID. + * @return Pointer to the cache block if found. + */ + SplitBlk* findBlock(Addr addr, int asid) const; + + /** + * Find a replacement block for the address provided. + * @param req The request to a find a replacement candidate for. + * @param writebacks List for any writebacks to be performed. + * @param compress_blocks List of blocks to compress, for adaptive comp. + * @return The block to place the replacement in. + */ + SplitBlk* findReplacement(Packet * &pkt, PacketList* &writebacks, + BlkList &compress_blocks); + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @return The tag of the address. + */ + Addr extractTag(Addr addr) const + { + return (addr >> tagShift); + } + + /** + * Generate the tag from the given address. + * @param addr The address to get the tag from. + * @param blk Ignored. + * @return The tag of the address. + */ + Addr extractTag(Addr addr, SplitBlk *blk) const + { + return (addr >> tagShift); + } + + /** + * Calculate the set index from the address. + * @param addr The address to get the set from. + * @return The set index of the address. + */ + int extractSet(Addr addr) const + { + return ((addr >> setShift) & setMask); + } + + /** + * Get the block offset from an address. + * @param addr The address to get the offset of. + * @return The block offset. + */ + int extractBlkOffset(Addr addr) const + { + return (addr & blkMask); + } + + /** + * Align an address to the block size. + * @param addr the address to align. + * @return The block address. + */ + Addr blkAlign(Addr addr) const + { + return (addr & ~(Addr)blkMask); + } + + /** + * Regenerate the block address from the tag. + * @param tag The tag of the block. + * @param set The set of the block. + * @return The block address. + */ + Addr regenerateBlkAddr(Addr tag, unsigned set) const + { + return ((tag << tagShift) | ((Addr)set << setShift)); + } + + /** + * Return the hit latency. + * @return the hit latency. + */ + int getHitLatency() const + { + return hitLatency; + } + + /** + * Read the data out of the internal storage of the given cache block. + * @param blk The cache block to read. + * @param data The buffer to read the data into. + * @return The cache block's data. + */ + void readData(SplitBlk *blk, uint8_t *data) + { + memcpy(data, blk->data, blk->size); + } + + /** + * Write data into the internal storage of the given cache block. Since in + * LRU does not store data differently this just needs to update the size. + * @param blk The cache block to write. + * @param data The data to write. + * @param size The number of bytes to write. + * @param writebacks A list for any writebacks to be performed. May be + * needed when writing to a compressed block. + */ + void writeData(SplitBlk *blk, uint8_t *data, int size, + PacketList* & writebacks) + { + assert(size <= blkSize); + blk->size = size; + } + + /** + * Perform a block aligned copy from the source address to the destination. + * @param source The block-aligned source address. + * @param dest The block-aligned destination address. + * @param asid The address space DI. + * @param writebacks List for any generated writeback requests. + */ + void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks); + + /** + * No impl. + */ + void fixCopy(Packet * &pkt, PacketList* &writebacks) + { + } + + /** + * Called at end of simulation to complete average block reference stats. + */ + virtual void cleanupRefs(); +}; + +#endif From fc281d0b64fca8d2809ec462148acb7cf0461ea5 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Wed, 28 Jun 2006 14:35:00 -0400 Subject: [PATCH 02/17] Backing in more changsets, getting closer to compile base_cache.cc compiles, continuing on src/SConscript: Add in compilation flags for cache files src/mem/cache/base_cache.cc: src/mem/cache/base_cache.hh: Back in more fixes, now base_cache compiles src/mem/cache/cache.hh: src/mem/cache/cache_blk.hh: src/mem/cache/cache_impl.hh: src/mem/cache/coherence/coherence_protocol.cc: src/mem/cache/miss/blocking_buffer.cc: src/mem/cache/miss/blocking_buffer.hh: src/mem/cache/miss/miss_queue.cc: src/mem/cache/miss/miss_queue.hh: src/mem/cache/miss/mshr.cc: src/mem/cache/miss/mshr.hh: src/mem/cache/miss/mshr_queue.cc: src/mem/cache/miss/mshr_queue.hh: src/mem/cache/prefetch/base_prefetcher.cc: src/mem/cache/tags/fa_lru.cc: src/mem/cache/tags/iic.cc: src/mem/cache/tags/lru.cc: src/mem/cache/tags/split_lifo.cc: src/mem/cache/tags/split_lru.cc: src/mem/packet.cc: src/mem/packet.hh: src/mem/request.hh: Backing in more changsets, getting closer to compile --HG-- extra : convert_revision : ac2dcda39f8d27baffc4db1df17b9a1fcce5b6ed --- src/SConscript | 25 +++++++++ src/mem/cache/base_cache.cc | 52 ++++++++++--------- src/mem/cache/base_cache.hh | 29 +++++++---- src/mem/cache/cache.hh | 7 ++- src/mem/cache/cache_blk.hh | 4 -- src/mem/cache/cache_impl.hh | 26 +++++----- src/mem/cache/coherence/coherence_protocol.cc | 4 +- src/mem/cache/miss/blocking_buffer.cc | 22 ++++---- src/mem/cache/miss/blocking_buffer.hh | 7 ++- src/mem/cache/miss/miss_queue.cc | 36 ++++++------- src/mem/cache/miss/miss_queue.hh | 4 +- src/mem/cache/miss/mshr.cc | 11 ++-- src/mem/cache/miss/mshr.hh | 2 +- src/mem/cache/miss/mshr_queue.cc | 6 +-- src/mem/cache/miss/mshr_queue.hh | 4 +- src/mem/cache/prefetch/base_prefetcher.cc | 4 +- src/mem/cache/tags/fa_lru.cc | 4 +- src/mem/cache/tags/iic.cc | 4 +- src/mem/cache/tags/lru.cc | 4 +- src/mem/cache/tags/split_lifo.cc | 4 +- src/mem/cache/tags/split_lru.cc | 4 +- src/mem/config/cache.hh | 42 +++++++++++++++ src/mem/packet.cc | 13 +++++ src/mem/packet.hh | 22 +++++++- src/mem/request.hh | 3 ++ 25 files changed, 225 insertions(+), 118 deletions(-) create mode 100644 src/mem/config/cache.hh diff --git a/src/SConscript b/src/SConscript index 124f88708a..ff41e59316 100644 --- a/src/SConscript +++ b/src/SConscript @@ -101,6 +101,31 @@ base_sources = Split(''' mem/physical.cc mem/port.cc + mem/cache/base_cache.cc + mem/cache/cache.cc + mem/cache/cache_builder.cc + mem/cache/coherence/coherence_protocol.cc + mem/cache/coherence/uni_coherence.cc + mem/cache/miss/blocking_buffer.cc + mem/cache/miss/miss_queue.cc + mem/cache/miss/mshr.cc + mem/cache/miss/mshr_queue.cc + mem/cache/prefetch/base_prefetcher.cc + mem/cache/prefetch/ghb_prefetcher.cc + mem/cache/prefetch/prefetcher.cc + mem/cache/prefetch/stride_prefetcher.cc + mem/cache/prefetch/tagged_prefetcher.cc + mem/cache/tags/base_tags.cc + mem/cache/tags/cache_tags.cc + mem/cache/tags/fa_lru.cc + mem/cache/tags/iic/cc + mem/cache/tags/lru.cc + mem/cache/tags/repl/gen.cc + mem/cache/tags/repl/repl.cc + mem/cache/tags/split.cc + mem/cache/tags/split_lifo.cc + mem/cache/tags/split_lru.cc + sim/builder.cc sim/debug.cc sim/eventq.cc diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index 10a49edb1f..89e23ce318 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -45,11 +45,11 @@ BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache, { blocked = false; //Start ports at null if more than one is created we should panic - cpuSidePort = NULL; - memSidePort = NULL; + //cpuSidePort = NULL; + //memSidePort = NULL; } -bool +void BaseCache::CachePort::recvStatusChange(Port::Status status) { cache->recvStatusChange(status, isCpuSide); @@ -121,12 +121,16 @@ BaseCache::getPort(const std::string &if_name) void BaseCache::regStats() { + Request temp_req; + Packet::Command temp_cmd = Packet::ReadReq; + Packet temp_pkt(&temp_req, temp_cmd, 0); //@todo FIx command strings so this isn't neccessary + using namespace Stats; // Hit statistics for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); hits[access_idx] .init(maxThreadsPerCPU) @@ -141,20 +145,20 @@ BaseCache::regStats() .desc("number of demand (read+write) hits") .flags(total) ; - demandHits = hits[Read] + hits[Write]; + demandHits = hits[Packet::ReadReq] + hits[Packet::WriteReq]; overallHits .name(name() + ".overall_hits") .desc("number of overall hits") .flags(total) ; - overallHits = demandHits + hits[Soft_Prefetch] + hits[Hard_Prefetch] - + hits[Writeback]; + overallHits = demandHits + hits[Packet::SoftPFReq] + hits[Packet::HardPFReq] + + hits[Packet::Writeback]; // Miss statistics for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); misses[access_idx] .init(maxThreadsPerCPU) @@ -169,20 +173,20 @@ BaseCache::regStats() .desc("number of demand (read+write) misses") .flags(total) ; - demandMisses = misses[Read] + misses[Write]; + demandMisses = misses[Packet::ReadReq] + misses[Packet::WriteReq]; overallMisses .name(name() + ".overall_misses") .desc("number of overall misses") .flags(total) ; - overallMisses = demandMisses + misses[Soft_Prefetch] + - misses[Hard_Prefetch] + misses[Writeback]; + overallMisses = demandMisses + misses[Packet::SoftPFReq] + + misses[Packet::HardPFReq] + misses[Packet::Writeback]; // Miss latency statistics for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); missLatency[access_idx] .init(maxThreadsPerCPU) @@ -197,20 +201,20 @@ BaseCache::regStats() .desc("number of demand (read+write) miss cycles") .flags(total) ; - demandMissLatency = missLatency[Read] + missLatency[Write]; + demandMissLatency = missLatency[Packet::ReadReq] + missLatency[Packet::WriteReq]; overallMissLatency .name(name() + ".overall_miss_latency") .desc("number of overall miss cycles") .flags(total) ; - overallMissLatency = demandMissLatency + missLatency[Soft_Prefetch] + - missLatency[Hard_Prefetch]; + overallMissLatency = demandMissLatency + missLatency[Packet::SoftPFReq] + + missLatency[Packet::HardPFReq]; // access formulas for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); accesses[access_idx] .name(name() + "." + cstr + "_accesses") @@ -237,8 +241,8 @@ BaseCache::regStats() // miss rate formulas for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); missRate[access_idx] .name(name() + "." + cstr + "_miss_rate") @@ -265,8 +269,8 @@ BaseCache::regStats() // miss latency formulas for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); avgMissLatency[access_idx] .name(name() + "." + cstr + "_avg_miss_latency") diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index 0170b02494..977e0ae297 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -47,6 +47,7 @@ #include "mem/packet.hh" #include "mem/port.hh" #include "mem/request.hh" +#include "sim/eventq.hh" /** * Reasons for Caches to be Blocked. @@ -82,7 +83,7 @@ class BaseCache : public MemObject public: CachePort(const std::string &_name, BaseCache *_cache, bool _isCpuSide); - private: + protected: virtual bool recvTiming(Packet *pkt); virtual Tick recvAtomic(Packet *pkt); @@ -96,6 +97,7 @@ class BaseCache : public MemObject virtual int deviceBlockSize(); + public: void setBlocked(); void clearBlocked(); @@ -110,10 +112,10 @@ class BaseCache : public MemObject Packet *pkt; CachePort *cachePort; - CacheResponseEvent(Packet *pkt, CachePort *cachePort); + CacheEvent(Packet *pkt, CachePort *cachePort); void process(); const char *description(); - } + }; protected: CachePort *cpuSidePort; @@ -124,7 +126,7 @@ class BaseCache : public MemObject private: //To be defined in cache_impl.hh not in base class - virtual bool doTimingAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide); + virtual bool doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide); virtual Tick doAtomicAccess(Packet *pkt, bool isCpuSide); virtual void doFunctionalAccess(Packet *pkt, bool isCpuSide); virtual void recvStatusChange(Port::Status status, bool isCpuSide); @@ -275,12 +277,14 @@ class BaseCache : public MemObject * of this cache. * @param params The parameter object for this BaseCache. */ - BaseCache(const std::string &name, HierParams *hier_params, Params ¶ms) - : BaseMem(name, hier_params, params.hitLatency, params.addrRange), - blocked(0), blockedSnoop(0), masterRequests(0), slaveRequests(0), - topLevelCache(false), blkSize(params.blkSize), + BaseCache(const std::string &name, Params ¶ms) + : MemObject(name), blocked(0), blockedSnoop(0), masterRequests(0), + slaveRequests(0), topLevelCache(false), blkSize(params.blkSize), missCount(params.maxMisses) { + //Start ports at null if more than one is created we should panic + cpuSidePort = NULL; + memSidePort = NULL; } /** @@ -453,8 +457,8 @@ class BaseCache : public MemObject */ void respondToMiss(Packet *pkt, Tick time) { - if (!pkt->isUncacheable()) { - missLatency[pkt->cmd.toIndex()][pkt->thread_num] += time - pkt->time; + if (!pkt->req->isUncacheable()) { + missLatency[pkt->cmdToIndex()][pkt->req->getThreadNum()] += time - pkt->time; } assert("Implement\n" && 0); // si->respond(pkt,time); @@ -475,6 +479,11 @@ class BaseCache : public MemObject * to do for a cache. */ void rangeChange() {} + + void getAddressRanges(AddrRangeList &resp, AddrRangeList &snoop) + { + panic("Unimplimented\n"); + } }; #endif //__BASE_CACHE_HH__ diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index dcb22a99c9..78e87479bb 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -46,7 +46,6 @@ // forward declarations class Bus; -class ExecContext; /** * A template-policy based cache. The behavior of the cache can be altered by @@ -209,11 +208,11 @@ class Cache : public BaseCache /** * Aquash all requests associated with specified thread. * intended for use by I-cache. - * @param thread_number The thread to squash. + * @param req->getThreadNum()ber The thread to squash. */ - void squash(int thread_number) + void squash(int threadNum) { - missQueue->squash(thread_number); + missQueue->squash(threadNum); } /** diff --git a/src/mem/cache/cache_blk.hh b/src/mem/cache/cache_blk.hh index cf1bd20e2e..02fdd7a514 100644 --- a/src/mem/cache/cache_blk.hh +++ b/src/mem/cache/cache_blk.hh @@ -37,7 +37,6 @@ #include "sim/root.hh" // for Tick #include "arch/isa_traits.hh" // for Addr -#include "cpu/exec_context.hh" /** * Cache block status bit assignments @@ -88,9 +87,6 @@ class CacheBlk /** Which curTick will this block be accessable */ Tick whenReady; - /** Save the exec context so that writebacks can use them. */ - ExecContext *xc; - /** * The set this block belongs to. * @todo Move this into subclasses when we fix CacheTags to use them. diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 3dd8d74cde..3dc95af68f 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -175,7 +175,7 @@ Cache::access(Packet &pkt) //We are determining prefetches on access stream, call prefetcher prefetcher->handleMiss(pkt, curTick); } - if (!pkt->isUncacheable()) { + if (!pkt->req->isUncacheable()) { if (pkt->cmd.isInvalidate() && !pkt->cmd.isRead() && !pkt->cmd.isWrite()) { //Upgrade or Invalidate @@ -220,7 +220,7 @@ Cache::access(Packet &pkt) pkt->paddr & ~((Addr)blkSize - 1), pkt->pc); if (blk) { // Hit - hits[pkt->cmd.toIndex()][pkt->thread_num]++; + hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; // clear dirty bit if write through if (!pkt->cmd.isNoResponse()) respond(pkt, curTick+lat); @@ -228,8 +228,8 @@ Cache::access(Packet &pkt) } // Miss - if (!pkt->isUncacheable()) { - misses[pkt->cmd.toIndex()][pkt->thread_num]++; + if (!pkt->req->isUncacheable()) { + misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; /** @todo Move miss count code into BaseCache */ if (missCount) { --missCount; @@ -248,8 +248,8 @@ Cache::getPacket() { Packet * pkt = missQueue->getPacket(); if (pkt) { - if (!pkt->isUncacheable()) { - if (pkt->cmd == Hard_Prefetch) misses[Hard_Prefetch][pkt->thread_num]++; + if (!pkt->req->isUncacheable()) { + if (pkt->cmd == Hard_Prefetch) misses[Hard_Prefetch][pkt->req->getThreadNum()]++; BlkType *blk = tags->findBlock(pkt); Packet::Command cmd = coherence->getBusCmd(pkt->cmd, (blk)? blk->status : 0); @@ -272,7 +272,7 @@ Cache::sendResult(MemPktPtr &pkt, bool success) if (pkt->cmd == Upgrade) { handleResponse(pkt); } - } else if (pkt && !pkt->isUncacheable()) { + } else if (pkt && !pkt->req->isUncacheable()) { missQueue->restoreOrigCmd(pkt); } } @@ -394,7 +394,7 @@ Cache::snoop(Packet * &pkt) for (int i=0; ipkt->isUncacheable()) { + if (!mshr->pkt->req->isUncacheable()) { if (pkt->cmd.isRead()) { //Only Upgrades don't get here //Supply the data @@ -469,7 +469,7 @@ Cache::probe(Packet * &pkt, bool update) { MemDebug::cacheProbe(pkt); - if (!pkt->isUncacheable()) { + if (!pkt->req->isUncacheable()) { if (pkt->cmd.isInvalidate() && !pkt->cmd.isRead() && !pkt->cmd.isWrite()) { //Upgrade or Invalidate, satisfy it, don't forward @@ -583,7 +583,7 @@ Cache::probe(Packet * &pkt, bool update) // Can't handle it, return pktuest unsatisfied. return 0; } - if (!pkt->isUncacheable()) { + if (!pkt->req->isUncacheable()) { // Fetch the cache block to fill Packet * busPkt = new MemPkt(); busPkt->paddr = blk_addr; @@ -596,7 +596,7 @@ Cache::probe(Packet * &pkt, bool update) busPkt->req->asid = pkt->req->asid; busPkt->xc = pkt->xc; - busPkt->thread_num = pkt->thread_num; + busPkt->req->setThreadNum() = pkt->req->getThreadNum(); busPkt->time = curTick; lat = mi->sendProbe(busPkt, update); @@ -606,7 +606,7 @@ Cache::probe(Packet * &pkt, bool update) return 0; } - misses[pkt->cmd.toIndex()][pkt->thread_num]++; + misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; CacheBlk::State old_state = (blk) ? blk->status : 0; tags->handleFill(blk, busPkt, @@ -631,7 +631,7 @@ Cache::probe(Packet * &pkt, bool update) } if (update) { - hits[pkt->cmd.toIndex()][pkt->thread_num]++; + hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; } else if (pkt->cmd.isWrite()) { // Still need to change data in all locations. return mi->sendProbe(pkt, update); diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc index 107fd2502b..9d5b8ef546 100644 --- a/src/mem/cache/coherence/coherence_protocol.cc +++ b/src/mem/cache/coherence/coherence_protocol.cc @@ -465,7 +465,7 @@ CacheBlk::State CoherenceProtocol::getNewState(const Packet * &pkt, CacheBlk::State oldState) { CacheBlk::State state = oldState & stateMask; - int cmd_idx = pkt->cmd.toIndex(); + int cmd_idx = pkt->cmdToIndex(); assert(0 <= state && state <= stateMax); assert(0 <= cmd_idx && cmd_idx < NUM_MEM_CMDS); @@ -498,7 +498,7 @@ CoherenceProtocol::handleBusRequest(BaseCache *cache, Packet * &pkt, } CacheBlk::State state = blk->status & stateMask; - int cmd_idx = pkt->cmd.toIndex(); + int cmd_idx = pkt->cmdToIndex(); assert(0 <= state && state <= stateMax); assert(0 <= cmd_idx && cmd_idx < NUM_MEM_CMDS); diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc index 621855c3d1..912a0f5bd0 100644 --- a/src/mem/cache/miss/blocking_buffer.cc +++ b/src/mem/cache/miss/blocking_buffer.cc @@ -73,7 +73,7 @@ void BlockingBuffer::handleMiss(Packet * &pkt, int blk_size, Tick time) { Addr blk_addr = pkt->paddr & ~(Addr)(blk_size - 1); - if (pkt->cmd.isWrite() && (pkt->isUncacheable() || !writeAllocate || + if (pkt->cmd.isWrite() && (pkt->req->isUncacheable() || !writeAllocate || pkt->cmd.isNoResponse())) { if (pkt->cmd.isNoResponse()) { wb.allocateAsBuffer(pkt); @@ -93,7 +93,7 @@ BlockingBuffer::handleMiss(Packet * &pkt, int blk_size, Tick time) } else { miss.allocate(pkt->cmd, blk_addr, pkt->req->asid, blk_size, pkt); } - if (!pkt->isUncacheable()) { + if (!pkt->req->isUncacheable()) { miss.pkt->flags |= CACHE_LINE_FILL; } cache->setBlocked(Blocked_NoMSHRs); @@ -186,12 +186,12 @@ BlockingBuffer::handleResponse(Packet * &pkt, Tick time) } void -BlockingBuffer::squash(int thread_number) +BlockingBuffer::squash(int req->getThreadNum()ber) { - if (miss.threadNum == thread_number) { + if (miss.setThreadNum() == req->getThreadNum()ber) { Packet * target = miss.getTarget(); miss.popTarget(); - assert(target->thread_num == thread_number); + assert(target->req->setThreadNum() == req->getThreadNum()ber); if (target->completionEvent != NULL) { delete target->completionEvent; } @@ -207,7 +207,7 @@ BlockingBuffer::squash(int thread_number) } void -BlockingBuffer::doWriteback(Addr addr, int asid, ExecContext *xc, +BlockingBuffer::doWriteback(Addr addr, int asid, int size, uint8_t *data, bool compressed) { @@ -224,18 +224,14 @@ BlockingBuffer::doWriteback(Addr addr, int asid, ExecContext *xc, * @todo Need to find a way to charge the writeback to the "correct" * thread. */ - pkt->xc = xc; - if (xc) - pkt->thread_num = xc->getThreadNum(); - else - pkt->thread_num = 0; + pkt->req->setThreadNum() = 0; pkt->cmd = Writeback; if (compressed) { pkt->flags |= COMPRESSED; } - writebacks[pkt->thread_num]++; + writebacks[pkt->req->getThreadNum()]++; wb.allocateAsBuffer(pkt); cache->setMasterRequest(Request_WB, curTick); @@ -247,7 +243,7 @@ BlockingBuffer::doWriteback(Addr addr, int asid, ExecContext *xc, void BlockingBuffer::doWriteback(Packet * &pkt) { - writebacks[pkt->thread_num]++; + writebacks[pkt->req->getThreadNum()]++; wb.allocateAsBuffer(pkt); diff --git a/src/mem/cache/miss/blocking_buffer.hh b/src/mem/cache/miss/blocking_buffer.hh index 52256be742..08814b43e6 100644 --- a/src/mem/cache/miss/blocking_buffer.hh +++ b/src/mem/cache/miss/blocking_buffer.hh @@ -164,9 +164,9 @@ public: /** * Removes all outstanding requests for a given thread number. If a request * has been sent to the bus, this function removes all of its targets. - * @param thread_number The thread number of the requests to squash. + * @param req->getThreadNum()ber The thread number of the requests to squash. */ - void squash(int thread_number); + void squash(int req->getThreadNum()ber); /** * Return the current number of outstanding misses. @@ -212,12 +212,11 @@ public: * Perform a writeback of dirty data to the given address. * @param addr The address to write to. * @param asid The address space id. - * @param xc The execution context of the address space. * @param size The number of bytes to write. * @param data The data to write, can be NULL. * @param compressed True if the data is compressed. */ - void doWriteback(Addr addr, int asid, ExecContext *xc, + void doWriteback(Addr addr, int asid, int size, uint8_t *data, bool compressed); /** diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc index 7902fbcee9..d02f27d52a 100644 --- a/src/mem/cache/miss/miss_queue.cc +++ b/src/mem/cache/miss/miss_queue.cc @@ -347,7 +347,7 @@ MissQueue::allocateMiss(Packet * &pkt, int size, Tick time) { MSHR* mshr = mq.allocate(pkt, size); mshr->order = order++; - if (!pkt->isUncacheable() ){//&& !pkt->isNoAllocate()) { + if (!pkt->req->isUncacheable() ){//&& !pkt->isNoAllocate()) { // Mark this as a cache line fill mshr->pkt->flags |= CACHE_LINE_FILL; } @@ -399,13 +399,13 @@ MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time) int size = blkSize; Addr blkAddr = pkt->paddr & ~(Addr)(blkSize-1); MSHR* mshr = NULL; - if (!pkt->isUncacheable()) { + if (!pkt->req->isUncacheable()) { mshr = mq.findMatch(blkAddr, pkt->req->asid); if (mshr) { //@todo remove hw_pf here - mshr_hits[pkt->cmd.toIndex()][pkt->thread_num]++; - if (mshr->threadNum != pkt->thread_num) { - mshr->threadNum = -1; + mshr_hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; + if (mshr->getThreadNum() != pkt->req->getThreadNum()) { + mshr->setThreadNum() = -1; } mq.allocateTarget(mshr, pkt); if (mshr->pkt->isNoAllocate() && !pkt->isNoAllocate()) { @@ -424,14 +424,14 @@ MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time) mshr_no_allocate_misses++; } else { - mshr_misses[pkt->cmd.toIndex()][pkt->thread_num]++; + mshr_misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; } } else { //Count uncacheable accesses - mshr_uncacheable[pkt->cmd.toIndex()][pkt->thread_num]++; + mshr_uncacheable[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; size = pkt->size; } - if (pkt->cmd.isWrite() && (pkt->isUncacheable() || !writeAllocate || + if (pkt->cmd.isWrite() && (pkt->req->isUncacheable() || !writeAllocate || pkt->cmd.isNoResponse())) { /** * @todo Add write merging here. @@ -489,7 +489,7 @@ MissQueue::getPacket() pkt = prefetcher->getPacket(); if (pkt) { //Update statistic on number of prefetches issued (hwpf_mshr_misses) - mshr_misses[pkt->cmd.toIndex()][pkt->thread_num]++; + mshr_misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; //It will request the bus for the future, but should clear that immedieatley allocateMiss(pkt, pkt->size, curTick); pkt = mq.getReq(); @@ -582,7 +582,7 @@ MissQueue::handleResponse(Packet * &pkt, Tick time) BlockedCause cause = NUM_BLOCKED_CAUSES; if (pkt->isCacheFill() && !pkt->isNoAllocate()) { - mshr_miss_latency[mshr->originalCmd][pkt->thread_num] += + mshr_miss_latency[mshr->originalCmd][pkt->req->getThreadNum()] += curTick - pkt->time; // targets were handled in the cache tags if (mshr == noTargetMSHR) { @@ -608,11 +608,11 @@ MissQueue::handleResponse(Packet * &pkt, Tick time) } } } else { - if (pkt->isUncacheable()) { - mshr_uncacheable_lat[pkt->cmd][pkt->thread_num] += + if (pkt->req->isUncacheable()) { + mshr_uncacheable_lat[pkt->cmd][pkt->req->getThreadNum()] += curTick - pkt->time; } - if (mshr->hasTargets() && pkt->isUncacheable()) { + if (mshr->hasTargets() && pkt->req->isUncacheable()) { // Should only have 1 target if we had any assert(num_targets == 1); Packet * target = mshr->getTarget(); @@ -660,12 +660,12 @@ MissQueue::handleResponse(Packet * &pkt, Tick time) } void -MissQueue::squash(int thread_number) +MissQueue::squash(int req->getThreadNum()ber) { bool unblock = false; BlockedCause cause = NUM_BLOCKED_CAUSES; - if (noTargetMSHR && noTargetMSHR->threadNum == thread_number) { + if (noTargetMSHR && noTargetMSHR->setThreadNum() == req->getThreadNum()ber) { noTargetMSHR = NULL; unblock = true; cause = Blocked_NoTargets; @@ -674,7 +674,7 @@ MissQueue::squash(int thread_number) unblock = true; cause = Blocked_NoMSHRs; } - mq.squash(thread_number); + mq.squash(req->getThreadNum()ber); if (!mq.havePending()) { cache->clearMasterRequest(Request_MSHR); } @@ -704,7 +704,7 @@ MissQueue::doWriteback(Addr addr, int asid, Packet * pkt = buildWritebackReq(addr, asid, size, data, compressed); - writebacks[pkt->thread_num]++; + writebacks[pkt->req->getThreadNum()]++; allocateWrite(pkt, 0, curTick); } @@ -713,7 +713,7 @@ MissQueue::doWriteback(Addr addr, int asid, void MissQueue::doWriteback(Packet * &pkt) { - writebacks[pkt->thread_num]++; + writebacks[pkt->req->getThreadNum()]++; allocateWrite(pkt, 0, curTick); } diff --git a/src/mem/cache/miss/miss_queue.hh b/src/mem/cache/miss/miss_queue.hh index ce827fe812..d459821086 100644 --- a/src/mem/cache/miss/miss_queue.hh +++ b/src/mem/cache/miss/miss_queue.hh @@ -268,9 +268,9 @@ class MissQueue /** * Removes all outstanding requests for a given thread number. If a request * has been sent to the bus, this function removes all of its targets. - * @param thread_number The thread number of the requests to squash. + * @param req->getThreadNum()ber The thread number of the requests to squash. */ - void squash(int thread_number); + void squash(int req->getThreadNum()ber); /** * Return the current number of outstanding misses. diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 73aeaf6cae..5c3c9fd1d0 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -50,7 +50,7 @@ MSHR::MSHR() { inService = false; ntargets = 0; - threadNum = -1; + setThreadNum() = -1; } void @@ -68,7 +68,7 @@ MSHR::allocate(Packet::Command cmd, Addr _addr, int _asid, int size, pkt->data = new uint8_t[size]; pkt->senderState = this; //Set the time here for latency calculations - //pkt->time = curTick; + pkt->time = curTick; if (target) { pkt->req = target->req; @@ -85,7 +85,7 @@ MSHR::allocateAsBuffer(Packet * &target) { addr = target->paddr; asid = target->req->asid; - threadNum = target->thread_num; + setThreadNum() = target->req->getThreadNum(); pkt = new Packet(); pkt->addr = target->addr; pkt->dest = target->dest; @@ -94,6 +94,7 @@ MSHR::allocateAsBuffer(Packet * &target) pkt->req = target->req; pkt->data = new uint8_t[target->size]; pkt->senderState = this; + pkt->time = curTick; } void @@ -161,14 +162,14 @@ MSHR::dump() "inService: %d thread: %d\n" "Addr: %x asid: %d ntargets %d\n" "Targets:\n", - inService, threadNum, addr, asid, ntargets); + inService, getThreadNum(), addr, asid, ntargets); TargetListIterator tar_it = targets.begin(); for (int i = 0; i < ntargets; i++) { assert(tar_it != targets.end()); ccprintf(cerr, "\t%d: Addr: %x cmd: %d\n", - i, (*tar_it)->paddr, (*tar_it)->cmd.toIndex()); + i, (*tar_it)->paddr, (*tar_it)->cmdToIndex()); tar_it++; } diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index 167aa26cd1..3bd6d36d15 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -66,7 +66,7 @@ class MSHR { /** True if the request has been sent to the bus. */ bool inService; /** Thread number of the miss. */ - int threadNum; + int getThreadNum(); /** The request that is forwarded to the next level of the hierarchy. */ Packet * pkt; /** The number of currently allocated targets. */ diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc index 72c8cc4981..ced43d30af 100644 --- a/src/mem/cache/miss/mshr_queue.cc +++ b/src/mem/cache/miss/mshr_queue.cc @@ -237,18 +237,18 @@ MSHRQueue::markPending(MSHR* mshr, Packet::Command cmd) } void -MSHRQueue::squash(int thread_number) +MSHRQueue::squash(int req->getThreadNum()ber) { MSHR::Iterator i = allocatedList.begin(); MSHR::Iterator end = allocatedList.end(); for (; i != end;) { MSHR *mshr = *i; - if (mshr->threadNum == thread_number) { + if (mshr->setThreadNum() == req->getThreadNum()ber) { while (mshr->hasTargets()) { Packet * target = mshr->getTarget(); mshr->popTarget(); - assert(target->thread_num == thread_number); + assert(target->req->setThreadNum() == req->getThreadNum()ber); if (target->completionEvent != NULL) { delete target->completionEvent; } diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh index 3e1d3f39f0..563368d292 100644 --- a/src/mem/cache/miss/mshr_queue.hh +++ b/src/mem/cache/miss/mshr_queue.hh @@ -190,9 +190,9 @@ class MSHRQueue { /** * Squash outstanding requests with the given thread number. If a request * is in service, just squashes the targets. - * @param thread_number The thread to squash. + * @param req->getThreadNum()ber The thread to squash. */ - void squash(int thread_number); + void squash(int req->getThreadNum()ber); /** * Returns true if the pending list is not empty. diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc index 14beef2601..7b2d57cd53 100644 --- a/src/mem/cache/prefetch/base_prefetcher.cc +++ b/src/mem/cache/prefetch/base_prefetcher.cc @@ -132,7 +132,7 @@ BasePrefetcher::getPacket() void BasePrefetcher::handleMiss(Packet * &pkt, Tick time) { - if (!pkt->isUncacheable() && !(pkt->isInstRead() && only_data)) + if (!pkt->req->isUncacheable() && !(pkt->isInstRead() && only_data)) { //Calculate the blk address Addr blkAddr = pkt->paddr & ~(Addr)(blkSize-1); @@ -185,7 +185,7 @@ BasePrefetcher::handleMiss(Packet * &pkt, Tick time) prefetch->xc = pkt->xc; prefetch->data = new uint8_t[blkSize]; prefetch->req->asid = pkt->req->asid; - prefetch->thread_num = pkt->thread_num; + prefetch->req->setThreadNum() = pkt->req->getThreadNum(); prefetch->time = time + (*delay); //@todo ADD LATENCY HERE //... initialize diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc index 66d91b35b6..43ab363095 100644 --- a/src/mem/cache/tags/fa_lru.cc +++ b/src/mem/cache/tags/fa_lru.cc @@ -264,8 +264,8 @@ FALRU::findReplacement(Packet * &pkt, PacketList* &writebacks, tagHash.erase(blk->tag); tagHash[blkAlign(pkt->paddr)] = blk; if (blk->isValid()) { - int thread_num = (blk->xc) ? blk->xc->getThreadNum() : 0; - replacements[thread_num]++; + int req->setThreadNum() = (blk->xc) ? blk->xc->getThreadNum() : 0; + replacements[req->getThreadNum()]++; } else { tagsInUse++; blk->isTouched = true; diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc index a574adaa32..f4641401fc 100644 --- a/src/mem/cache/tags/iic.cc +++ b/src/mem/cache/tags/iic.cc @@ -418,8 +418,8 @@ IIC::freeReplacementBlock(PacketList* & writebacks) tag_ptr->isModified() ? "writeback" : "clean"); /* write back replaced block data */ if (tag_ptr && (tag_ptr->isValid())) { - int thread_num = (tag_ptr->xc) ? tag_ptr->xc->getThreadNum() : 0; - replacements[thread_num]++; + int req->setThreadNum() = (tag_ptr->xc) ? tag_ptr->xc->getThreadNum() : 0; + replacements[req->getThreadNum()]++; totalRefs += tag_ptr->refCount; ++sampledRefs; tag_ptr->refCount = 0; diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc index 0fe88fd087..19a52aade7 100644 --- a/src/mem/cache/tags/lru.cc +++ b/src/mem/cache/tags/lru.cc @@ -225,8 +225,8 @@ LRU::findReplacement(Packet * &pkt, PacketList* &writebacks, LRUBlk *blk = sets[set].blks[assoc-1]; sets[set].moveToHead(blk); if (blk->isValid()) { - int thread_num = (blk->xc) ? blk->xc->getThreadNum() : 0; - replacements[thread_num]++; + int req->setThreadNum() = (blk->xc) ? blk->xc->getThreadNum() : 0; + replacements[req->getThreadNum()]++; totalRefs += blk->refCount; ++sampledRefs; blk->refCount = 0; diff --git a/src/mem/cache/tags/split_lifo.cc b/src/mem/cache/tags/split_lifo.cc index f2c37c80d3..c6bb91eff1 100644 --- a/src/mem/cache/tags/split_lifo.cc +++ b/src/mem/cache/tags/split_lifo.cc @@ -317,8 +317,8 @@ SplitLIFO::findReplacement(Packet * &pkt, PacketList* &writebacks, DPRINTF(Split, "just assigned %#x addr into LIFO, replacing %#x status %#x\n", pkt->paddr, regenerateBlkAddr(blk->tag, set), blk->status); if (blk->isValid()) { - int thread_num = (blk->xc) ? blk->xc->getThreadNum() : 0; - replacements[thread_num]++; + int req->setThreadNum() = (blk->xc) ? blk->xc->getThreadNum() : 0; + replacements[req->getThreadNum()]++; totalRefs += blk->refCount; ++sampledRefs; blk->refCount = 0; diff --git a/src/mem/cache/tags/split_lru.cc b/src/mem/cache/tags/split_lru.cc index ea5b92d6f2..4b7f4c1147 100644 --- a/src/mem/cache/tags/split_lru.cc +++ b/src/mem/cache/tags/split_lru.cc @@ -244,8 +244,8 @@ SplitLRU::findReplacement(Packet * &pkt, PacketList* &writebacks, SplitBlk *blk = sets[set].blks[assoc-1]; sets[set].moveToHead(blk); if (blk->isValid()) { - int thread_num = (blk->xc) ? blk->xc->getThreadNum() : 0; - replacements[thread_num]++; + int req->setThreadNum() = (blk->xc) ? blk->xc->getThreadNum() : 0; + replacements[req->getThreadNum()]++; totalRefs += blk->refCount; ++sampledRefs; blk->refCount = 0; diff --git a/src/mem/config/cache.hh b/src/mem/config/cache.hh new file mode 100644 index 0000000000..24da040219 --- /dev/null +++ b/src/mem/config/cache.hh @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Nathan Binkert + */ + +/** + * @file + * Central location to configure which cache types we want to build + * into the simulator. In the future, this should probably be + * autogenerated by some sort of configuration script. + */ +#define USE_CACHE_LRU 1 +#define USE_CACHE_FALRU 1 +// #define USE_CACHE_SPLIT 1 +// #define USE_CACHE_SPLIT_LIFO 1 +#define USE_CACHE_IIC 1 + diff --git a/src/mem/packet.cc b/src/mem/packet.cc index 56dd2bdfad..91298df8ce 100644 --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -57,6 +57,19 @@ Packet::cmdString() const } } +const std::string & +Packet::cmdIdxToString(Packet::Command idx) +{ + switch (idx) { + case ReadReq: return ReadReqString; + case WriteReq: return WriteReqString; + case WriteReqNoAck: return WriteReqNoAckString; + case ReadResp: return ReadRespString; + case WriteResp: return WriteRespString; + default: return OtherCmdString; + } +} + /** delete the data pointed to in the data pointer. Ok to call to matter how * data was allocted. */ void diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 403039d966..176c6f793b 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -46,6 +46,10 @@ struct Packet; typedef Packet* PacketPtr; typedef uint8_t* PacketDataPtr; +//For statistics we need max number of commands, hard code it at +//20 for now. @todo fix later +#define NUM_MEM_CMDS 1 << 9 + /** * A Packet is used to encapsulate a transfer between two objects in * the memory system (e.g., the L1 and L2 cache). (In contrast, a @@ -102,6 +106,9 @@ class Packet public: + /** Used to calculate latencies for each packet.*/ + Tick time; + /** The special destination address indicating that the packet * should be routed based on its address. */ static const short Broadcast = -1; @@ -149,6 +156,8 @@ class Packet IsRequest = 1 << 4, IsResponse = 1 << 5, NeedsResponse = 1 << 6, + IsSWPrefetch = 1 << 7, + IsHWPrefetch = 1 << 8 }; public: @@ -159,13 +168,24 @@ class Packet WriteReq = IsWrite | IsRequest | NeedsResponse, WriteReqNoAck = IsWrite | IsRequest, ReadResp = IsRead | IsResponse, - WriteResp = IsWrite | IsResponse + WriteResp = IsWrite | IsResponse, + Writeback = IsWrite | IsRequest, + SoftPFReq = IsRead | IsRequest | IsSWPrefetch | NeedsResponse, + HardPFReq = IsRead | IsRequest | IsHWPrefetch | NeedsResponse, + SoftPFResp = IsRead | IsRequest | IsSWPrefetch | IsResponse, + HardPFResp = IsRead | IsRequest | IsHWPrefetch | IsResponse }; /** Return the string name of the cmd field (for debugging and * tracing). */ const std::string &cmdString() const; + /** Reutrn the string to a cmd given by idx. */ + const std::string &cmdIdxToString(Command idx); + + /** Return the index of this command. */ + inline int cmdToIndex() const { return (int) cmd; } + /** The command field of the packet. */ Command cmd; diff --git a/src/mem/request.hh b/src/mem/request.hh index af1d6d8a85..469184b130 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -224,6 +224,9 @@ class Request /** Accessor function for pc.*/ Addr getPC() { assert(validPC); return pc; } + /** Accessor Function to Check Cacheability. */ + bool isUncacheable() { return getFlags() & UNCACHEABLE; } + friend class Packet; }; From 0d323c753d897bec72884089bc0dc334a64e9eb3 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Wed, 28 Jun 2006 17:28:33 -0400 Subject: [PATCH 03/17] More Changes, working towards cache.cc compiling. Headers cleaned up. src/mem/cache/cache_blk.hh: Remove XC --HG-- extra : convert_revision : aa2c43e4412ebb93165e12f693d5126983cfd0dc --- src/mem/cache/cache.hh | 7 +- src/mem/cache/cache_blk.hh | 3 +- src/mem/cache/cache_impl.hh | 183 +++++++++--------- src/mem/cache/coherence/coherence_protocol.hh | 4 +- src/mem/cache/coherence/simple_coherence.hh | 6 +- src/mem/cache/coherence/uni_coherence.hh | 11 +- src/mem/cache/miss/blocking_buffer.hh | 2 +- src/mem/cache/miss/miss_queue.hh | 2 +- src/mem/cache/miss/mshr_queue.hh | 2 +- src/mem/packet.hh | 17 +- src/mem/request.hh | 1 + 11 files changed, 124 insertions(+), 114 deletions(-) diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 78e87479bb..587faaf511 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -138,7 +138,7 @@ class Cache : public BaseCache }; /** Instantiates a basic cache object. */ - Cache(const std::string &_name, HierParams *hier_params, Params ¶ms); + Cache(const std::string &_name, Params ¶ms); void regStats(); @@ -147,7 +147,7 @@ class Cache : public BaseCache * @param req The request to perform. * @return The result of the access. */ - MemAccessResult access(Packet * &pkt); + bool access(Packet * &pkt); /** * Selects a request to send on the bus. @@ -233,7 +233,8 @@ class Cache : public BaseCache */ void respond(Packet * &pkt, Tick time) { - si->respond(pkt,time); + //si->respond(pkt,time); + cpuSidePort->sendAtomic(pkt); } /** diff --git a/src/mem/cache/cache_blk.hh b/src/mem/cache/cache_blk.hh index 02fdd7a514..67e65d25bb 100644 --- a/src/mem/cache/cache_blk.hh +++ b/src/mem/cache/cache_blk.hh @@ -97,7 +97,7 @@ class CacheBlk int refCount; CacheBlk() - : asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0), xc(0), + : asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0), set(-1), refCount(0) {} @@ -114,7 +114,6 @@ class CacheBlk size = rhs.size; status = rhs.status; whenReady = rhs.whenReady; - xc = rhs.xc; set = rhs.set; refCount = rhs.refCount; return *this; diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 3dc95af68f..699d874deb 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -51,14 +51,6 @@ #include "mem/cache/miss/mshr.hh" #include "mem/cache/prefetch/prefetcher.hh" -#include "mem/bus/bus.hh" - -#include "mem/bus/slave_interface.hh" -#include "mem/memory_interface.hh" -#include "mem/bus/master_interface.hh" - -#include "mem/mem_debug.hh" - #include "sim/sim_events.hh" // for SimExitEvent using namespace std; @@ -66,7 +58,7 @@ using namespace std; template bool Cache:: -doTimingAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide) +doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) { if (isCpuSide) { @@ -74,17 +66,18 @@ doTimingAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide) } else { - if (pkt->isRespnse()) + if (pkt->isResponse()) handleResponse(pkt); else snoop(pkt); } + return true; //Deal with blocking.... } template Tick Cache:: -doAtomicAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide) +doAtomicAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) { if (isCpuSide) { @@ -92,7 +85,7 @@ doAtomicAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide) } else { - if (pkt->isRespnse()) + if (pkt->isResponse()) handleResponse(pkt); else snoopProbe(pkt, true); @@ -102,7 +95,7 @@ doAtomicAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide) template void Cache:: -doFunctionalAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide) +doFunctionalAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) { if (isCpuSide) { @@ -110,7 +103,7 @@ doFunctionalAccess(Packet *pkt, MemoryPort *memoryPort, bool isCpuSide) } else { - if (pkt->isRespnse()) + if (pkt->isResponse()) handleResponse(pkt); else snoopProbe(pkt, false); @@ -128,9 +121,9 @@ recvStatusChange(Port::Status status, bool isCpuSide) template Cache:: -Cache(const std::string &_name, HierParams *hier_params, +Cache(const std::string &_name, Cache::Params ¶ms) - : BaseCache(_name, hier_params, params.baseParams), + : BaseCache(_name, params.baseParams), prefetchAccess(params.prefetchAccess), tags(params.tags), missQueue(params.missQueue), coherence(params.coherence), prefetcher(params.prefetcher), @@ -148,7 +141,7 @@ Cache(const std::string &_name, HierParams *hier_params, prefetcher->setTags(tags); prefetcher->setBuffer(missQueue); invalidatePkt = new Packet; - invalidatePkt->cmd = Invalidate; + invalidatePkt->cmd = Packet::InvalidateReq; } template @@ -163,12 +156,13 @@ Cache::regStats() } template -MemAccessResult -Cache::access(Packet &pkt) +bool +Cache::access(PacketPtr &pkt) { - MemDebug::cacheAccess(pkt); +//@todo Add back in MemDebug Calls +// MemDebug::cacheAccess(pkt); BlkType *blk = NULL; - PacketList* writebacks; + PacketList writebacks; int size = blkSize; int lat = hitLatency; if (prefetchAccess) { @@ -176,18 +170,19 @@ Cache::access(Packet &pkt) prefetcher->handleMiss(pkt, curTick); } if (!pkt->req->isUncacheable()) { - if (pkt->cmd.isInvalidate() && !pkt->cmd.isRead() - && !pkt->cmd.isWrite()) { + if (pkt->isInvalidate() && !pkt->isRead() + && !pkt->isWrite()) { //Upgrade or Invalidate //Look into what happens if two slave caches on bus - DPRINTF(Cache, "%s %d %x ? blk_addr: %x\n", pkt->cmd.toString(), - pkt->req->asid, pkt->paddr & (((ULL(1))<<48)-1), - pkt->paddr & ~((Addr)blkSize - 1)); + DPRINTF(Cache, "%s %d %x ? blk_addr: %x\n", pkt->cmdString(), + pkt->req->asid, pkt->addr & (((ULL(1))<<48)-1), + pkt->addr & ~((Addr)blkSize - 1)); //@todo Should this return latency have the hit latency in it? // respond(pkt,curTick+lat); - pkt->flags |= SATISFIED; - return MA_HIT; + (int)pkt->coherence |= SATISFIED; +// return MA_HIT; //@todo, return values + return true; } blk = tags->handleAccess(pkt, lat, writebacks); } else { @@ -198,10 +193,10 @@ Cache::access(Packet &pkt) /** @todo make the fast write alloc (wh64) work with coherence. */ /** @todo Do we want to do fast writes for writebacks as well? */ if (!blk && pkt->size >= blkSize && coherence->allowFastWrites() && - (pkt->cmd == Write || pkt->cmd == WriteInvalidate) ) { + (pkt->cmd == Packet::WriteReq || pkt->cmd == Packet::WriteInvalidateReq) ) { // not outstanding misses, can do this - MSHR* outstanding_miss = missQueue->findMSHR(pkt->paddr, pkt->req->asid); - if (pkt->cmd ==WriteInvalidate || !outstanding_miss) { + MSHR* outstanding_miss = missQueue->findMSHR(pkt->addr, pkt->req->asid); + if (pkt->cmd == Packet::WriteInvalidateReq || !outstanding_miss) { if (outstanding_miss) { warn("WriteInv doing a fastallocate" "with an outstanding miss to the same address\n"); @@ -215,16 +210,17 @@ Cache::access(Packet &pkt) missQueue->doWriteback(writebacks.front()); writebacks.pop_front(); } - DPRINTF(Cache, "%s %d %x %s blk_addr: %x pc %x\n", pkt->cmd.toString(), - pkt->req->asid, pkt->paddr & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss", - pkt->paddr & ~((Addr)blkSize - 1), pkt->pc); + DPRINTF(Cache, "%s %d %x %s blk_addr: %x pc %x\n", pkt->cmdString(), + pkt->req->asid, pkt->addr & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss", + pkt->addr & ~((Addr)blkSize - 1), pkt->req->pc); if (blk) { // Hit hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; // clear dirty bit if write through - if (!pkt->cmd.isNoResponse()) + if (pkt->needsResponse()) respond(pkt, curTick+lat); - return MA_HIT; +// return MA_HIT; + return true; } // Miss @@ -234,11 +230,12 @@ Cache::access(Packet &pkt) if (missCount) { --missCount; if (missCount == 0) - new SimExitEvent("A cache reached the maximum miss count"); + new SimLoopExitEvent("A cache reached the maximum miss count"); } } missQueue->handleMiss(pkt, size, curTick + hitLatency); - return MA_CACHE_MISS; +// return MA_CACHE_MISS; + return true; } @@ -249,7 +246,7 @@ Cache::getPacket() Packet * pkt = missQueue->getPacket(); if (pkt) { if (!pkt->req->isUncacheable()) { - if (pkt->cmd == Hard_Prefetch) misses[Hard_Prefetch][pkt->req->getThreadNum()]++; + if (pkt->cmd == Packet::HardPFReq) misses[Packet::HardPFReq][pkt->req->getThreadNum()]++; BlkType *blk = tags->findBlock(pkt); Packet::Command cmd = coherence->getBusCmd(pkt->cmd, (blk)? blk->status : 0); @@ -257,19 +254,19 @@ Cache::getPacket() } } - assert(!doMasterPktuest() || missQueue->havePending()); + assert(!doMasterRequest() || missQueue->havePending()); assert(!pkt || pkt->time <= curTick); return pkt; } template void -Cache::sendResult(MemPktPtr &pkt, bool success) +Cache::sendResult(PacketPtr &pkt, bool success) { if (success) { missQueue->markInService(pkt); //Temp Hack for UPGRADES - if (pkt->cmd == Upgrade) { + if (pkt->cmd == Packet::UpgradeReq) { handleResponse(pkt); } } else if (pkt && !pkt->req->isUncacheable()) { @@ -283,14 +280,14 @@ Cache::handleResponse(Packet * &pkt) { BlkType *blk = NULL; if (pkt->senderState) { - MemDebug::cacheResponse(pkt); - DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->paddr, - pkt->paddr & (((ULL(1))<<48)-1)); +// MemDebug::cacheResponse(pkt); + DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->addr, + pkt->addr & (((ULL(1))<<48)-1)); if (pkt->isCacheFill() && !pkt->isNoAllocate()) { blk = tags->findBlock(pkt); CacheBlk::State old_state = (blk) ? blk->status : 0; - MemPktList writebacks; + PacketList writebacks; blk = tags->handleFill(blk, pkt->senderState, coherence->getNewState(pkt,old_state), writebacks); @@ -309,11 +306,11 @@ Cache::pseudoFill(Addr addr, int asid) // Need to temporarily move this blk into MSHRs MSHR *mshr = missQueue->allocateTargetList(addr, asid); int lat; - PacketList* dummy; + PacketList dummy; // Read the data into the mshr BlkType *blk = tags->handleAccess(mshr->pkt, lat, dummy, false); assert(dummy.empty()); - assert(mshr->pkt->isSatisfied()); + assert((int)mshr->pkt->coherence & SATISFIED); // can overload order since it isn't used on non pending blocks mshr->order = blk->status; // temporarily remove the block from the cache. @@ -325,17 +322,17 @@ void Cache::pseudoFill(MSHR *mshr) { // Need to temporarily move this blk into MSHRs - assert(mshr->pkt->cmd == Read); + assert(mshr->pkt->cmd == Packet::ReadReq); int lat; - PacketList* dummy; + PacketList dummy; // Read the data into the mshr BlkType *blk = tags->handleAccess(mshr->pkt, lat, dummy, false); assert(dummy.empty()); - assert(mshr->pkt->isSatisfied()); + assert((int)mshr->pkt->coherence & SATISFIED); // can overload order since it isn't used on non pending blocks mshr->order = blk->status; // temporarily remove the block from the cache. - tags->invalidateBlk(mshr->pkt->paddr, mshr->pkt->req->asid); + tags->invalidateBlk(mshr->pkt->addr, mshr->pkt->req->asid); } @@ -351,19 +348,19 @@ template void Cache::snoop(Packet * &pkt) { - Addr blk_addr = pkt->paddr & ~(Addr(blkSize-1)); + Addr blk_addr = pkt->addr & ~(Addr(blkSize-1)); BlkType *blk = tags->findBlock(pkt); MSHR *mshr = missQueue->findMSHR(blk_addr, pkt->req->asid); if (isTopLevel() && coherence->hasProtocol()) { //@todo Move this into handle bus req //If we find an mshr, and it is in service, we need to NACK or invalidate if (mshr) { if (mshr->inService) { - if ((mshr->pkt->cmd.isInvalidate() || !mshr->pkt->isCacheFill()) - && (pkt->cmd != Invalidate && pkt->cmd != WriteInvalidate)) { + if ((mshr->pkt->isInvalidate() || !mshr->pkt->isCacheFill()) + && (pkt->cmd != Packet::InvalidateReq && pkt->cmd != Packet::WriteInvalidateReq)) { //If the outstanding request was an invalidate (upgrade,readex,..) //Then we need to ACK the request until we get the data //Also NACK if the outstanding request is not a cachefill (writeback) - pkt->flags |= NACKED_LINE; + (int)pkt->coherence |= NACKED_LINE; return; } else { @@ -376,11 +373,11 @@ Cache::snoop(Packet * &pkt) //@todo Make it so that a read to a pending read can't be exclusive now. //Set the address so find match works - invalidatePkt->paddr = pkt->paddr; + invalidatePkt->addr = pkt->addr; //Append the invalidate on missQueue->addTarget(mshr,invalidatePkt); - DPRINTF(Cache, "Appending Invalidate to blk_addr: %x\n", pkt->paddr & (((ULL(1))<<48)-1)); + DPRINTF(Cache, "Appending Invalidate to blk_addr: %x\n", pkt->addr & (((ULL(1))<<48)-1)); return; } } @@ -388,34 +385,32 @@ Cache::snoop(Packet * &pkt) //We also need to check the writeback buffers and handle those std::vector writebacks; if (missQueue->findWrites(blk_addr, pkt->req->asid, writebacks)) { - DPRINTF(Cache, "Snoop hit in writeback to blk_addr: %x\n", pkt->paddr & (((ULL(1))<<48)-1)); + DPRINTF(Cache, "Snoop hit in writeback to blk_addr: %x\n", pkt->addr & (((ULL(1))<<48)-1)); //Look through writebacks for any non-uncachable writes, use that for (int i=0; ipkt->req->isUncacheable()) { - if (pkt->cmd.isRead()) { + if (pkt->isRead()) { //Only Upgrades don't get here //Supply the data - pkt->flags |= SATISFIED; + (int)pkt->coherence |= SATISFIED; //If we are in an exclusive protocol, make it ask again //to get write permissions (upgrade), signal shared - pkt->flags |= SHARED_LINE; + (int)pkt->coherence |= SHARED_LINE; - if (doData()) { - assert(pkt->cmd.isRead()); + assert(pkt->isRead()); + assert(pkt->offset < blkSize); + assert(pkt->size <= blkSize); + assert(pkt->offset + pkt->size <=blkSize); + memcpy(pkt->data, mshr->pkt->data + pkt->offset, pkt->size); - assert(pkt->offset < blkSize); - assert(pkt->size <= blkSize); - assert(pkt->offset + pkt->size <=blkSize); - memcpy(pkt->data, mshr->pkt->data + pkt->offset, pkt->size); - } respondToSnoop(pkt); } - if (pkt->cmd.isInvalidate()) { + if (pkt->isInvalidate()) { //This must be an upgrade or other cache will take ownership missQueue->markInService(mshr->pkt); } @@ -439,7 +434,7 @@ void Cache::snoopResponse(Packet * &pkt) { //Need to handle the response, if NACKED - if (pkt->isNacked()) { + if ((int)pkt->coherence & NACKED_LINE) { //Need to mark it as not in service, and retry for bus assert(0); //Yeah, we saw a NACK come through @@ -467,16 +462,16 @@ template Tick Cache::probe(Packet * &pkt, bool update) { - MemDebug::cacheProbe(pkt); +// MemDebug::cacheProbe(pkt); if (!pkt->req->isUncacheable()) { - if (pkt->cmd.isInvalidate() && !pkt->cmd.isRead() - && !pkt->cmd.isWrite()) { + if (pkt->isInvalidate() && !pkt->isRead() + && !pkt->isWrite()) { //Upgrade or Invalidate, satisfy it, don't forward - DPRINTF(Cache, "%s %d %x ? blk_addr: %x\n", pkt->cmd.toString(), - pkt->req->asid, pkt->paddr & (((ULL(1))<<48)-1), - pkt->paddr & ~((Addr)blkSize - 1)); - pkt->flags |= SATISFIED; + DPRINTF(Cache, "%s %d %x ? blk_addr: %x\n", pkt->cmdString(), + pkt->req->asid, pkt->addr & (((ULL(1))<<48)-1), + pkt->addr & ~((Addr)blkSize - 1)); + (int)pkt->coherence |= SATISFIED; return 0; } } @@ -486,13 +481,13 @@ Cache::probe(Packet * &pkt, bool update) return mi->sendProbe(pkt,update); } - PacketList* writebacks; + PacketList writebacks; int lat; BlkType *blk = tags->handleAccess(pkt, lat, writebacks, update); if (!blk) { // Need to check for outstanding misses and writes - Addr blk_addr = pkt->paddr & ~(blkSize - 1); + Addr blk_addr = pkt->addr & ~(blkSize - 1); // There can only be one matching outstanding miss. MSHR* mshr = missQueue->findMSHR(blk_addr, pkt->req->asid); @@ -513,12 +508,12 @@ Cache::probe(Packet * &pkt, bool update) Packet * target = *i; // If the target contains data, and it overlaps the // probed request, need to update data - if (target->cmd.isWrite() && target->overlaps(pkt)) { + if (target->isWrite() && target->overlaps(pkt)) { uint8_t* pkt_data; uint8_t* write_data; int data_size; - if (target->paddr < pkt->paddr) { - int offset = pkt->paddr - target->paddr; + if (target->addr < pkt->addr) { + int offset = pkt->addr - target->paddr; pkt_data = pkt->data; write_data = target->data + offset; data_size = target->size - offset; @@ -526,7 +521,7 @@ Cache::probe(Packet * &pkt, bool update) if (data_size > pkt->size) data_size = pkt->size; } else { - int offset = target->paddr - pkt->paddr; + int offset = target->addr - pkt->addr; pkt_data = pkt->data + offset; write_data = target->data; data_size = pkt->size - offset; @@ -535,7 +530,7 @@ Cache::probe(Packet * &pkt, bool update) data_size = target->size; } - if (pkt->cmd.isWrite()) { + if (pkt->isWrite()) { memcpy(pkt_data, write_data, data_size); } else { memcpy(write_data, pkt_data, data_size); @@ -550,8 +545,8 @@ Cache::probe(Packet * &pkt, bool update) uint8_t* pkt_data; uint8_t* write_data; int data_size; - if (write->paddr < pkt->paddr) { - int offset = pkt->paddr - write->paddr; + if (write->addr < pkt->addr) { + int offset = pkt->addr - write->addr; pkt_data = pkt->data; write_data = write->data + offset; data_size = write->size - offset; @@ -559,7 +554,7 @@ Cache::probe(Packet * &pkt, bool update) if (data_size > pkt->size) data_size = pkt->size; } else { - int offset = write->paddr - pkt->paddr; + int offset = write->addr - pkt->addr; pkt_data = pkt->data + offset; write_data = write->data; data_size = pkt->size - offset; @@ -568,7 +563,7 @@ Cache::probe(Packet * &pkt, bool update) data_size = write->size; } - if (pkt->cmd.isWrite()) { + if (pkt->isWrite()) { memcpy(pkt_data, write_data, data_size); } else { memcpy(write_data, pkt_data, data_size); @@ -585,8 +580,8 @@ Cache::probe(Packet * &pkt, bool update) } if (!pkt->req->isUncacheable()) { // Fetch the cache block to fill - Packet * busPkt = new MemPkt(); - busPkt->paddr = blk_addr; + Packet * busPkt = new Packet(); + busPkt->addr = blk_addr; busPkt->size = blkSize; busPkt->data = new uint8_t[blkSize]; @@ -632,7 +627,7 @@ Cache::probe(Packet * &pkt, bool update) if (update) { hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; - } else if (pkt->cmd.isWrite()) { + } else if (pkt->isWrite()) { // Still need to change data in all locations. return mi->sendProbe(pkt, update); } @@ -644,9 +639,9 @@ Cache::probe(Packet * &pkt, bool update) template Tick -Cache::snoopProbe(MemPktPtr &pkt, bool update) +Cache::snoopProbe(PacketPtr &pkt, bool update) { - Addr blk_addr = pkt->paddr & ~(Addr(blkSize-1)); + Addr blk_addr = pkt->addr & ~(Addr(blkSize-1)); BlkType *blk = tags->findBlock(pkt); MSHR *mshr = missQueue->findMSHR(blk_addr, pkt->req->asid); CacheBlk::State new_state = 0; diff --git a/src/mem/cache/coherence/coherence_protocol.hh b/src/mem/cache/coherence/coherence_protocol.hh index 4f65205525..21351ace46 100644 --- a/src/mem/cache/coherence/coherence_protocol.hh +++ b/src/mem/cache/coherence/coherence_protocol.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Erik Hallnor + * Ron Dreslinski * Steve Reinhardt */ @@ -40,7 +41,6 @@ #include "sim/sim_object.hh" #include "mem/packet.hh" -#include "mem/mem_cmd.hh" #include "mem/cache/cache_blk.hh" #include "base/statistics.hh" @@ -89,7 +89,7 @@ class CoherenceProtocol : public SimObject * @param oldState The current block state. * @return The new state. */ - CacheBlk::State getNewState(const Packet * &pkt, + CacheBlk::State getNewState(Packet * &pkt, CacheBlk::State oldState); /** diff --git a/src/mem/cache/coherence/simple_coherence.hh b/src/mem/cache/coherence/simple_coherence.hh index 1956745902..ca9d18bebd 100644 --- a/src/mem/cache/coherence/simple_coherence.hh +++ b/src/mem/cache/coherence/simple_coherence.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Erik Hallnor + * Ron Dreslinski */ /** @@ -39,7 +40,6 @@ #include #include "mem/packet.hh" -#include "mem/mem_cmd.hh" #include "mem/cache/cache_blk.hh" #include "mem/cache/miss/mshr_queue.hh" #include "mem/cache/coherence/coherence_protocol.hh" @@ -119,7 +119,7 @@ class SimpleCoherence //Got rid of, there could be an MSHR, but it can't be in service if (blk != NULL) { - if (pkt->cmd != Writeback) { + if (pkt->cmd != Packet::Writeback) { return protocol->handleBusRequest(cache, pkt, blk, mshr, new_state); } @@ -138,7 +138,7 @@ class SimpleCoherence */ Packet::Command getBusCmd(Packet::Command &cmd, CacheBlk::State state) { - if (cmd == Writeback) return Writeback; + if (cmd == Packet::Writeback) return Packet::Writeback; return protocol->getBusCmd(cmd, state); } diff --git a/src/mem/cache/coherence/uni_coherence.hh b/src/mem/cache/coherence/uni_coherence.hh index b64f6c931a..4e895997f6 100644 --- a/src/mem/cache/coherence/uni_coherence.hh +++ b/src/mem/cache/coherence/uni_coherence.hh @@ -34,7 +34,6 @@ #include "base/trace.hh" #include "mem/cache/cache_blk.hh" #include "mem/cache/miss/mshr_queue.hh" -#include "mem/mem_cmd.hh" #include "mem/packet.hh" class BaseCache; @@ -79,11 +78,11 @@ class UniCoherence */ Packet::Command getBusCmd(Packet::Command &cmd, CacheBlk::State state) { - if (cmd == Hard_Prefetch && state) + if (cmd == Packet::HardPFReq && state) warn("Trying to issue a prefetch to a block we already have\n"); - if (cmd == Writeback) - return Writeback; - return Read; + if (cmd == Packet::Writeback) + return Packet::Writeback; + return Packet::ReadReq; } /** @@ -96,7 +95,7 @@ class UniCoherence { if (pkt->senderState) //Blocking Buffers don't get mshrs { - if (pkt->senderState->originalCmd == Hard_Prefetch) { + if (((MSHR *)(pkt->senderState))->originalCmd == Packet::HardPFReq) { DPRINTF(HWPrefetch, "Marking a hardware prefetch as such in the state\n"); return BlkHWPrefetched | BlkValid | BlkWritable; } diff --git a/src/mem/cache/miss/blocking_buffer.hh b/src/mem/cache/miss/blocking_buffer.hh index 08814b43e6..39a06a3779 100644 --- a/src/mem/cache/miss/blocking_buffer.hh +++ b/src/mem/cache/miss/blocking_buffer.hh @@ -166,7 +166,7 @@ public: * has been sent to the bus, this function removes all of its targets. * @param req->getThreadNum()ber The thread number of the requests to squash. */ - void squash(int req->getThreadNum()ber); + void squash(int threadNum); /** * Return the current number of outstanding misses. diff --git a/src/mem/cache/miss/miss_queue.hh b/src/mem/cache/miss/miss_queue.hh index d459821086..b88b7038c6 100644 --- a/src/mem/cache/miss/miss_queue.hh +++ b/src/mem/cache/miss/miss_queue.hh @@ -270,7 +270,7 @@ class MissQueue * has been sent to the bus, this function removes all of its targets. * @param req->getThreadNum()ber The thread number of the requests to squash. */ - void squash(int req->getThreadNum()ber); + void squash(int threadNum); /** * Return the current number of outstanding misses. diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh index 563368d292..a67f1b9a6f 100644 --- a/src/mem/cache/miss/mshr_queue.hh +++ b/src/mem/cache/miss/mshr_queue.hh @@ -192,7 +192,7 @@ class MSHRQueue { * is in service, just squashes the targets. * @param req->getThreadNum()ber The thread to squash. */ - void squash(int req->getThreadNum()ber); + void squash(int threadNum); /** * Returns true if the pending list is not empty. diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 176c6f793b..3d37686768 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -41,10 +41,17 @@ #include "mem/request.hh" #include "arch/isa_traits.hh" #include "sim/root.hh" +#include struct Packet; typedef Packet* PacketPtr; typedef uint8_t* PacketDataPtr; +typedef std::list PacketList; + +//Coherence Flags +#define NACKED_LINE 1 << 0 +#define SATISFIED 1 << 1 +#define SHARED_LINE 1 << 2 //For statistics we need max number of commands, hard code it at //20 for now. @todo fix later @@ -173,7 +180,10 @@ class Packet SoftPFReq = IsRead | IsRequest | IsSWPrefetch | NeedsResponse, HardPFReq = IsRead | IsRequest | IsHWPrefetch | NeedsResponse, SoftPFResp = IsRead | IsRequest | IsSWPrefetch | IsResponse, - HardPFResp = IsRead | IsRequest | IsHWPrefetch | IsResponse + HardPFResp = IsRead | IsRequest | IsHWPrefetch | IsResponse, + InvalidateReq = IsInvalidate | IsRequest, + WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest, + UpgradeReq = IsInvalidate | NeedsResponse }; /** Return the string name of the cmd field (for debugging and @@ -190,9 +200,14 @@ class Packet Command cmd; bool isRead() { return (cmd & IsRead) != 0; } + bool isWrite() { return (cmd & IsWrite) != 0; } bool isRequest() { return (cmd & IsRequest) != 0; } bool isResponse() { return (cmd & IsResponse) != 0; } bool needsResponse() { return (cmd & NeedsResponse) != 0; } + bool isInvalidate() { return (cmd * IsInvalidate) != 0; } + + bool isCacheFill() { assert("Unimplemented yet\n" && 0); } + bool isNoAllocate() { assert("Unimplemented yet\n" && 0); } /** Possible results of a packet's request. */ enum Result diff --git a/src/mem/request.hh b/src/mem/request.hh index 469184b130..46d9b6fd70 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -44,6 +44,7 @@ class Request; typedef Request* RequestPtr; + /** The request is a Load locked/store conditional. */ const unsigned LOCKED = 0x001; /** The virtual address is also the physical address. */ From eafb5c4936f7d3233c223d69b435c6be360bbfb2 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Thu, 29 Jun 2006 16:07:19 -0400 Subject: [PATCH 04/17] Still missing prefetch and tags directories as well as cache builder. Some implementation details were left blank still, need to fill them in. src/SConscript: Reorder build to compile all files first src/mem/cache/cache.hh: src/mem/cache/cache_builder.cc: src/mem/cache/cache_impl.hh: src/mem/cache/coherence/coherence_protocol.cc: src/mem/cache/coherence/uni_coherence.cc: src/mem/cache/coherence/uni_coherence.hh: src/mem/cache/miss/blocking_buffer.cc: src/mem/cache/miss/miss_queue.cc: src/mem/cache/miss/mshr.cc: src/mem/cache/miss/mshr.hh: src/mem/cache/miss/mshr_queue.cc: More changesets pulled, now compiles everything in /miss directory and in the root directory src/mem/packet.hh: Add some more support, need to clean some of it out once everything is working --HG-- extra : convert_revision : ba73676165810edf2c2effaf5fbad8397d6bd800 --- src/SConscript | 3 +- src/mem/cache/cache.hh | 35 ++- src/mem/cache/cache_builder.cc | 38 ++- src/mem/cache/cache_impl.hh | 185 ++++++------ src/mem/cache/coherence/coherence_protocol.cc | 283 +++++++++--------- src/mem/cache/coherence/uni_coherence.cc | 13 +- src/mem/cache/coherence/uni_coherence.hh | 1 + src/mem/cache/miss/blocking_buffer.cc | 91 +++--- src/mem/cache/miss/miss_queue.cc | 163 +++++----- src/mem/cache/miss/mshr.cc | 35 ++- src/mem/cache/miss/mshr.hh | 2 +- src/mem/cache/miss/mshr_queue.cc | 30 +- src/mem/packet.hh | 44 ++- 13 files changed, 491 insertions(+), 432 deletions(-) diff --git a/src/SConscript b/src/SConscript index ff41e59316..04da17ee63 100644 --- a/src/SConscript +++ b/src/SConscript @@ -103,7 +103,6 @@ base_sources = Split(''' mem/cache/base_cache.cc mem/cache/cache.cc - mem/cache/cache_builder.cc mem/cache/coherence/coherence_protocol.cc mem/cache/coherence/uni_coherence.cc mem/cache/miss/blocking_buffer.cc @@ -126,6 +125,8 @@ base_sources = Split(''' mem/cache/tags/split_lifo.cc mem/cache/tags/split_lru.cc + mem/cache/cache_builder.cc + sim/builder.cc sim/debug.cc sim/eventq.cc diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 587faaf511..d2af1d8bf4 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -44,8 +44,9 @@ #include "mem/cache/base_cache.hh" #include "mem/cache/prefetch/prefetcher.hh" -// forward declarations -class Bus; +//Forward decleration +class MSHR; + /** * A template-policy based cache. The behavior of the cache can be altered by @@ -92,6 +93,11 @@ class Cache : public BaseCache */ int busWidth; + /** + * The latency of a hit in this device. + */ + int hitLatency; + /** * A permanent mem req to always be used to cause invalidations. * Used to append to target list, to cause an invalidation. @@ -121,18 +127,18 @@ class Cache : public BaseCache bool doCopy; bool blockOnCopy; BaseCache::Params baseParams; - Bus *in; - Bus *out; Prefetcher *prefetcher; bool prefetchAccess; + int hitLatency; Params(TagStore *_tags, Buffering *mq, Coherence *coh, - bool do_copy, BaseCache::Params params, Bus * in_bus, - Bus * out_bus, Prefetcher *_prefetcher, - bool prefetch_access) + bool do_copy, BaseCache::Params params, + Prefetcher *_prefetcher, + bool prefetch_access, int hit_latency) : tags(_tags), missQueue(mq), coherence(coh), doCopy(do_copy), - blockOnCopy(false), baseParams(params), in(in_bus), out(out_bus), - prefetcher(_prefetcher), prefetchAccess(prefetch_access) + blockOnCopy(false), baseParams(params), + prefetcher(_prefetcher), prefetchAccess(prefetch_access), + hitLatency(hit_latency) { } }; @@ -140,6 +146,17 @@ class Cache : public BaseCache /** Instantiates a basic cache object. */ Cache(const std::string &_name, Params ¶ms); + bool doTimingAccess(Packet *pkt, CachePort *cachePort, + bool isCpuSide); + + Tick doAtomicAccess(Packet *pkt, CachePort *cachePort, + bool isCpuSide); + + void doFunctionalAccess(Packet *pkt, CachePort *cachePort, + bool isCpuSide); + + void recvStatusChange(Port::Status status, bool isCpuSide); + void regStats(); /** diff --git a/src/mem/cache/cache_builder.cc b/src/mem/cache/cache_builder.cc index e3efb9bc36..8758dc57a4 100644 --- a/src/mem/cache/cache_builder.cc +++ b/src/mem/cache/cache_builder.cc @@ -42,7 +42,7 @@ #include "mem/cache/base_cache.hh" #include "mem/cache/cache.hh" -#include "mem/bus/bus.hh" +#include "mem/bus.hh" #include "mem/cache/coherence/coherence_protocol.hh" #include "sim/builder.hh" @@ -84,13 +84,6 @@ #include "mem/cache/coherence/uni_coherence.hh" #include "mem/cache/coherence/simple_coherence.hh" -// Bus Interfaces -#include "mem/bus/slave_interface.hh" -#include "mem/bus/master_interface.hh" -#include "mem/memory_interface.hh" - -#include "mem/trace/mem_trace_writer.hh" - //Prefetcher Headers #if defined(USE_GHB) #include "mem/cache/prefetch/ghb_prefetcher.hh" @@ -118,8 +111,8 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseCache) Param tgts_per_mshr; Param write_buffers; Param prioritizeRequests; - SimObjectParam in_bus; - SimObjectParam out_bus; +// SimObjectParam in_bus; +// SimObjectParam out_bus; Param do_copy; SimObjectParam protocol; Param trace_addr; @@ -133,9 +126,9 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseCache) Param compression_latency; Param subblock_size; Param max_miss_count; - SimObjectParam hier; +// SimObjectParam hier; VectorParam > addr_range; - SimObjectParam mem_trace; +// SimObjectParam mem_trace; Param split; Param split_size; Param lifo; @@ -151,6 +144,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseCache) Param prefetch_cache_check_push; Param prefetch_use_cpu_id; Param prefetch_data_accesses_only; + Param hit_latency; END_DECLARE_SIM_OBJECT_PARAMS(BaseCache) @@ -166,8 +160,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseCache) INIT_PARAM_DFLT(write_buffers, "number of write buffers", 8), INIT_PARAM_DFLT(prioritizeRequests, "always service demand misses first", false), - INIT_PARAM_DFLT(in_bus, "incoming bus object", NULL), +/* INIT_PARAM_DFLT(in_bus, "incoming bus object", NULL), INIT_PARAM(out_bus, "outgoing bus object"), +*/ INIT_PARAM_DFLT(do_copy, "perform fast copies in the cache", false), INIT_PARAM_DFLT(protocol, "coherence protocol to use in the cache", NULL), INIT_PARAM_DFLT(trace_addr, "address to trace", 0), @@ -192,12 +187,13 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseCache) INIT_PARAM_DFLT(max_miss_count, "The number of misses to handle before calling exit", 0), - INIT_PARAM_DFLT(hier, +/* INIT_PARAM_DFLT(hier, "Hierarchy global variables", &defaultHierParams), +*/ INIT_PARAM_DFLT(addr_range, "The address range in bytes", vector >(1,RangeIn((Addr)0, MaxAddr))), - INIT_PARAM_DFLT(mem_trace, "Memory trace to write accesses to", NULL), +// INIT_PARAM_DFLT(mem_trace, "Memory trace to write accesses to", NULL), INIT_PARAM_DFLT(split, "Whether this is a partitioned cache", false), INIT_PARAM_DFLT(split_size, "the number of \"ways\" belonging to the LRU partition", 0), INIT_PARAM_DFLT(lifo, "whether you are using a LIFO repl. policy", false), @@ -212,7 +208,8 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseCache) INIT_PARAM_DFLT(prefetch_policy, "Type of prefetcher to use", "none"), INIT_PARAM_DFLT(prefetch_cache_check_push, "Check if in cash on push or pop of prefetch queue", true), INIT_PARAM_DFLT(prefetch_use_cpu_id, "Use the CPU ID to seperate calculations of prefetches", true), - INIT_PARAM_DFLT(prefetch_data_accesses_only, "Only prefetch on data not on instruction accesses", false) + INIT_PARAM_DFLT(prefetch_data_accesses_only, "Only prefetch on data not on instruction accesses", false), + INIT_PARAM_DFLT(hit_latency, "Hit Latecny for a succesful access", 1) END_INIT_SIM_OBJECT_PARAMS(BaseCache) @@ -232,12 +229,12 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache) } \ Cache, b, c>::Params params(tagStore, mq, coh, \ do_copy, base_params, \ - in_bus, out_bus, pf, \ + /*in_bus, out_bus,*/ pf, \ prefetch_access); \ Cache, b, c> *retval = \ - new Cache, b, c>(getInstanceName(), hier, \ + new Cache, b, c>(getInstanceName(), /*hier,*/ \ params); \ - if (in_bus == NULL) { \ +/* if (in_bus == NULL) { \ retval->setSlaveInterface(new MemoryInterface, b, c> >(getInstanceName(), hier, retval, mem_trace)); \ } else { \ retval->setSlaveInterface(new SlaveInterface, b, c>, Bus>(getInstanceName(), hier, retval, in_bus, mem_trace)); \ @@ -245,6 +242,7 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache) retval->setMasterInterface(new MasterInterface, b, c>, Bus>(getInstanceName(), hier, retval, out_bus)); \ out_bus->rangeChange(); \ return retval; \ +*/return true; \ } while (0) #define BUILD_CACHE_PANIC(x) do { \ @@ -465,7 +463,7 @@ CREATE_SIM_OBJECT(BaseCache) const void *repl = NULL; #endif - if (mshrs == 1 || out_bus->doEvents() == false) { + if (mshrs == 1 /*|| out_bus->doEvents() == false*/) { BlockingBuffer *mq = new BlockingBuffer(true); BUILD_COHERENCE(BlockingBuffer); } else { diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 699d874deb..dbf2e49f14 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -90,6 +90,8 @@ doAtomicAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) else snoopProbe(pkt, true); } + //Fix this timing info + return hitLatency; } template @@ -129,10 +131,12 @@ Cache(const std::string &_name, coherence(params.coherence), prefetcher(params.prefetcher), doCopy(params.doCopy), blockOnCopy(params.blockOnCopy) { - if (params.in == NULL) { +//FIX BUS POINTERS +// if (params.in == NULL) { topLevelCache = true; - } - tags->setCache(this, params.out->width, params.out->clockRate); +// } +//PLEASE FIX THIS, BUS SIZES NOT BEING USED + tags->setCache(this, blkSize, 1/*params.out->width, params.out->clockRate*/); tags->setPrefetcher(prefetcher); missQueue->setCache(this); missQueue->setPrefetcher(prefetcher); @@ -140,8 +144,10 @@ Cache(const std::string &_name, prefetcher->setCache(this); prefetcher->setTags(tags); prefetcher->setBuffer(missQueue); +#if 0 invalidatePkt = new Packet; invalidatePkt->cmd = Packet::InvalidateReq; +#endif } template @@ -175,27 +181,27 @@ Cache::access(PacketPtr &pkt) //Upgrade or Invalidate //Look into what happens if two slave caches on bus DPRINTF(Cache, "%s %d %x ? blk_addr: %x\n", pkt->cmdString(), - pkt->req->asid, pkt->addr & (((ULL(1))<<48)-1), - pkt->addr & ~((Addr)blkSize - 1)); + pkt->req->getAsid(), pkt->getAddr() & (((ULL(1))<<48)-1), + pkt->getAddr() & ~((Addr)blkSize - 1)); //@todo Should this return latency have the hit latency in it? // respond(pkt,curTick+lat); - (int)pkt->coherence |= SATISFIED; + pkt->flags |= SATISFIED; // return MA_HIT; //@todo, return values return true; } blk = tags->handleAccess(pkt, lat, writebacks); } else { - size = pkt->size; + size = pkt->getSize(); } // If this is a block size write/hint (WH64) allocate the block here // if the coherence protocol allows it. /** @todo make the fast write alloc (wh64) work with coherence. */ /** @todo Do we want to do fast writes for writebacks as well? */ - if (!blk && pkt->size >= blkSize && coherence->allowFastWrites() && + if (!blk && pkt->getSize() >= blkSize && coherence->allowFastWrites() && (pkt->cmd == Packet::WriteReq || pkt->cmd == Packet::WriteInvalidateReq) ) { // not outstanding misses, can do this - MSHR* outstanding_miss = missQueue->findMSHR(pkt->addr, pkt->req->asid); + MSHR* outstanding_miss = missQueue->findMSHR(pkt->getAddr(), pkt->req->getAsid()); if (pkt->cmd == Packet::WriteInvalidateReq || !outstanding_miss) { if (outstanding_miss) { warn("WriteInv doing a fastallocate" @@ -211,8 +217,8 @@ Cache::access(PacketPtr &pkt) writebacks.pop_front(); } DPRINTF(Cache, "%s %d %x %s blk_addr: %x pc %x\n", pkt->cmdString(), - pkt->req->asid, pkt->addr & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss", - pkt->addr & ~((Addr)blkSize - 1), pkt->req->pc); + pkt->req->getAsid(), pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss", + pkt->getAddr() & ~((Addr)blkSize - 1), pkt->req->getPC()); if (blk) { // Hit hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; @@ -230,7 +236,7 @@ Cache::access(PacketPtr &pkt) if (missCount) { --missCount; if (missCount == 0) - new SimLoopExitEvent("A cache reached the maximum miss count"); + new SimLoopExitEvent(curTick, "A cache reached the maximum miss count"); } } missQueue->handleMiss(pkt, size, curTick + hitLatency); @@ -281,14 +287,14 @@ Cache::handleResponse(Packet * &pkt) BlkType *blk = NULL; if (pkt->senderState) { // MemDebug::cacheResponse(pkt); - DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->addr, - pkt->addr & (((ULL(1))<<48)-1)); + DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->getAddr(), + pkt->getAddr() & (((ULL(1))<<48)-1)); if (pkt->isCacheFill() && !pkt->isNoAllocate()) { blk = tags->findBlock(pkt); CacheBlk::State old_state = (blk) ? blk->status : 0; PacketList writebacks; - blk = tags->handleFill(blk, pkt->senderState, + blk = tags->handleFill(blk, (MSHR*)pkt->senderState, coherence->getNewState(pkt,old_state), writebacks); while (!writebacks.empty()) { @@ -310,7 +316,7 @@ Cache::pseudoFill(Addr addr, int asid) // Read the data into the mshr BlkType *blk = tags->handleAccess(mshr->pkt, lat, dummy, false); assert(dummy.empty()); - assert((int)mshr->pkt->coherence & SATISFIED); + assert(mshr->pkt->flags & SATISFIED); // can overload order since it isn't used on non pending blocks mshr->order = blk->status; // temporarily remove the block from the cache. @@ -328,11 +334,11 @@ Cache::pseudoFill(MSHR *mshr) // Read the data into the mshr BlkType *blk = tags->handleAccess(mshr->pkt, lat, dummy, false); assert(dummy.empty()); - assert((int)mshr->pkt->coherence & SATISFIED); + assert(mshr->pkt->flags & SATISFIED); // can overload order since it isn't used on non pending blocks mshr->order = blk->status; // temporarily remove the block from the cache. - tags->invalidateBlk(mshr->pkt->addr, mshr->pkt->req->asid); + tags->invalidateBlk(mshr->pkt->getAddr(), mshr->pkt->req->getAsid()); } @@ -348,9 +354,10 @@ template void Cache::snoop(Packet * &pkt) { - Addr blk_addr = pkt->addr & ~(Addr(blkSize-1)); + + Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); BlkType *blk = tags->findBlock(pkt); - MSHR *mshr = missQueue->findMSHR(blk_addr, pkt->req->asid); + MSHR *mshr = missQueue->findMSHR(blk_addr, pkt->req->getAsid()); if (isTopLevel() && coherence->hasProtocol()) { //@todo Move this into handle bus req //If we find an mshr, and it is in service, we need to NACK or invalidate if (mshr) { @@ -360,7 +367,7 @@ Cache::snoop(Packet * &pkt) //If the outstanding request was an invalidate (upgrade,readex,..) //Then we need to ACK the request until we get the data //Also NACK if the outstanding request is not a cachefill (writeback) - (int)pkt->coherence |= NACKED_LINE; + pkt->flags |= NACKED_LINE; return; } else { @@ -373,19 +380,19 @@ Cache::snoop(Packet * &pkt) //@todo Make it so that a read to a pending read can't be exclusive now. //Set the address so find match works - invalidatePkt->addr = pkt->addr; + invalidatePkt->addrOverride(pkt->getAddr()); //Append the invalidate on missQueue->addTarget(mshr,invalidatePkt); - DPRINTF(Cache, "Appending Invalidate to blk_addr: %x\n", pkt->addr & (((ULL(1))<<48)-1)); + DPRINTF(Cache, "Appending Invalidate to blk_addr: %x\n", pkt->getAddr() & (((ULL(1))<<48)-1)); return; } } } //We also need to check the writeback buffers and handle those std::vector writebacks; - if (missQueue->findWrites(blk_addr, pkt->req->asid, writebacks)) { - DPRINTF(Cache, "Snoop hit in writeback to blk_addr: %x\n", pkt->addr & (((ULL(1))<<48)-1)); + if (missQueue->findWrites(blk_addr, pkt->req->getAsid(), writebacks)) { + DPRINTF(Cache, "Snoop hit in writeback to blk_addr: %x\n", pkt->getAddr() & (((ULL(1))<<48)-1)); //Look through writebacks for any non-uncachable writes, use that for (int i=0; i::snoop(Packet * &pkt) if (pkt->isRead()) { //Only Upgrades don't get here //Supply the data - (int)pkt->coherence |= SATISFIED; + pkt->flags |= SATISFIED; //If we are in an exclusive protocol, make it ask again //to get write permissions (upgrade), signal shared - (int)pkt->coherence |= SHARED_LINE; + pkt->flags |= SHARED_LINE; assert(pkt->isRead()); - assert(pkt->offset < blkSize); - assert(pkt->size <= blkSize); - assert(pkt->offset + pkt->size <=blkSize); - memcpy(pkt->data, mshr->pkt->data + pkt->offset, pkt->size); + Addr offset = pkt->getAddr() & ~(blkSize - 1); + assert(offset < blkSize); + assert(pkt->getSize() <= blkSize); + assert(offset + pkt->getSize() <=blkSize); + memcpy(pkt->getPtr(), mshr->pkt->getPtr() + offset, pkt->getSize()); respondToSnoop(pkt); } @@ -434,7 +442,7 @@ void Cache::snoopResponse(Packet * &pkt) { //Need to handle the response, if NACKED - if ((int)pkt->coherence & NACKED_LINE) { + if (pkt->flags & NACKED_LINE) { //Need to mark it as not in service, and retry for bus assert(0); //Yeah, we saw a NACK come through @@ -463,41 +471,35 @@ Tick Cache::probe(Packet * &pkt, bool update) { // MemDebug::cacheProbe(pkt); - if (!pkt->req->isUncacheable()) { if (pkt->isInvalidate() && !pkt->isRead() && !pkt->isWrite()) { //Upgrade or Invalidate, satisfy it, don't forward DPRINTF(Cache, "%s %d %x ? blk_addr: %x\n", pkt->cmdString(), - pkt->req->asid, pkt->addr & (((ULL(1))<<48)-1), - pkt->addr & ~((Addr)blkSize - 1)); - (int)pkt->coherence |= SATISFIED; + pkt->req->getAsid(), pkt->getAddr() & (((ULL(1))<<48)-1), + pkt->getAddr() & ~((Addr)blkSize - 1)); + pkt->flags |= SATISFIED; return 0; } } - if (!update && !doData()) { - // Nothing to do here - return mi->sendProbe(pkt,update); - } - PacketList writebacks; int lat; BlkType *blk = tags->handleAccess(pkt, lat, writebacks, update); if (!blk) { // Need to check for outstanding misses and writes - Addr blk_addr = pkt->addr & ~(blkSize - 1); + Addr blk_addr = pkt->getAddr() & ~(blkSize - 1); // There can only be one matching outstanding miss. - MSHR* mshr = missQueue->findMSHR(blk_addr, pkt->req->asid); + MSHR* mshr = missQueue->findMSHR(blk_addr, pkt->req->getAsid()); // There can be many matching outstanding writes. vector writes; - missQueue->findWrites(blk_addr, pkt->req->asid, writes); + missQueue->findWrites(blk_addr, pkt->req->getAsid(), writes); if (!update) { - mi->sendProbe(pkt, update); + memSidePort->sendFunctional(pkt); // Check for data in MSHR and writebuffer. if (mshr) { warn("Found outstanding miss on an non-update probe"); @@ -508,26 +510,26 @@ Cache::probe(Packet * &pkt, bool update) Packet * target = *i; // If the target contains data, and it overlaps the // probed request, need to update data - if (target->isWrite() && target->overlaps(pkt)) { + if (target->isWrite() && target->intersect(pkt)) { uint8_t* pkt_data; uint8_t* write_data; int data_size; - if (target->addr < pkt->addr) { - int offset = pkt->addr - target->paddr; - pkt_data = pkt->data; - write_data = target->data + offset; - data_size = target->size - offset; + if (target->getAddr() < pkt->getAddr()) { + int offset = pkt->getAddr() - target->getAddr(); + pkt_data = pkt->getPtr(); + write_data = target->getPtr() + offset; + data_size = target->getSize() - offset; assert(data_size > 0); - if (data_size > pkt->size) - data_size = pkt->size; + if (data_size > pkt->getSize()) + data_size = pkt->getSize(); } else { - int offset = target->addr - pkt->addr; - pkt_data = pkt->data + offset; - write_data = target->data; - data_size = pkt->size - offset; - assert(data_size > pkt->size); - if (data_size > target->size) - data_size = target->size; + int offset = target->getAddr() - pkt->getAddr(); + pkt_data = pkt->getPtr() + offset; + write_data = target->getPtr(); + data_size = pkt->getSize() - offset; + assert(data_size > pkt->getSize()); + if (data_size > target->getSize()) + data_size = target->getSize(); } if (pkt->isWrite()) { @@ -540,27 +542,27 @@ Cache::probe(Packet * &pkt, bool update) } for (int i = 0; i < writes.size(); ++i) { Packet * write = writes[i]->pkt; - if (write->overlaps(pkt)) { + if (write->intersect(pkt)) { warn("Found outstanding write on an non-update probe"); uint8_t* pkt_data; uint8_t* write_data; int data_size; - if (write->addr < pkt->addr) { - int offset = pkt->addr - write->addr; - pkt_data = pkt->data; - write_data = write->data + offset; - data_size = write->size - offset; + if (write->getAddr() < pkt->getAddr()) { + int offset = pkt->getAddr() - write->getAddr(); + pkt_data = pkt->getPtr(); + write_data = write->getPtr() + offset; + data_size = write->getSize() - offset; assert(data_size > 0); - if (data_size > pkt->size) - data_size = pkt->size; + if (data_size > pkt->getSize()) + data_size = pkt->getSize(); } else { - int offset = write->addr - pkt->addr; - pkt_data = pkt->data + offset; - write_data = write->data; - data_size = pkt->size - offset; - assert(data_size > pkt->size); - if (data_size > write->size) - data_size = write->size; + int offset = write->getAddr() - pkt->getAddr(); + pkt_data = pkt->getPtr() + offset; + write_data = write->getPtr(); + data_size = pkt->getSize() - offset; + assert(data_size > pkt->getSize()); + if (data_size > write->getSize()) + data_size = write->getSize(); } if (pkt->isWrite()) { @@ -580,23 +582,20 @@ Cache::probe(Packet * &pkt, bool update) } if (!pkt->req->isUncacheable()) { // Fetch the cache block to fill - Packet * busPkt = new Packet(); - busPkt->addr = blk_addr; - busPkt->size = blkSize; - busPkt->data = new uint8_t[blkSize]; - BlkType *blk = tags->findBlock(pkt); - busPkt->cmd = coherence->getBusCmd(pkt->cmd, + Packet::Command temp_cmd = coherence->getBusCmd(pkt->cmd, (blk)? blk->status : 0); - busPkt->req->asid = pkt->req->asid; - busPkt->xc = pkt->xc; - busPkt->req->setThreadNum() = pkt->req->getThreadNum(); + Packet * busPkt = new Packet(pkt->req,temp_cmd, -1, blkSize); + + uint8_t* temp_data = new uint8_t[blkSize]; + busPkt->dataDynamicArray(temp_data); + busPkt->time = curTick; - lat = mi->sendProbe(busPkt, update); + lat = memSidePort->sendAtomic(busPkt); - if (!busPkt->isSatisfied()) { + if (!(busPkt->flags & SATISFIED)) { // blocked at a higher level, just return return 0; } @@ -609,19 +608,19 @@ Cache::probe(Packet * &pkt, bool update) writebacks, pkt); // Handle writebacks if needed while (!writebacks.empty()){ - mi->sendProbe(writebacks.front(), update); + memSidePort->sendAtomic(writebacks.front()); writebacks.pop_front(); } return lat + hitLatency; } else { - return mi->sendProbe(pkt,update); + return memSidePort->sendAtomic(pkt); } } } else { // There was a cache hit. // Handle writebacks if needed while (!writebacks.empty()){ - mi->sendProbe(writebacks.front(), update); + memSidePort->sendAtomic(writebacks.front()); writebacks.pop_front(); } @@ -629,7 +628,7 @@ Cache::probe(Packet * &pkt, bool update) hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; } else if (pkt->isWrite()) { // Still need to change data in all locations. - return mi->sendProbe(pkt, update); + return memSidePort->sendAtomic(pkt); } return curTick + lat; } @@ -641,11 +640,11 @@ template Tick Cache::snoopProbe(PacketPtr &pkt, bool update) { - Addr blk_addr = pkt->addr & ~(Addr(blkSize-1)); + Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); BlkType *blk = tags->findBlock(pkt); - MSHR *mshr = missQueue->findMSHR(blk_addr, pkt->req->asid); + MSHR *mshr = missQueue->findMSHR(blk_addr, pkt->req->getAsid()); CacheBlk::State new_state = 0; - bool satisfy = coherence->handleBusPktuest(pkt,blk,mshr, new_state); + bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state); if (satisfy) { tags->handleSnoop(blk, new_state, pkt); return hitLatency; diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc index 9d5b8ef546..bcf3ce9c50 100644 --- a/src/mem/cache/coherence/coherence_protocol.cc +++ b/src/mem/cache/coherence/coherence_protocol.cc @@ -47,7 +47,7 @@ using namespace std; CoherenceProtocol::StateTransition::StateTransition() - : busCmd(InvalidCmd), newState(-1), snoopFunc(invalidTransition) + : busCmd(Packet::InvalidCmd), newState(-1), snoopFunc(invalidTransition) { } @@ -59,132 +59,132 @@ CoherenceProtocol::regStats() // requestCount and snoopCount arrays, most of these are invalid, // so we just select the interesting ones to print here. - requestCount[Invalid][Read] + requestCount[Invalid][Packet::ReadReq] .name(name() + ".read_invalid") .desc("read misses to invalid blocks") ; - requestCount[Invalid][Write] + requestCount[Invalid][Packet::WriteReq] .name(name() +".write_invalid") .desc("write misses to invalid blocks") ; - requestCount[Invalid][Soft_Prefetch] + requestCount[Invalid][Packet::SoftPFReq] .name(name() +".swpf_invalid") .desc("soft prefetch misses to invalid blocks") ; - requestCount[Invalid][Hard_Prefetch] + requestCount[Invalid][Packet::HardPFReq] .name(name() +".hwpf_invalid") .desc("hard prefetch misses to invalid blocks") ; - requestCount[Shared][Write] + requestCount[Shared][Packet::WriteReq] .name(name() + ".write_shared") .desc("write misses to shared blocks") ; - requestCount[Owned][Write] + requestCount[Owned][Packet::WriteReq] .name(name() + ".write_owned") .desc("write misses to owned blocks") ; - snoopCount[Shared][Read] + snoopCount[Shared][Packet::ReadReq] .name(name() + ".snoop_read_shared") .desc("read snoops on shared blocks") ; - snoopCount[Shared][ReadEx] + snoopCount[Shared][Packet::ReadExReq] .name(name() + ".snoop_readex_shared") .desc("readEx snoops on shared blocks") ; - snoopCount[Shared][Upgrade] + snoopCount[Shared][Packet::UpgradeReq] .name(name() + ".snoop_upgrade_shared") .desc("upgradee snoops on shared blocks") ; - snoopCount[Modified][Read] + snoopCount[Modified][Packet::ReadReq] .name(name() + ".snoop_read_modified") .desc("read snoops on modified blocks") ; - snoopCount[Modified][ReadEx] + snoopCount[Modified][Packet::ReadExReq] .name(name() + ".snoop_readex_modified") .desc("readEx snoops on modified blocks") ; - snoopCount[Owned][Read] + snoopCount[Owned][Packet::ReadReq] .name(name() + ".snoop_read_owned") .desc("read snoops on owned blocks") ; - snoopCount[Owned][ReadEx] + snoopCount[Owned][Packet::ReadExReq] .name(name() + ".snoop_readex_owned") .desc("readEx snoops on owned blocks") ; - snoopCount[Owned][Upgrade] + snoopCount[Owned][Packet::UpgradeReq] .name(name() + ".snoop_upgrade_owned") .desc("upgrade snoops on owned blocks") ; - snoopCount[Exclusive][Read] + snoopCount[Exclusive][Packet::ReadReq] .name(name() + ".snoop_read_exclusive") .desc("read snoops on exclusive blocks") ; - snoopCount[Exclusive][ReadEx] + snoopCount[Exclusive][Packet::ReadExReq] .name(name() + ".snoop_readex_exclusive") .desc("readEx snoops on exclusive blocks") ; - snoopCount[Shared][Invalidate] + snoopCount[Shared][Packet::InvalidateReq] .name(name() + ".snoop_inv_shared") .desc("Invalidate snoops on shared blocks") ; - snoopCount[Owned][Invalidate] + snoopCount[Owned][Packet::InvalidateReq] .name(name() + ".snoop_inv_owned") .desc("Invalidate snoops on owned blocks") ; - snoopCount[Exclusive][Invalidate] + snoopCount[Exclusive][Packet::InvalidateReq] .name(name() + ".snoop_inv_exclusive") .desc("Invalidate snoops on exclusive blocks") ; - snoopCount[Modified][Invalidate] + snoopCount[Modified][Packet::InvalidateReq] .name(name() + ".snoop_inv_modified") .desc("Invalidate snoops on modified blocks") ; - snoopCount[Invalid][Invalidate] + snoopCount[Invalid][Packet::InvalidateReq] .name(name() + ".snoop_inv_invalid") .desc("Invalidate snoops on invalid blocks") ; - snoopCount[Shared][WriteInvalidate] + snoopCount[Shared][Packet::WriteInvalidateReq] .name(name() + ".snoop_writeinv_shared") .desc("WriteInvalidate snoops on shared blocks") ; - snoopCount[Owned][WriteInvalidate] + snoopCount[Owned][Packet::WriteInvalidateReq] .name(name() + ".snoop_writeinv_owned") .desc("WriteInvalidate snoops on owned blocks") ; - snoopCount[Exclusive][WriteInvalidate] + snoopCount[Exclusive][Packet::WriteInvalidateReq] .name(name() + ".snoop_writeinv_exclusive") .desc("WriteInvalidate snoops on exclusive blocks") ; - snoopCount[Modified][WriteInvalidate] + snoopCount[Modified][Packet::WriteInvalidateReq] .name(name() + ".snoop_writeinv_modified") .desc("WriteInvalidate snoops on modified blocks") ; - snoopCount[Invalid][WriteInvalidate] + snoopCount[Invalid][Packet::WriteInvalidateReq] .name(name() + ".snoop_writeinv_invalid") .desc("WriteInvalidate snoops on invalid blocks") ; @@ -270,167 +270,168 @@ CoherenceProtocol::CoherenceProtocol(const string &name, fatal(""); } - Packet::CommandEnum writeToSharedCmd = doUpgrades ? Upgrade : ReadEx; + Packet::Command writeToSharedCmd = doUpgrades ? Packet::UpgradeReq : Packet::ReadExReq; + Packet::Command writeToSharedResp = doUpgrades ? Packet::UpgradeResp : Packet::ReadExResp; //@todo add in hardware prefetch to this list if (protocol == "msi") { // incoming requests: specify outgoing bus request - transitionTable[Invalid][Read].onRequest(Read); - transitionTable[Invalid][Write].onRequest(ReadEx); - transitionTable[Shared][Write].onRequest(writeToSharedCmd); + transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq); + transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd); //Prefetching causes a read - transitionTable[Invalid][Soft_Prefetch].onRequest(Read); - transitionTable[Invalid][Hard_Prefetch].onRequest(Read); + transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq); // on response to given request: specify new state - transitionTable[Invalid][Read].onResponse(Shared); - transitionTable[Invalid][ReadEx].onResponse(Modified); - transitionTable[Shared][writeToSharedCmd].onResponse(Modified); + transitionTable[Invalid][Packet::ReadResp].onResponse(Shared); + transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified); + transitionTable[Shared][writeToSharedResp].onResponse(Modified); // bus snoop transition functions - transitionTable[Invalid][Read].onSnoop(nullTransition); - transitionTable[Invalid][ReadEx].onSnoop(nullTransition); - transitionTable[Shared][Read].onSnoop(nullTransition); - transitionTable[Shared][ReadEx].onSnoop(invalidateTrans); - transitionTable[Modified][ReadEx].onSnoop(supplyAndInvalidateTrans); - transitionTable[Modified][Read].onSnoop(supplyAndGotoSharedTrans); + transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition); + transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::ReadReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); + transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoSharedTrans); //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) - transitionTable[Invalid][Invalidate].onSnoop(invalidateTrans); - transitionTable[Shared][Invalidate].onSnoop(invalidateTrans); - transitionTable[Modified][Invalidate].onSnoop(invalidateTrans); - transitionTable[Invalid][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Shared][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Modified][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); if (doUpgrades) { - transitionTable[Invalid][Upgrade].onSnoop(nullTransition); - transitionTable[Shared][Upgrade].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans); } } else if(protocol == "mesi") { // incoming requests: specify outgoing bus request - transitionTable[Invalid][Read].onRequest(Read); - transitionTable[Invalid][Write].onRequest(ReadEx); - transitionTable[Shared][Write].onRequest(writeToSharedCmd); + transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq); + transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd); //Prefetching causes a read - transitionTable[Invalid][Soft_Prefetch].onRequest(Read); - transitionTable[Invalid][Hard_Prefetch].onRequest(Read); + transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq); // on response to given request: specify new state - transitionTable[Invalid][Read].onResponse(Exclusive); + transitionTable[Invalid][Packet::ReadResp].onResponse(Exclusive); //It will move into shared if the shared line is asserted in the //getNewState function - transitionTable[Invalid][ReadEx].onResponse(Modified); - transitionTable[Shared][writeToSharedCmd].onResponse(Modified); + transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified); + transitionTable[Shared][writeToSharedResp].onResponse(Modified); // bus snoop transition functions - transitionTable[Invalid][Read].onSnoop(nullTransition); - transitionTable[Invalid][ReadEx].onSnoop(nullTransition); - transitionTable[Shared][Read].onSnoop(assertShared); - transitionTable[Shared][ReadEx].onSnoop(invalidateTrans); - transitionTable[Exclusive][Read].onSnoop(assertShared); - transitionTable[Exclusive][ReadEx].onSnoop(invalidateTrans); - transitionTable[Modified][ReadEx].onSnoop(supplyAndInvalidateTrans); - transitionTable[Modified][Read].onSnoop(supplyAndGotoSharedTrans); + transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition); + transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared); + transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans); + transitionTable[Exclusive][Packet::ReadReq].onSnoop(assertShared); + transitionTable[Exclusive][Packet::ReadExReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); + transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoSharedTrans); //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) - transitionTable[Invalid][Invalidate].onSnoop(invalidateTrans); - transitionTable[Shared][Invalidate].onSnoop(invalidateTrans); - transitionTable[Modified][Invalidate].onSnoop(invalidateTrans); - transitionTable[Exclusive][Invalidate].onSnoop(invalidateTrans); - transitionTable[Invalid][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Shared][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Modified][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Exclusive][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Exclusive][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Exclusive][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); if (doUpgrades) { - transitionTable[Invalid][Upgrade].onSnoop(nullTransition); - transitionTable[Shared][Upgrade].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans); } } else if(protocol == "mosi") { // incoming requests: specify outgoing bus request - transitionTable[Invalid][Read].onRequest(Read); - transitionTable[Invalid][Write].onRequest(ReadEx); - transitionTable[Shared][Write].onRequest(writeToSharedCmd); - transitionTable[Owned][Write].onRequest(writeToSharedCmd); + transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq); + transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd); + transitionTable[Owned][Packet::WriteReq].onRequest(writeToSharedCmd); //Prefetching causes a read - transitionTable[Invalid][Soft_Prefetch].onRequest(Read); - transitionTable[Invalid][Hard_Prefetch].onRequest(Read); + transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq); // on response to given request: specify new state - transitionTable[Invalid][Read].onResponse(Shared); - transitionTable[Invalid][ReadEx].onResponse(Modified); - transitionTable[Shared][writeToSharedCmd].onResponse(Modified); - transitionTable[Owned][writeToSharedCmd].onResponse(Modified); + transitionTable[Invalid][Packet::ReadResp].onResponse(Shared); + transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified); + transitionTable[Shared][writeToSharedResp].onResponse(Modified); + transitionTable[Owned][writeToSharedResp].onResponse(Modified); // bus snoop transition functions - transitionTable[Invalid][Read].onSnoop(nullTransition); - transitionTable[Invalid][ReadEx].onSnoop(nullTransition); - transitionTable[Invalid][Upgrade].onSnoop(nullTransition); - transitionTable[Shared][Read].onSnoop(assertShared); - transitionTable[Shared][ReadEx].onSnoop(invalidateTrans); - transitionTable[Shared][Upgrade].onSnoop(invalidateTrans); - transitionTable[Modified][ReadEx].onSnoop(supplyAndInvalidateTrans); - transitionTable[Modified][Read].onSnoop(supplyAndGotoOwnedTrans); - transitionTable[Owned][Read].onSnoop(supplyAndGotoOwnedTrans); - transitionTable[Owned][ReadEx].onSnoop(supplyAndInvalidateTrans); - transitionTable[Owned][Upgrade].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition); + transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition); + transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared); + transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); + transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans); + transitionTable[Owned][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans); + transitionTable[Owned][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); + transitionTable[Owned][Packet::UpgradeReq].onSnoop(invalidateTrans); //Tansitions on seeing a DMA (writeInv(samelevel) or DMAInv) - transitionTable[Invalid][Invalidate].onSnoop(invalidateTrans); - transitionTable[Shared][Invalidate].onSnoop(invalidateTrans); - transitionTable[Modified][Invalidate].onSnoop(invalidateTrans); - transitionTable[Owned][Invalidate].onSnoop(invalidateTrans); - transitionTable[Invalid][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Shared][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Modified][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Owned][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Owned][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Owned][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); } else if(protocol == "moesi") { // incoming requests: specify outgoing bus request - transitionTable[Invalid][Read].onRequest(Read); - transitionTable[Invalid][Write].onRequest(ReadEx); - transitionTable[Shared][Write].onRequest(writeToSharedCmd); - transitionTable[Owned][Write].onRequest(writeToSharedCmd); + transitionTable[Invalid][Packet::ReadReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::WriteReq].onRequest(Packet::ReadExReq); + transitionTable[Shared][Packet::WriteReq].onRequest(writeToSharedCmd); + transitionTable[Owned][Packet::WriteReq].onRequest(writeToSharedCmd); //Prefetching causes a read - transitionTable[Invalid][Soft_Prefetch].onRequest(Read); - transitionTable[Invalid][Hard_Prefetch].onRequest(Read); + transitionTable[Invalid][Packet::SoftPFReq].onRequest(Packet::ReadReq); + transitionTable[Invalid][Packet::HardPFReq].onRequest(Packet::ReadReq); // on response to given request: specify new state - transitionTable[Invalid][Read].onResponse(Exclusive); + transitionTable[Invalid][Packet::ReadResp].onResponse(Exclusive); //It will move into shared if the shared line is asserted in the //getNewState function - transitionTable[Invalid][ReadEx].onResponse(Modified); - transitionTable[Shared][writeToSharedCmd].onResponse(Modified); - transitionTable[Owned][writeToSharedCmd].onResponse(Modified); + transitionTable[Invalid][Packet::ReadExResp].onResponse(Modified); + transitionTable[Shared][writeToSharedResp].onResponse(Modified); + transitionTable[Owned][writeToSharedResp].onResponse(Modified); // bus snoop transition functions - transitionTable[Invalid][Read].onSnoop(nullTransition); - transitionTable[Invalid][ReadEx].onSnoop(nullTransition); - transitionTable[Invalid][Upgrade].onSnoop(nullTransition); - transitionTable[Shared][Read].onSnoop(assertShared); - transitionTable[Shared][ReadEx].onSnoop(invalidateTrans); - transitionTable[Shared][Upgrade].onSnoop(invalidateTrans); - transitionTable[Exclusive][Read].onSnoop(assertShared); - transitionTable[Exclusive][ReadEx].onSnoop(invalidateTrans); - transitionTable[Modified][Read].onSnoop(supplyAndGotoOwnedTrans); - transitionTable[Modified][ReadEx].onSnoop(supplyAndInvalidateTrans); - transitionTable[Owned][Read].onSnoop(supplyAndGotoOwnedTrans); - transitionTable[Owned][ReadEx].onSnoop(supplyAndInvalidateTrans); - transitionTable[Owned][Upgrade].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::ReadReq].onSnoop(nullTransition); + transitionTable[Invalid][Packet::ReadExReq].onSnoop(nullTransition); + transitionTable[Invalid][Packet::UpgradeReq].onSnoop(nullTransition); + transitionTable[Shared][Packet::ReadReq].onSnoop(assertShared); + transitionTable[Shared][Packet::ReadExReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::UpgradeReq].onSnoop(invalidateTrans); + transitionTable[Exclusive][Packet::ReadReq].onSnoop(assertShared); + transitionTable[Exclusive][Packet::ReadExReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans); + transitionTable[Modified][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); + transitionTable[Owned][Packet::ReadReq].onSnoop(supplyAndGotoOwnedTrans); + transitionTable[Owned][Packet::ReadExReq].onSnoop(supplyAndInvalidateTrans); + transitionTable[Owned][Packet::UpgradeReq].onSnoop(invalidateTrans); //Transitions on seeing a DMA (writeInv(samelevel) or DMAInv) - transitionTable[Invalid][Invalidate].onSnoop(invalidateTrans); - transitionTable[Shared][Invalidate].onSnoop(invalidateTrans); - transitionTable[Exclusive][Invalidate].onSnoop(invalidateTrans); - transitionTable[Modified][Invalidate].onSnoop(invalidateTrans); - transitionTable[Owned][Invalidate].onSnoop(invalidateTrans); - transitionTable[Invalid][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Shared][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Exclusive][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Modified][WriteInvalidate].onSnoop(invalidateTrans); - transitionTable[Owned][WriteInvalidate].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Exclusive][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Owned][Packet::InvalidateReq].onSnoop(invalidateTrans); + transitionTable[Invalid][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Shared][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Exclusive][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Modified][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); + transitionTable[Owned][Packet::WriteInvalidateReq].onSnoop(invalidateTrans); } else { @@ -446,14 +447,14 @@ CoherenceProtocol::getBusCmd(Packet::Command cmdIn, CacheBlk::State state, MSHR *mshr) { state &= stateMask; - int cmd_idx = cmdIn.toIndex(); + int cmd_idx = (int) cmdIn; assert(0 <= state && state <= stateMax); assert(0 <= cmd_idx && cmd_idx < NUM_MEM_CMDS); Packet::Command cmdOut = transitionTable[state][cmd_idx].busCmd; - assert(cmdOut != InvalidCmd); + assert(cmdOut != Packet::InvalidCmd); ++requestCount[state][cmd_idx]; @@ -462,7 +463,7 @@ CoherenceProtocol::getBusCmd(Packet::Command cmdIn, CacheBlk::State state, CacheBlk::State -CoherenceProtocol::getNewState(const Packet * &pkt, CacheBlk::State oldState) +CoherenceProtocol::getNewState(Packet * &pkt, CacheBlk::State oldState) { CacheBlk::State state = oldState & stateMask; int cmd_idx = pkt->cmdToIndex(); diff --git a/src/mem/cache/coherence/uni_coherence.cc b/src/mem/cache/coherence/uni_coherence.cc index 68a78e3951..5ab7062692 100644 --- a/src/mem/cache/coherence/uni_coherence.cc +++ b/src/mem/cache/coherence/uni_coherence.cc @@ -44,8 +44,8 @@ Packet * UniCoherence::getPacket() { bool unblock = cshrs.isFull(); - Packet * pkt = cshrs.getPkt(); - cshrs.markInService(pkt->senderState); + Packet* pkt = cshrs.getReq(); + cshrs.markInService((MSHR*)pkt->senderState); if (!cshrs.havePending()) { cache->clearSlaveRequest(Request_Coherence); } @@ -65,15 +65,12 @@ UniCoherence::handleBusRequest(Packet * &pkt, CacheBlk *blk, MSHR *mshr, CacheBlk::State &new_state) { new_state = 0; - if (pkt->cmd.isInvalidate()) { + if (pkt->isInvalidate()) { DPRINTF(Cache, "snoop inval on blk %x (blk ptr %x)\n", - pkt->paddr, blk); + pkt->getAddr(), blk); if (!cache->isTopLevel()) { // Forward to other caches - Packet * tmp = new MemPkt(); - tmp->cmd = Invalidate; - tmp->paddr = pkt->paddr; - tmp->size = pkt->size; + Packet * tmp = new Packet(pkt->req, Packet::InvalidateReq, -1); cshrs.allocate(tmp); cache->setSlaveRequest(Request_Coherence, curTick); if (cshrs.isFull()) { diff --git a/src/mem/cache/coherence/uni_coherence.hh b/src/mem/cache/coherence/uni_coherence.hh index 4e895997f6..764bf62761 100644 --- a/src/mem/cache/coherence/uni_coherence.hh +++ b/src/mem/cache/coherence/uni_coherence.hh @@ -32,6 +32,7 @@ #define __UNI_COHERENCE_HH__ #include "base/trace.hh" +#include "base/misc.hh" #include "mem/cache/cache_blk.hh" #include "mem/cache/miss/mshr_queue.hh" #include "mem/packet.hh" diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc index 912a0f5bd0..d745cb8c65 100644 --- a/src/mem/cache/miss/blocking_buffer.cc +++ b/src/mem/cache/miss/blocking_buffer.cc @@ -33,12 +33,12 @@ * Definitions of a simple buffer for a blocking cache. */ -#include "cpu/exec_context.hh" #include "cpu/smt.hh" //for maxThreadsPerCPU #include "mem/cache/base_cache.hh" #include "mem/cache/miss/blocking_buffer.hh" #include "mem/cache/prefetch/base_prefetcher.hh" #include "sim/eventq.hh" // for Event declaration. +#include "mem/request.hh" using namespace TheISA; @@ -72,26 +72,26 @@ BlockingBuffer::setPrefetcher(BasePrefetcher *_prefetcher) void BlockingBuffer::handleMiss(Packet * &pkt, int blk_size, Tick time) { - Addr blk_addr = pkt->paddr & ~(Addr)(blk_size - 1); - if (pkt->cmd.isWrite() && (pkt->req->isUncacheable() || !writeAllocate || - pkt->cmd.isNoResponse())) { - if (pkt->cmd.isNoResponse()) { + Addr blk_addr = pkt->getAddr() & ~(Addr)(blk_size - 1); + if (pkt->isWrite() && (pkt->req->isUncacheable() || !writeAllocate || + !pkt->needsResponse())) { + if (!pkt->needsResponse()) { wb.allocateAsBuffer(pkt); } else { - wb.allocate(pkt->cmd, blk_addr, pkt->req->asid, blk_size, pkt); - } - if (cache->doData()) { - memcpy(wb.pkt->data, pkt->data, blk_size); + wb.allocate(pkt->cmd, blk_addr, pkt->req->getAsid(), blk_size, pkt); } + + memcpy(wb.pkt->getPtr(), pkt->getPtr(), blk_size); + cache->setBlocked(Blocked_NoWBBuffers); cache->setMasterRequest(Request_WB, time); return; } - if (pkt->cmd.isNoResponse()) { + if (!pkt->needsResponse()) { miss.allocateAsBuffer(pkt); } else { - miss.allocate(pkt->cmd, blk_addr, pkt->req->asid, blk_size, pkt); + miss.allocate(pkt->cmd, blk_addr, pkt->req->getAsid(), blk_size, pkt); } if (!pkt->req->isUncacheable()) { miss.pkt->flags |= CACHE_LINE_FILL; @@ -112,27 +112,27 @@ BlockingBuffer::getPacket() void BlockingBuffer::setBusCmd(Packet * &pkt, Packet::Command cmd) { - MSHR *mshr = pkt->senderState; + MSHR *mshr = (MSHR*) pkt->senderState; mshr->originalCmd = pkt->cmd; if (pkt->isCacheFill()) - pkt->cmd = cmd; + pkt->cmdOverride(cmd); } void BlockingBuffer::restoreOrigCmd(Packet * &pkt) { - pkt->cmd = pkt->senderState->originalCmd; + pkt->cmdOverride(((MSHR*)(pkt->senderState))->originalCmd); } void BlockingBuffer::markInService(Packet * &pkt) { - if (!pkt->isCacheFill() && pkt->cmd.isWrite()) { + if (!pkt->isCacheFill() && pkt->isWrite()) { // Forwarding a write/ writeback, don't need to change // the command - assert(pkt->senderState == &wb); + assert((MSHR*)pkt->senderState == &wb); cache->clearMasterRequest(Request_WB); - if (pkt->cmd.isNoResponse()) { + if (!pkt->needsResponse()) { assert(wb.getNumTargets() == 0); wb.deallocate(); cache->clearBlocked(Blocked_NoWBBuffers); @@ -140,9 +140,9 @@ BlockingBuffer::markInService(Packet * &pkt) wb.inService = true; } } else { - assert(pkt->senderState == &miss); + assert((MSHR*)pkt->senderState == &miss); cache->clearMasterRequest(Request_MSHR); - if (pkt->cmd.isNoResponse()) { + if (!pkt->needsResponse()) { assert(miss.getNumTargets() == 0); miss.deallocate(); cache->clearBlocked(Blocked_NoMSHRs); @@ -158,24 +158,24 @@ BlockingBuffer::handleResponse(Packet * &pkt, Tick time) { if (pkt->isCacheFill()) { // targets were handled in the cache tags - assert(pkt->senderState == &miss); + assert((MSHR*)pkt->senderState == &miss); miss.deallocate(); cache->clearBlocked(Blocked_NoMSHRs); } else { - if (pkt->senderState->hasTargets()) { + if (((MSHR*)(pkt->senderState))->hasTargets()) { // Should only have 1 target if we had any - assert(pkt->senderState->getNumTargets() == 1); - Packet * target = pkt->senderState->getTarget(); - pkt->senderState->popTarget(); - if (cache->doData() && pkt->cmd.isRead()) { - memcpy(target->data, pkt->data, target->size); + assert(((MSHR*)(pkt->senderState))->getNumTargets() == 1); + Packet * target = ((MSHR*)(pkt->senderState))->getTarget(); + ((MSHR*)(pkt->senderState))->popTarget(); + if (pkt->isRead()) { + memcpy(target->getPtr(), pkt->getPtr(), target->getSize()); } cache->respond(target, time); - assert(!pkt->senderState->hasTargets()); + assert(!((MSHR*)(pkt->senderState))->hasTargets()); } - if (pkt->cmd.isWrite()) { - assert(pkt->senderState == &wb); + if (pkt->isWrite()) { + assert(((MSHR*)(pkt->senderState)) == &wb); wb.deallocate(); cache->clearBlocked(Blocked_NoWBBuffers); } else { @@ -186,15 +186,12 @@ BlockingBuffer::handleResponse(Packet * &pkt, Tick time) } void -BlockingBuffer::squash(int req->getThreadNum()ber) +BlockingBuffer::squash(int threadNum) { - if (miss.setThreadNum() == req->getThreadNum()ber) { + if (miss.threadNum == threadNum) { Packet * target = miss.getTarget(); miss.popTarget(); - assert(target->req->setThreadNum() == req->getThreadNum()ber); - if (target->completionEvent != NULL) { - delete target->completionEvent; - } + assert(target->req->getThreadNum() == threadNum); target = NULL; assert(!miss.hasTargets()); miss.ntargets=0; @@ -210,27 +207,20 @@ void BlockingBuffer::doWriteback(Addr addr, int asid, int size, uint8_t *data, bool compressed) { - // Generate request - Packet * pkt = new Packet(); - pkt->paddr = addr; - pkt->req->asid = asid; - pkt->size = size; - pkt->data = new uint8_t[size]; + Request * req = new Request(addr, size, 0); + Packet * pkt = new Packet(req, Packet::Writeback, -1); + uint8_t *new_data = new uint8_t[size]; + pkt->dataDynamicArray(new_data); if (data) { - memcpy(pkt->data, data, size); + memcpy(pkt->getPtr(), data, size); } - /** - * @todo Need to find a way to charge the writeback to the "correct" - * thread. - */ - pkt->req->setThreadNum() = 0; - pkt->cmd = Writeback; if (compressed) { pkt->flags |= COMPRESSED; } + ///All writebacks charged to same thread @todo figure this out writebacks[pkt->req->getThreadNum()]++; wb.allocateAsBuffer(pkt); @@ -249,9 +239,8 @@ BlockingBuffer::doWriteback(Packet * &pkt) // Since allocate as buffer copies the request, // need to copy data here. - if (cache->doData()) { - memcpy(wb.pkt->data, pkt->data, pkt->size); - } + memcpy(wb.pkt->getPtr(), pkt->getPtr(), pkt->getSize()); + cache->setBlocked(Blocked_NoWBBuffers); cache->setMasterRequest(Request_WB, curTick); } diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc index d02f27d52a..34290351de 100644 --- a/src/mem/cache/miss/miss_queue.cc +++ b/src/mem/cache/miss/miss_queue.cc @@ -34,7 +34,6 @@ * Miss and writeback queue definitions. */ -#include "cpu/exec_context.hh" #include "cpu/smt.hh" //for maxThreadsPerCPU #include "mem/cache/base_cache.hh" #include "mem/cache/miss/miss_queue.hh" @@ -59,6 +58,10 @@ MissQueue::MissQueue(int numMSHRs, int numTargets, int write_buffers, void MissQueue::regStats(const string &name) { + Request temp_req; + Packet::Command temp_cmd = Packet::ReadReq; + Packet temp_pkt(&temp_req, temp_cmd, 0); //@todo FIx command strings so this isn't neccessary + using namespace Stats; writebacks @@ -71,7 +74,7 @@ MissQueue::regStats(const string &name) // MSHR hit statistics for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { Packet::Command cmd = (Packet::Command)access_idx; - const string &cstr = cmd.toString(); + const string &cstr = temp_pkt.cmdIdxToString(cmd); mshr_hits[access_idx] .init(maxThreadsPerCPU) @@ -86,20 +89,20 @@ MissQueue::regStats(const string &name) .desc("number of demand (read+write) MSHR hits") .flags(total) ; - demandMshrHits = mshr_hits[Read] + mshr_hits[Write]; + demandMshrHits = mshr_hits[Packet::ReadReq] + mshr_hits[Packet::WriteReq]; overallMshrHits .name(name + ".overall_mshr_hits") .desc("number of overall MSHR hits") .flags(total) ; - overallMshrHits = demandMshrHits + mshr_hits[Soft_Prefetch] + - mshr_hits[Hard_Prefetch]; + overallMshrHits = demandMshrHits + mshr_hits[Packet::SoftPFReq] + + mshr_hits[Packet::HardPFReq]; // MSHR miss statistics for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); mshr_misses[access_idx] .init(maxThreadsPerCPU) @@ -114,20 +117,20 @@ MissQueue::regStats(const string &name) .desc("number of demand (read+write) MSHR misses") .flags(total) ; - demandMshrMisses = mshr_misses[Read] + mshr_misses[Write]; + demandMshrMisses = mshr_misses[Packet::ReadReq] + mshr_misses[Packet::WriteReq]; overallMshrMisses .name(name + ".overall_mshr_misses") .desc("number of overall MSHR misses") .flags(total) ; - overallMshrMisses = demandMshrMisses + mshr_misses[Soft_Prefetch] + - mshr_misses[Hard_Prefetch]; + overallMshrMisses = demandMshrMisses + mshr_misses[Packet::SoftPFReq] + + mshr_misses[Packet::HardPFReq]; // MSHR miss latency statistics for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); mshr_miss_latency[access_idx] .init(maxThreadsPerCPU) @@ -142,7 +145,8 @@ MissQueue::regStats(const string &name) .desc("number of demand (read+write) MSHR miss cycles") .flags(total) ; - demandMshrMissLatency = mshr_miss_latency[Read] + mshr_miss_latency[Write]; + demandMshrMissLatency = mshr_miss_latency[Packet::ReadReq] + + mshr_miss_latency[Packet::WriteReq]; overallMshrMissLatency .name(name + ".overall_mshr_miss_latency") @@ -150,12 +154,12 @@ MissQueue::regStats(const string &name) .flags(total) ; overallMshrMissLatency = demandMshrMissLatency + - mshr_miss_latency[Soft_Prefetch] + mshr_miss_latency[Hard_Prefetch]; + mshr_miss_latency[Packet::SoftPFReq] + mshr_miss_latency[Packet::HardPFReq]; // MSHR uncacheable statistics for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); mshr_uncacheable[access_idx] .init(maxThreadsPerCPU) @@ -170,13 +174,14 @@ MissQueue::regStats(const string &name) .desc("number of overall MSHR uncacheable misses") .flags(total) ; - overallMshrUncacheable = mshr_uncacheable[Read] + mshr_uncacheable[Write] - + mshr_uncacheable[Soft_Prefetch] + mshr_uncacheable[Hard_Prefetch]; + overallMshrUncacheable = mshr_uncacheable[Packet::ReadReq] + + mshr_uncacheable[Packet::WriteReq] + mshr_uncacheable[Packet::SoftPFReq] + + mshr_uncacheable[Packet::HardPFReq]; // MSHR miss latency statistics for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); mshr_uncacheable_lat[access_idx] .init(maxThreadsPerCPU) @@ -191,15 +196,16 @@ MissQueue::regStats(const string &name) .desc("number of overall MSHR uncacheable cycles") .flags(total) ; - overallMshrUncacheableLatency = mshr_uncacheable_lat[Read] - + mshr_uncacheable_lat[Write] + mshr_uncacheable_lat[Soft_Prefetch] - + mshr_uncacheable_lat[Hard_Prefetch]; + overallMshrUncacheableLatency = mshr_uncacheable_lat[Packet::ReadReq] + + mshr_uncacheable_lat[Packet::WriteReq] + + mshr_uncacheable_lat[Packet::SoftPFReq] + + mshr_uncacheable_lat[Packet::HardPFReq]; #if 0 // MSHR access formulas for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); mshrAccesses[access_idx] .name(name + "." + cstr + "_mshr_accesses") @@ -229,8 +235,8 @@ MissQueue::regStats(const string &name) // MSHR miss rate formulas for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); mshrMissRate[access_idx] .name(name + "." + cstr + "_mshr_miss_rate") @@ -258,8 +264,8 @@ MissQueue::regStats(const string &name) // mshrMiss latency formulas for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); avgMshrMissLatency[access_idx] .name(name + "." + cstr + "_avg_mshr_miss_latency") @@ -287,8 +293,8 @@ MissQueue::regStats(const string &name) // mshrUncacheable latency formulas for (int access_idx = 0; access_idx < NUM_MEM_CMDS; ++access_idx) { - Packet::Command cmd = (Packet::CommandEnum)access_idx; - const string &cstr = cmd.toString(); + Packet::Command cmd = (Packet::Command)access_idx; + const string &cstr = temp_pkt.cmdIdxToString(cmd); avgMshrUncacheableLatency[access_idx] .name(name + "." + cstr + "_avg_mshr_uncacheable_latency") @@ -354,7 +360,7 @@ MissQueue::allocateMiss(Packet * &pkt, int size, Tick time) if (mq.isFull()) { cache->setBlocked(Blocked_NoMSHRs); } - if (pkt->cmd != Hard_Prefetch) { + if (pkt->cmd != Packet::HardPFReq) { //If we need to request the bus (not on HW prefetch), do so cache->setMasterRequest(Request_MSHR, time); } @@ -365,18 +371,21 @@ MissQueue::allocateMiss(Packet * &pkt, int size, Tick time) MSHR* MissQueue::allocateWrite(Packet * &pkt, int size, Tick time) { - MSHR* mshr = wb.allocate(pkt,pkt->size); + MSHR* mshr = wb.allocate(pkt,pkt->getSize()); mshr->order = order++; - if (cache->doData()){ - if (pkt->isCompressed()) { - delete [] mshr->pkt->data; - mshr->pkt->actualSize = pkt->actualSize; - mshr->pkt->data = new uint8_t[pkt->actualSize]; - memcpy(mshr->pkt->data, pkt->data, pkt->actualSize); - } else { - memcpy(mshr->pkt->data, pkt->data, pkt->size); - } - } + +//REMOVING COMPRESSION FOR NOW +#if 0 + if (pkt->isCompressed()) { + mshr->pkt->deleteData(); + mshr->pkt->actualSize = pkt->actualSize; + mshr->pkt->data = new uint8_t[pkt->actualSize]; + memcpy(mshr->pkt->data, pkt->data, pkt->actualSize); + } else { +#endif + memcpy(mshr->pkt->getPtr(), pkt->getPtr(), pkt->getSize()); + //{ + if (wb.isFull()) { cache->setBlocked(Blocked_NoWBBuffers); } @@ -397,15 +406,15 @@ MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time) if (prefetchMiss) prefetcher->handleMiss(pkt, time); int size = blkSize; - Addr blkAddr = pkt->paddr & ~(Addr)(blkSize-1); + Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1); MSHR* mshr = NULL; if (!pkt->req->isUncacheable()) { - mshr = mq.findMatch(blkAddr, pkt->req->asid); + mshr = mq.findMatch(blkAddr, pkt->req->getAsid()); if (mshr) { //@todo remove hw_pf here mshr_hits[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; - if (mshr->getThreadNum() != pkt->req->getThreadNum()) { - mshr->setThreadNum() = -1; + if (mshr->threadNum != pkt->req->getThreadNum()) { + mshr->threadNum = -1; } mq.allocateTarget(mshr, pkt); if (mshr->pkt->isNoAllocate() && !pkt->isNoAllocate()) { @@ -429,14 +438,14 @@ MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time) } else { //Count uncacheable accesses mshr_uncacheable[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; - size = pkt->size; + size = pkt->getSize(); } - if (pkt->cmd.isWrite() && (pkt->req->isUncacheable() || !writeAllocate || - pkt->cmd.isNoResponse())) { + if (pkt->isWrite() && (pkt->req->isUncacheable() || !writeAllocate || + !pkt->needsResponse())) { /** * @todo Add write merging here. */ - mshr = allocateWrite(pkt, pkt->size, time); + mshr = allocateWrite(pkt, pkt->getSize(), time); return; } @@ -468,7 +477,7 @@ MissQueue::getPacket() pkt = wb.getReq(); // Need to search for earlier miss. MSHR *mshr = mq.findPending(pkt); - if (mshr && mshr->order < pkt->senderState->order) { + if (mshr && mshr->order < ((MSHR*)(pkt->senderState))->order) { // Service misses in order until conflict is cleared. return mq.getReq(); } @@ -491,7 +500,7 @@ MissQueue::getPacket() //Update statistic on number of prefetches issued (hwpf_mshr_misses) mshr_misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; //It will request the bus for the future, but should clear that immedieatley - allocateMiss(pkt, pkt->size, curTick); + allocateMiss(pkt, pkt->getSize(), curTick); pkt = mq.getReq(); assert(pkt); //We should get back a req b/c we just put one in } @@ -503,7 +512,7 @@ void MissQueue::setBusCmd(Packet * &pkt, Packet::Command cmd) { assert(pkt->senderState != 0); - MSHR * mshr = pkt->senderState; + MSHR * mshr = (MSHR*)pkt->senderState; mshr->originalCmd = pkt->cmd; if (pkt->isCacheFill() || pkt->isNoAllocate()) pkt->cmd = cmd; @@ -512,7 +521,7 @@ MissQueue::setBusCmd(Packet * &pkt, Packet::Command cmd) void MissQueue::restoreOrigCmd(Packet * &pkt) { - pkt->cmd = pkt->senderState->originalCmd; + pkt->cmd = ((MSHR*)(pkt->senderState))->originalCmd; } void @@ -526,11 +535,11 @@ MissQueue::markInService(Packet * &pkt) * @todo Should include MSHRQueue pointer in MSHR to select the correct * one. */ - if ((!pkt->isCacheFill() && pkt->cmd.isWrite()) || pkt->cmd == Copy) { + if ((!pkt->isCacheFill() && pkt->isWrite())) { // Forwarding a write/ writeback, don't need to change // the command unblock = wb.isFull(); - wb.markInService(pkt->senderState); + wb.markInService((MSHR*)pkt->senderState); if (!wb.havePending()){ cache->clearMasterRequest(Request_WB); } @@ -541,11 +550,11 @@ MissQueue::markInService(Packet * &pkt) } } else { unblock = mq.isFull(); - mq.markInService(pkt->senderState); + mq.markInService((MSHR*)pkt->senderState); if (!mq.havePending()){ cache->clearMasterRequest(Request_MSHR); } - if (pkt->senderState->originalCmd == Hard_Prefetch) { + if (((MSHR*)(pkt->senderState))->originalCmd == Packet::HardPFReq) { DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n", cache->name()); //Also clear pending if need be @@ -568,8 +577,8 @@ MissQueue::markInService(Packet * &pkt) void MissQueue::handleResponse(Packet * &pkt, Tick time) { - MSHR* mshr = pkt->senderState; - if (pkt->senderState->originalCmd == Hard_Prefetch) { + MSHR* mshr = (MSHR*)pkt->senderState; + if (((MSHR*)(pkt->senderState))->originalCmd == Packet::HardPFReq) { DPRINTF(HWPrefetch, "%s:Handling the response to a HW_PF\n", cache->name()); } @@ -617,8 +626,9 @@ MissQueue::handleResponse(Packet * &pkt, Tick time) assert(num_targets == 1); Packet * target = mshr->getTarget(); mshr->popTarget(); - if (cache->doData() && pkt->cmd.isRead()) { - memcpy(target->data, pkt->data, target->size); + if (pkt->isRead()) { + memcpy(target->getPtr(), pkt->getPtr(), + target->getSize()); } cache->respond(target, time); assert(!mshr->hasTargets()); @@ -629,14 +639,15 @@ MissQueue::handleResponse(Packet * &pkt, Tick time) while (mshr->hasTargets()) { Packet * target = mshr->getTarget(); mshr->popTarget(); - if (cache->doData() && pkt->cmd.isRead()) { - memcpy(target->data, pkt->data, target->size); + if (pkt->isRead()) { + memcpy(target->getPtr(), pkt->getPtr(), + target->getSize()); } cache->respond(target, time); } } - if (pkt->cmd.isWrite()) { + if (pkt->isWrite()) { // If the wrtie buffer is full, we might unblock now unblock = wb.isFull(); wb.deallocate(mshr); @@ -660,12 +671,12 @@ MissQueue::handleResponse(Packet * &pkt, Tick time) } void -MissQueue::squash(int req->getThreadNum()ber) +MissQueue::squash(int threadNum) { bool unblock = false; BlockedCause cause = NUM_BLOCKED_CAUSES; - if (noTargetMSHR && noTargetMSHR->setThreadNum() == req->getThreadNum()ber) { + if (noTargetMSHR && noTargetMSHR->threadNum == threadNum) { noTargetMSHR = NULL; unblock = true; cause = Blocked_NoTargets; @@ -674,7 +685,7 @@ MissQueue::squash(int req->getThreadNum()ber) unblock = true; cause = Blocked_NoMSHRs; } - mq.squash(req->getThreadNum()ber); + mq.squash(threadNum); if (!mq.havePending()) { cache->clearMasterRequest(Request_MSHR); } @@ -701,9 +712,19 @@ MissQueue::doWriteback(Addr addr, int asid, int size, uint8_t *data, bool compressed) { // Generate request - Packet * pkt = buildWritebackReq(addr, asid, size, data, - compressed); + Request * req = new Request(addr, size, 0); + Packet * pkt = new Packet(req, Packet::Writeback, -1); + uint8_t *new_data = new uint8_t[size]; + pkt->dataDynamicArray(new_data); + if (data) { + memcpy(pkt->getPtr(), data, size); + } + if (compressed) { + pkt->flags |= COMPRESSED; + } + + ///All writebacks charged to same thread @todo figure this out writebacks[pkt->req->getThreadNum()]++; allocateWrite(pkt, 0, curTick); diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 5c3c9fd1d0..fe8cbeea45 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -50,13 +50,15 @@ MSHR::MSHR() { inService = false; ntargets = 0; - setThreadNum() = -1; + threadNum = -1; } void MSHR::allocate(Packet::Command cmd, Addr _addr, int _asid, int size, Packet * &target) { + assert("NEED TO FIX YET\n" && 0); +#if 0 assert(targets.empty()); addr = _addr; asid = _asid; @@ -74,6 +76,7 @@ MSHR::allocate(Packet::Command cmd, Addr _addr, int _asid, int size, pkt->req = target->req; allocateTarget(target); } +#endif } // Since we aren't sure if data is being used, don't copy here. @@ -83,17 +86,13 @@ MSHR::allocate(Packet::Command cmd, Addr _addr, int _asid, int size, void MSHR::allocateAsBuffer(Packet * &target) { - addr = target->paddr; - asid = target->req->asid; - setThreadNum() = target->req->getThreadNum(); - pkt = new Packet(); - pkt->addr = target->addr; - pkt->dest = target->dest; - pkt->cmd = target->cmd; - pkt->size = target->size; - pkt->req = target->req; - pkt->data = new uint8_t[target->size]; - pkt->senderState = this; + addr = target->getAddr(); + asid = target->req->getAsid(); + threadNum = target->req->getThreadNum(); + pkt = new Packet(target->req, target->cmd, -1); + uint8_t *new_data = new uint8_t[target->getSize()]; + pkt->dataDynamicArray(new_data); + pkt->senderState = (Packet::SenderState*)this; pkt->time = curTick; } @@ -117,11 +116,11 @@ MSHR::allocateTarget(Packet * &target) //If we append an invalidate and we issued a read to the bus, //but now have some pending writes, we need to move //the invalidate to before the first non-read - if (inService && pkt->cmd.isRead() && target->cmd.isInvalidate()) { + if (inService && pkt->isRead() && target->isInvalidate()) { std::list temp; while (!targets.empty()) { - if (!targets.front()->cmd.isRead()) break; + if (!targets.front()->isRead()) break; //Place on top of temp stack temp.push_front(targets.front()); //Remove from targets @@ -148,8 +147,8 @@ MSHR::allocateTarget(Packet * &target) * @todo really prioritize the target commands. */ - if (!inService && target->cmd.isWrite()) { - pkt->cmd = WriteReq; + if (!inService && target->isWrite()) { + pkt->cmd = Packet::WriteReq; } } @@ -162,14 +161,14 @@ MSHR::dump() "inService: %d thread: %d\n" "Addr: %x asid: %d ntargets %d\n" "Targets:\n", - inService, getThreadNum(), addr, asid, ntargets); + inService, threadNum, addr, asid, ntargets); TargetListIterator tar_it = targets.begin(); for (int i = 0; i < ntargets; i++) { assert(tar_it != targets.end()); ccprintf(cerr, "\t%d: Addr: %x cmd: %d\n", - i, (*tar_it)->paddr, (*tar_it)->cmdToIndex()); + i, (*tar_it)->getAddr(), (*tar_it)->cmdToIndex()); tar_it++; } diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index 3bd6d36d15..167aa26cd1 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -66,7 +66,7 @@ class MSHR { /** True if the request has been sent to the bus. */ bool inService; /** Thread number of the miss. */ - int getThreadNum(); + int threadNum; /** The request that is forwarded to the next level of the hierarchy. */ Packet * pkt; /** The number of currently allocated targets. */ diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc index ced43d30af..6516a99f85 100644 --- a/src/mem/cache/miss/mshr_queue.cc +++ b/src/mem/cache/miss/mshr_queue.cc @@ -94,17 +94,19 @@ MSHRQueue::findPending(Packet * &pkt) const MSHR::ConstIterator end = pendingList.end(); for (; i != end; ++i) { MSHR *mshr = *i; - if (mshr->addr < pkt->addr) { - if (mshr->addr + mshr->pkt->size > pkt->addr) { + if (mshr->addr < pkt->getAddr()) { + if (mshr->addr + mshr->pkt->getSize() > pkt->getAddr()) { return mshr; } } else { - if (pkt->addr + pkt->size > mshr->addr) { + if (pkt->getAddr() + pkt->getSize() > mshr->addr) { return mshr; } } //need to check destination address for copies. + //TEMP NOT DOING COPIES +#if 0 if (mshr->pkt->cmd == Copy) { Addr dest = mshr->pkt->dest; if (dest < pkt->addr) { @@ -117,6 +119,7 @@ MSHRQueue::findPending(Packet * &pkt) const } } } +#endif } return NULL; } @@ -124,16 +127,16 @@ MSHRQueue::findPending(Packet * &pkt) const MSHR* MSHRQueue::allocate(Packet * &pkt, int size) { - Addr aligned_addr = pkt->addr & ~((Addr)size - 1); + Addr aligned_addr = pkt->getAddr() & ~((Addr)size - 1); MSHR *mshr = freeList.front(); assert(mshr->getNumTargets() == 0); freeList.pop_front(); - if (pkt->cmd.isNoResponse()) { + if (!pkt->needsResponse()) { mshr->allocateAsBuffer(pkt); } else { assert(size !=0); - mshr->allocate(pkt->cmd, aligned_addr, pkt->req->req->asid, size, pkt); + mshr->allocate(pkt->cmd, aligned_addr, pkt->req->getAsid(), size, pkt); allocatedTargets += 1; } mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr); @@ -149,7 +152,7 @@ MSHRQueue::allocateFetch(Addr addr, int asid, int size, Packet * &target) MSHR *mshr = freeList.front(); assert(mshr->getNumTargets() == 0); freeList.pop_front(); - mshr->allocate(Read, addr, asid, size, target); + mshr->allocate(Packet::ReadReq, addr, asid, size, target); mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr); mshr->readyIter = pendingList.insert(pendingList.end(), mshr); @@ -164,7 +167,7 @@ MSHRQueue::allocateTargetList(Addr addr, int asid, int size) assert(mshr->getNumTargets() == 0); freeList.pop_front(); Packet * dummy; - mshr->allocate(Read, addr, asid, size, dummy); + mshr->allocate(Packet::ReadReq, addr, asid, size, dummy); mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr); mshr->inService = true; ++inServiceMSHRs; @@ -209,7 +212,7 @@ void MSHRQueue::markInService(MSHR* mshr) { //assert(mshr == pendingList.front()); - if (mshr->pkt->cmd.isNoResponse()) { + if (!mshr->pkt->needsResponse()) { assert(mshr->getNumTargets() == 0); deallocate(mshr); return; @@ -237,21 +240,18 @@ MSHRQueue::markPending(MSHR* mshr, Packet::Command cmd) } void -MSHRQueue::squash(int req->getThreadNum()ber) +MSHRQueue::squash(int threadNum) { MSHR::Iterator i = allocatedList.begin(); MSHR::Iterator end = allocatedList.end(); for (; i != end;) { MSHR *mshr = *i; - if (mshr->setThreadNum() == req->getThreadNum()ber) { + if (mshr->threadNum == threadNum) { while (mshr->hasTargets()) { Packet * target = mshr->getTarget(); mshr->popTarget(); - assert(target->req->setThreadNum() == req->getThreadNum()ber); - if (target->completionEvent != NULL) { - delete target->completionEvent; - } + assert(target->req->getThreadNum() == threadNum); target = NULL; } assert(!mshr->hasTargets()); diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 3d37686768..0369745c9b 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -52,6 +52,9 @@ typedef std::list PacketList; #define NACKED_LINE 1 << 0 #define SATISFIED 1 << 1 #define SHARED_LINE 1 << 2 +#define CACHE_LINE_FILL 1 << 3 +#define COMPRESSED 1 << 4 +#define NO_ALLOCATE 1 << 5 //For statistics we need max number of commands, hard code it at //20 for now. @todo fix later @@ -66,6 +69,10 @@ typedef std::list PacketList; */ class Packet { + public: + /** Temporary FLAGS field until cache gets working, this should be in coherence/sender state. */ + uint64_t flags; + private: /** A pointer to the data being transfered. It can be differnt * sizes at each level of the heirarchy so it belongs in the @@ -93,6 +100,9 @@ class Packet /** The size of the request or transfer. */ int size; + /** The offset within the block that represents the data. */ + int offset; + /** Device address (e.g., bus ID) of the source of the * transaction. The source is not responsible for setting this * field; it is set implicitly by the interconnect when the @@ -110,6 +120,9 @@ class Packet bool addrSizeValid; /** Is the 'src' field valid? */ bool srcValid; + /** Is the offset valid. */ + bool offsetValid; + public: @@ -171,6 +184,7 @@ class Packet /** List of all commands associated with a packet. */ enum Command { + InvalidCmd = 0, ReadReq = IsRead | IsRequest | NeedsResponse, WriteReq = IsWrite | IsRequest | NeedsResponse, WriteReqNoAck = IsWrite | IsRequest, @@ -183,7 +197,10 @@ class Packet HardPFResp = IsRead | IsRequest | IsHWPrefetch | IsResponse, InvalidateReq = IsInvalidate | IsRequest, WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest, - UpgradeReq = IsInvalidate | NeedsResponse + UpgradeReq = IsInvalidate | NeedsResponse, + UpgradeResp = IsInvalidate | IsResponse, + ReadExReq = IsRead | IsInvalidate | NeedsResponse, + ReadExResp = IsRead | IsInvalidate | IsResponse }; /** Return the string name of the cmd field (for debugging and @@ -206,8 +223,8 @@ class Packet bool needsResponse() { return (cmd & NeedsResponse) != 0; } bool isInvalidate() { return (cmd * IsInvalidate) != 0; } - bool isCacheFill() { assert("Unimplemented yet\n" && 0); } - bool isNoAllocate() { assert("Unimplemented yet\n" && 0); } + bool isCacheFill() { return (flags & CACHE_LINE_FILL) != 0; } + bool isNoAllocate() { return (flags & NO_ALLOCATE) != 0; } /** Possible results of a packet's request. */ enum Result @@ -232,6 +249,10 @@ class Packet Addr getAddr() const { assert(addrSizeValid); return addr; } int getSize() const { assert(addrSizeValid); return size; } + int getOffset() const { assert(offsetValid); return offset; } + + void addrOverride(Addr newAddr) { assert(addrSizeValid); addr = newAddr; } + void cmdOverride(Command newCmd) { cmd = newCmd; } /** Constructor. Note that a Request object must be constructed * first, but the Requests's physical address and size fields @@ -241,10 +262,25 @@ class Packet : data(NULL), staticData(false), dynamicData(false), arrayData(false), addr(_req->paddr), size(_req->size), dest(_dest), addrSizeValid(_req->validPaddr), - srcValid(false), + srcValid(false), offsetValid(false), req(_req), coherence(NULL), senderState(NULL), cmd(_cmd), result(Unknown) { + flags = 0; + } + + /** Alternate constructor if you are trying to create a packet with + * a request that is for a whole block, not the address from the req. + * this allows for overriding the size/addr of the req.*/ + Packet(Request *_req, Command _cmd, short _dest, int _blkSize) + : data(NULL), staticData(false), dynamicData(false), arrayData(false), + addr(_req->paddr & ~(_blkSize - 1)), size(_blkSize), + offset(_req->paddr & (_blkSize - 1)), dest(_dest), + addrSizeValid(_req->validPaddr), srcValid(false), offsetValid(true), + req(_req), coherence(NULL), senderState(NULL), cmd(_cmd), + result(Unknown) + { + flags = 0; } /** Destructor. */ From 335fa4bde33f60bf61dceb04eb61aeade5cee76c Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Fri, 30 Jun 2006 10:25:25 -0400 Subject: [PATCH 05/17] All files compile in the mem directory except cache_builder Missing some functionality (like split caches and copy support) src/SConscript: Typo src/mem/cache/prefetch/base_prefetcher.cc: src/mem/cache/prefetch/ghb_prefetcher.hh: src/mem/cache/prefetch/stride_prefetcher.hh: src/mem/cache/prefetch/tagged_prefetcher_impl.hh: src/mem/cache/tags/fa_lru.cc: src/mem/cache/tags/fa_lru.hh: src/mem/cache/tags/iic.cc: src/mem/cache/tags/iic.hh: src/mem/cache/tags/lru.cc: src/mem/cache/tags/lru.hh: src/mem/cache/tags/split.cc: src/mem/cache/tags/split.hh: src/mem/cache/tags/split_lifo.cc: src/mem/cache/tags/split_lifo.hh: src/mem/cache/tags/split_lru.cc: src/mem/cache/tags/split_lru.hh: src/mem/packet.hh: src/mem/request.hh: Fix so it compiles --HG-- extra : convert_revision : 0d87d84f6e9445bab655c0cb0f8541bbf6eab904 --- src/SConscript | 2 +- src/mem/cache/prefetch/base_prefetcher.cc | 26 +++++----- src/mem/cache/prefetch/ghb_prefetcher.hh | 4 +- src/mem/cache/prefetch/stride_prefetcher.hh | 2 +- .../cache/prefetch/tagged_prefetcher_impl.hh | 2 +- src/mem/cache/tags/fa_lru.cc | 10 ++-- src/mem/cache/tags/fa_lru.hh | 8 +-- src/mem/cache/tags/iic.cc | 52 +++++++++++-------- src/mem/cache/tags/iic.hh | 14 ++--- src/mem/cache/tags/lru.cc | 45 ++++++++-------- src/mem/cache/tags/lru.hh | 8 +-- src/mem/cache/tags/split.cc | 26 +++++----- src/mem/cache/tags/split.hh | 8 +-- src/mem/cache/tags/split_lifo.cc | 18 ++++--- src/mem/cache/tags/split_lifo.hh | 8 +-- src/mem/cache/tags/split_lru.cc | 18 ++++--- src/mem/cache/tags/split_lru.hh | 8 +-- src/mem/packet.hh | 16 +++--- src/mem/request.hh | 4 ++ 19 files changed, 147 insertions(+), 132 deletions(-) diff --git a/src/SConscript b/src/SConscript index 04da17ee63..e20aca0d73 100644 --- a/src/SConscript +++ b/src/SConscript @@ -117,7 +117,7 @@ base_sources = Split(''' mem/cache/tags/base_tags.cc mem/cache/tags/cache_tags.cc mem/cache/tags/fa_lru.cc - mem/cache/tags/iic/cc + mem/cache/tags/iic.cc mem/cache/tags/lru.cc mem/cache/tags/repl/gen.cc mem/cache/tags/repl/repl.cc diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc index 7b2d57cd53..29da537468 100644 --- a/src/mem/cache/prefetch/base_prefetcher.cc +++ b/src/mem/cache/prefetch/base_prefetcher.cc @@ -36,6 +36,7 @@ #include "base/trace.hh" #include "mem/cache/base_cache.hh" #include "mem/cache/prefetch/base_prefetcher.hh" +#include "mem/request.hh" #include BasePrefetcher::BasePrefetcher(int size, bool pageStop, bool serialSquash, @@ -132,10 +133,10 @@ BasePrefetcher::getPacket() void BasePrefetcher::handleMiss(Packet * &pkt, Tick time) { - if (!pkt->req->isUncacheable() && !(pkt->isInstRead() && only_data)) + if (!pkt->req->isUncacheable() && !(pkt->req->isInstRead() && only_data)) { //Calculate the blk address - Addr blkAddr = pkt->paddr & ~(Addr)(blkSize-1); + Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1); //Check if miss is in pfq, if so remove it std::list::iterator iter = inPrefetch(blkAddr); @@ -177,15 +178,14 @@ BasePrefetcher::handleMiss(Packet * &pkt, Tick time) //temp calc this here... pfIdentified++; //create a prefetch memreq + Request * prefetchReq = new Request(*addr, blkSize, 0); Packet * prefetch; - prefetch = new Packet(); - prefetch->paddr = (*addr); - prefetch->size = blkSize; - prefetch->cmd = Hard_Prefetch; - prefetch->xc = pkt->xc; - prefetch->data = new uint8_t[blkSize]; - prefetch->req->asid = pkt->req->asid; - prefetch->req->setThreadNum() = pkt->req->getThreadNum(); + prefetch = new Packet(prefetchReq, Packet::HardPFReq, -1); + uint8_t *new_data = new uint8_t[blkSize]; + prefetch->dataDynamicArray(new_data); + prefetch->req->setThreadContext(pkt->req->getCpuNum(), + pkt->req->getThreadNum()); + prefetch->time = time + (*delay); //@todo ADD LATENCY HERE //... initialize @@ -199,14 +199,14 @@ BasePrefetcher::handleMiss(Packet * &pkt, Tick time) } //Check if it is already in the miss_queue - if (inMissQueue(prefetch->paddr, prefetch->req->asid)) { + if (inMissQueue(prefetch->getAddr(), prefetch->req->getAsid())) { addr++; delay++; continue; } //Check if it is already in the pf buffer - if (inPrefetch(prefetch->paddr) != pf.end()) { + if (inPrefetch(prefetch->getAddr()) != pf.end()) { pfBufferHit++; addr++; delay++; @@ -240,7 +240,7 @@ BasePrefetcher::inPrefetch(Addr address) //Guaranteed to only be one match, we always check before inserting std::list::iterator iter; for (iter=pf.begin(); iter != pf.end(); iter++) { - if (((*iter)->paddr & ~(Addr)(blkSize-1)) == address) { + if (((*iter)->getAddr() & ~(Addr)(blkSize-1)) == address) { return iter; } } diff --git a/src/mem/cache/prefetch/ghb_prefetcher.hh b/src/mem/cache/prefetch/ghb_prefetcher.hh index f25ebe1664..c22b763d1b 100644 --- a/src/mem/cache/prefetch/ghb_prefetcher.hh +++ b/src/mem/cache/prefetch/ghb_prefetcher.hh @@ -78,8 +78,8 @@ class GHBPrefetcher : public Prefetcher void calculatePrefetch(Packet * &pkt, std::list &addresses, std::list &delays) { - Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1); - int cpuID = pkt->cpu_num; + Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1); + int cpuID = pkt->req->getCpuNum(); if (!useCPUId) cpuID = 0; diff --git a/src/mem/cache/prefetch/stride_prefetcher.hh b/src/mem/cache/prefetch/stride_prefetcher.hh index f897762151..4a8ee7de48 100644 --- a/src/mem/cache/prefetch/stride_prefetcher.hh +++ b/src/mem/cache/prefetch/stride_prefetcher.hh @@ -96,7 +96,7 @@ class StridePrefetcher : public Prefetcher std::list &delays) { // Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1); - int cpuID = pkt->cpu_num; + int cpuID = pkt->req->getCpuNum(); if (!useCPUId) cpuID = 0; /* Scan Table for IAddr Match */ diff --git a/src/mem/cache/prefetch/tagged_prefetcher_impl.hh b/src/mem/cache/prefetch/tagged_prefetcher_impl.hh index 9e46ba8937..db5c94820c 100644 --- a/src/mem/cache/prefetch/tagged_prefetcher_impl.hh +++ b/src/mem/cache/prefetch/tagged_prefetcher_impl.hh @@ -52,7 +52,7 @@ TaggedPrefetcher:: calculatePrefetch(Packet * &pkt, std::list &addresses, std::list &delays) { - Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1); + Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1); for (int d=1; d <= degree; d++) { Addr newAddr = blkAddr + d*(this->blkSize); diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc index 43ab363095..82d2c410d0 100644 --- a/src/mem/cache/tags/fa_lru.cc +++ b/src/mem/cache/tags/fa_lru.cc @@ -39,6 +39,7 @@ #include "mem/cache/tags/fa_lru.hh" #include "base/intmath.hh" +#include "base/misc.hh" using namespace std; @@ -204,7 +205,7 @@ FALRU::findBlock(Addr addr, int asid, int &lat, int *inCache) FALRUBlk* FALRU::findBlock(Packet * &pkt, int &lat, int *inCache) { - Addr addr = pkt->paddr; + Addr addr = pkt->getAddr(); accesses++; int tmp_in_cache = 0; @@ -255,17 +256,16 @@ FALRU::findBlock(Addr addr, int asid) const } FALRUBlk* -FALRU::findReplacement(Packet * &pkt, PacketList* &writebacks, +FALRU::findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks) { FALRUBlk * blk = tail; assert(blk->inCache == 0); moveToHead(blk); tagHash.erase(blk->tag); - tagHash[blkAlign(pkt->paddr)] = blk; + tagHash[blkAlign(pkt->getAddr())] = blk; if (blk->isValid()) { - int req->setThreadNum() = (blk->xc) ? blk->xc->getThreadNum() : 0; - replacements[req->getThreadNum()]++; + replacements[0]++; } else { tagsInUse++; blk->isTouched = true; diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh index 7855f84550..566e36c277 100644 --- a/src/mem/cache/tags/fa_lru.hh +++ b/src/mem/cache/tags/fa_lru.hh @@ -215,7 +215,7 @@ public: * @param compress_blocks List of blocks to compress, for adaptive comp. * @return The block to place the replacement in. */ - FALRUBlk* findReplacement(Packet * &pkt, PacketList* & writebacks, + FALRUBlk* findReplacement(Packet * &pkt, PacketList & writebacks, BlkList &compress_blocks); /** @@ -319,7 +319,7 @@ public: * needed when writing to a compressed block. */ void writeData(FALRUBlk *blk, uint8_t *data, int size, - PacketList* &writebacks) + PacketList &writebacks) { } @@ -330,14 +330,14 @@ public: * @param asid The address space ID. * @param writebacks List for any generated writeback requests. */ - void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) + void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks) { } /** * Unimplemented. */ - void fixCopy(Packet * &pkt, PacketList* &writebacks) + void fixCopy(Packet * &pkt, PacketList &writebacks) { } diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc index f4641401fc..0071ca2837 100644 --- a/src/mem/cache/tags/iic.cc +++ b/src/mem/cache/tags/iic.cc @@ -287,8 +287,8 @@ IIC::findBlock(Addr addr, int asid, int &lat) IICTag* IIC::findBlock(Packet * &pkt, int &lat) { - Addr addr = pkt->paddr; - int asid = pkt->req->asid; + Addr addr = pkt->getAddr(); + int asid = pkt->req->getAsid(); Addr tag = extractTag(addr); unsigned set = hash(addr); @@ -363,11 +363,11 @@ IIC::findBlock(Addr addr, int asid) const IICTag* -IIC::findReplacement(Packet * &pkt, PacketList* &writebacks, +IIC::findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks) { - DPRINTF(IIC, "Finding Replacement for %x\n", pkt->paddr); - unsigned set = hash(pkt->paddr); + DPRINTF(IIC, "Finding Replacement for %x\n", pkt->getAddr()); + unsigned set = hash(pkt->getAddr()); IICTag *tag_ptr; unsigned long *tmp_data = new unsigned long[numSub]; @@ -405,7 +405,7 @@ IIC::findReplacement(Packet * &pkt, PacketList* &writebacks, } void -IIC::freeReplacementBlock(PacketList* & writebacks) +IIC::freeReplacementBlock(PacketList & writebacks) { IICTag *tag_ptr; unsigned long data_ptr; @@ -418,18 +418,23 @@ IIC::freeReplacementBlock(PacketList* & writebacks) tag_ptr->isModified() ? "writeback" : "clean"); /* write back replaced block data */ if (tag_ptr && (tag_ptr->isValid())) { - int req->setThreadNum() = (tag_ptr->xc) ? tag_ptr->xc->getThreadNum() : 0; - replacements[req->getThreadNum()]++; + replacements[0]++; totalRefs += tag_ptr->refCount; ++sampledRefs; tag_ptr->refCount = 0; if (tag_ptr->isModified()) { - Packet * writeback = +/* Packet * writeback = buildWritebackReq(regenerateBlkAddr(tag_ptr->tag, 0), tag_ptr->req->asid, tag_ptr->xc, blkSize, - (cache->doData())?tag_ptr->data:0, + tag_ptr->data, tag_ptr->size); +*/ + Request *writebackReq = new Request(regenerateBlkAddr(tag_ptr->tag, 0), + blkSize, 0); + Packet *writeback = new Packet(writebackReq, Packet::Writeback, -1); + writeback->dataDynamic(tag_ptr->data); + writebacks.push_back(writeback); } } @@ -446,7 +451,7 @@ IIC::freeReplacementBlock(PacketList* & writebacks) } unsigned long -IIC::getFreeDataBlock(PacketList* & writebacks) +IIC::getFreeDataBlock(PacketList & writebacks) { struct IICTag *tag_ptr; unsigned long data_ptr; @@ -466,7 +471,7 @@ IIC::getFreeDataBlock(PacketList* & writebacks) IICTag* -IIC::getFreeTag(int set, PacketList* & writebacks) +IIC::getFreeTag(int set, PacketList & writebacks) { unsigned long tag_index; IICTag *tag_ptr; @@ -708,7 +713,7 @@ IIC::invalidateBlk(int asid, Addr addr) void IIC::readData(IICTag *blk, uint8_t *data){ - assert(cache->doData()); +// assert(cache->doData()); assert(blk->size <= trivialSize || blk->numData > 0); int data_size = blk->size; if (data_size > trivialSize) { @@ -725,8 +730,8 @@ IIC::readData(IICTag *blk, uint8_t *data){ void IIC::writeData(IICTag *blk, uint8_t *write_data, int size, - PacketList* & writebacks){ - assert(cache->doData()); + PacketList & writebacks){ +// assert(cache->doData()); assert(size < blkSize || !blk->isCompressed()); DPRINTF(IIC, "Writing %d bytes to %x\n", size, blk->tag<re = (void*) repl->add(dest_tag - tagStore); dest_tag->set = hash(dest); dest_tag->tag = extractTag(dest); - dest_tag->req->asid = asid; + dest_tag->asid = asid; dest_tag->status = BlkValid | BlkWritable; } // Find the source tag here since it might move if we need to find a @@ -823,15 +830,17 @@ IIC::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) } else { dest_tag->status &= ~BlkCompressed; } +#endif } void -IIC::fixCopy(Packet * &pkt, PacketList* &writebacks) +IIC::fixCopy(Packet * &pkt, PacketList &writebacks) { +#if 0 // if reference counter is greater than 1, do copy // else do write - Addr blk_addr = blkAlign(pkt->paddr); - IICTag* blk = findBlock(blk_addr, pkt->req->asid); + Addr blk_addr = blkAlign(pkt->getAddr); + IICTag* blk = findBlock(blk_addr, pkt->req->getAsid()); if (blk->numData > 0 && dataReferenceCount[blk->data_ptr[0]] != 1) { // copy the data @@ -843,7 +852,7 @@ IIC::fixCopy(Packet * &pkt, PacketList* &writebacks) /** * @todo Remove this refetch once we change IIC to pointer based */ - blk = findBlock(blk_addr, pkt->req->asid); + blk = findBlock(blk_addr, pkt->req->getAsid()); assert(blk); if (cache->doData()) { memcpy(&(dataBlks[new_data][0]), @@ -855,6 +864,7 @@ IIC::fixCopy(Packet * &pkt, PacketList* &writebacks) blk->data_ptr[i] = new_data; } } +#endif } void diff --git a/src/mem/cache/tags/iic.hh b/src/mem/cache/tags/iic.hh index ef3f03c534..6628f7e7a9 100644 --- a/src/mem/cache/tags/iic.hh +++ b/src/mem/cache/tags/iic.hh @@ -475,7 +475,7 @@ class IIC : public BaseTags * @param compress_blocks List of blocks to compress, for adaptive comp. * @return The block to place the replacement in. */ - IICTag* findReplacement(Packet * &pkt, PacketList* &writebacks, + IICTag* findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks); /** @@ -495,7 +495,7 @@ class IIC : public BaseTags * needed when writing to a compressed block. */ void writeData(IICTag *blk, uint8_t *data, int size, - PacketList* & writebacks); + PacketList & writebacks); /** * Perform a block aligned copy from the source address to the destination. @@ -504,14 +504,14 @@ class IIC : public BaseTags * @param asid The address space DI. * @param writebacks List for any generated writeback requests. */ - void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks); + void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks); /** * If a block is currently marked copy on write, copy it before writing. * @param req The write request. * @param writebacks List for any generated writeback requests. */ - void fixCopy(Packet * &pkt, PacketList* &writebacks); + void fixCopy(Packet * &pkt, PacketList &writebacks); /** * Called at end of simulation to complete average block reference stats. @@ -541,14 +541,14 @@ private: * Free the resources associated with the next replacement block. * @param writebacks A list of any writebacks to perform. */ - void freeReplacementBlock(PacketList* & writebacks); + void freeReplacementBlock(PacketList & writebacks); /** * Return the pointer to a free data block. * @param writebacks A list of any writebacks to perform. * @return A pointer to a free data block. */ - unsigned long getFreeDataBlock(PacketList* & writebacks); + unsigned long getFreeDataBlock(PacketList & writebacks); /** * Get a free tag in the given hash set. @@ -556,7 +556,7 @@ private: * @param writebacks A list of any writebacks to perform. * @return a pointer to a free tag. */ - IICTag* getFreeTag(int set, PacketList* & writebacks); + IICTag* getFreeTag(int set, PacketList & writebacks); /** * Free the resources associated with the given tag. diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc index 19a52aade7..81b84e11e8 100644 --- a/src/mem/cache/tags/lru.cc +++ b/src/mem/cache/tags/lru.cc @@ -135,7 +135,7 @@ LRU::LRU(int _numSets, int _blkSize, int _assoc, int _hit_latency) : // table; won't matter because the block is invalid blk->tag = j; blk->whenReady = 0; - blk->req->asid = -1; + blk->asid = -1; blk->isTouched = false; blk->size = blkSize; sets[i].blks[j]=blk; @@ -187,8 +187,8 @@ LRU::findBlock(Addr addr, int asid, int &lat) LRUBlk* LRU::findBlock(Packet * &pkt, int &lat) { - Addr addr = pkt->paddr; - int asid = pkt->req->asid; + Addr addr = pkt->getAddr(); + int asid = pkt->req->getAsid(); Addr tag = extractTag(addr); unsigned set = extractSet(addr); @@ -217,16 +217,15 @@ LRU::findBlock(Addr addr, int asid) const } LRUBlk* -LRU::findReplacement(Packet * &pkt, PacketList* &writebacks, +LRU::findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks) { - unsigned set = extractSet(pkt->paddr); + unsigned set = extractSet(pkt->getAddr()); // grab a replacement candidate LRUBlk *blk = sets[set].blks[assoc-1]; sets[set].moveToHead(blk); if (blk->isValid()) { - int req->setThreadNum() = (blk->xc) ? blk->xc->getThreadNum() : 0; - replacements[req->getThreadNum()]++; + replacements[0]++; totalRefs += blk->refCount; ++sampledRefs; blk->refCount = 0; @@ -254,7 +253,7 @@ LRU::invalidateBlk(int asid, Addr addr) } void -LRU::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) +LRU::doCopy(Addr source, Addr dest, int asid, PacketList &writebacks) { assert(source == blkAlign(source)); assert(dest == blkAlign(dest)); @@ -263,29 +262,31 @@ LRU::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) LRUBlk *dest_blk = findBlock(dest, asid); if (dest_blk == NULL) { // Need to do a replacement - Packet * pkt = new Packet(); - pkt->paddr = dest; + Request *search = new Request(dest,1,0); + Packet * pkt = new Packet(search, Packet::ReadReq, -1); BlkList dummy_list; dest_blk = findReplacement(pkt, writebacks, dummy_list); if (dest_blk->isValid() && dest_blk->isModified()) { // Need to writeback data. - pkt = buildWritebackReq(regenerateBlkAddr(dest_blk->tag, +/* pkt = buildWritebackReq(regenerateBlkAddr(dest_blk->tag, dest_blk->set), dest_blk->req->asid, dest_blk->xc, blkSize, - (cache->doData())?dest_blk->data:0, + dest_blk->data, dest_blk->size); - writebacks.push_back(pkt); +*/ + Request *writebackReq = new Request(regenerateBlkAddr(dest_blk->tag, + dest_blk->set), + blkSize, 0); + Packet *writeback = new Packet(writebackReq, Packet::Writeback, -1); + writeback->dataDynamic(dest_blk->data); + writebacks.push_back(writeback); } dest_blk->tag = extractTag(dest); - dest_blk->req->asid = asid; - /** - * @todo Do we need to pass in the execution context, or can we - * assume its the same? - */ - assert(source_blk->xc); - dest_blk->xc = source_blk->xc; + dest_blk->asid = asid; + delete search; + delete pkt; } /** * @todo Can't assume the status once we have coherence on copies. @@ -293,9 +294,7 @@ LRU::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) // Set this block as readable, writeable, and dirty. dest_blk->status = 7; - if (cache->doData()) { - memcpy(dest_blk->data, source_blk->data, blkSize); - } + memcpy(dest_blk->data, source_blk->data, blkSize); } void diff --git a/src/mem/cache/tags/lru.hh b/src/mem/cache/tags/lru.hh index 9b4a557772..437244660e 100644 --- a/src/mem/cache/tags/lru.hh +++ b/src/mem/cache/tags/lru.hh @@ -201,7 +201,7 @@ public: * @param compress_blocks List of blocks to compress, for adaptive comp. * @return The block to place the replacement in. */ - LRUBlk* findReplacement(Packet * &pkt, PacketList* &writebacks, + LRUBlk* findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks); /** @@ -296,7 +296,7 @@ public: * needed when writing to a compressed block. */ void writeData(LRUBlk *blk, uint8_t *data, int size, - PacketList* & writebacks) + PacketList & writebacks) { assert(size <= blkSize); blk->size = size; @@ -309,12 +309,12 @@ public: * @param asid The address space DI. * @param writebacks List for any generated writeback requests. */ - void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks); + void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks); /** * No impl. */ - void fixCopy(Packet * &pkt, PacketList* &writebacks) + void fixCopy(Packet * &pkt, PacketList &writebacks) { } diff --git a/src/mem/cache/tags/split.cc b/src/mem/cache/tags/split.cc index 9d9036abb2..bf23fb8cb3 100644 --- a/src/mem/cache/tags/split.cc +++ b/src/mem/cache/tags/split.cc @@ -270,30 +270,30 @@ SplitBlk* Split::findBlock(Packet * &pkt, int &lat) { - Addr aligned = blkAlign(pkt->paddr); + Addr aligned = blkAlign(pkt->getAddr()); if (memHash.count(aligned)) { memHash[aligned]++; - } else if (pkt->nic_pkt) { + } else if (pkt->nic_pkt()) { memHash[aligned] = 1; } - SplitBlk *blk = lru->findBlock(pkt->paddr, pkt->req->asid, lat); + SplitBlk *blk = lru->findBlock(pkt->getAddr(), pkt->req->getAsid(), lat); if (blk) { - if (pkt->nic_pkt) { + if (pkt->nic_pkt()) { NR_CP_hits++; } else { CR_CP_hits++; } } else { if (lifo && lifo_net) { - blk = lifo_net->findBlock(pkt->paddr, pkt->req->asid, lat); + blk = lifo_net->findBlock(pkt->getAddr(), pkt->req->getAsid(), lat); } else if (lru_net) { - blk = lru_net->findBlock(pkt->paddr, pkt->req->asid, lat); + blk = lru_net->findBlock(pkt->getAddr(), pkt->req->getAsid(), lat); } if (blk) { - if (pkt->nic_pkt) { + if (pkt->nic_pkt()) { NR_NP_hits++; } else { CR_NP_hits++; @@ -304,7 +304,7 @@ Split::findBlock(Packet * &pkt, int &lat) if (blk) { Tick latency = curTick - blk->ts; if (blk->isNIC) { - if (!blk->isUsed && !pkt->nic_pkt) { + if (!blk->isUsed && !pkt->nic_pkt()) { useByCPUCycleDist.sample(latency); nicUseByCPUCycleTotal += latency; nicBlksUsedByCPU++; @@ -312,7 +312,7 @@ Split::findBlock(Packet * &pkt, int &lat) } blk->isUsed = true; - if (pkt->nic_pkt) { + if (pkt->nic_pkt()) { DPRINTF(Split, "found block in partition %d\n", blk->part); } } @@ -350,12 +350,12 @@ Split::findBlock(Addr addr, int asid) const } SplitBlk* -Split::findReplacement(Packet * &pkt, PacketList* &writebacks, +Split::findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks) { SplitBlk *blk; - if (pkt->nic_pkt) { + if (pkt->nic_pkt()) { DPRINTF(Split, "finding a replacement for nic_req\n"); nic_repl++; if (lifo && lifo_net) @@ -397,7 +397,7 @@ Split::findReplacement(Packet * &pkt, PacketList* &writebacks, // blk attributes for the new blk coming IN blk->ts = curTick; - blk->isNIC = (pkt->nic_pkt) ? true : false; + blk->isNIC = (pkt->nic_pkt()) ? true : false; return blk; } @@ -422,7 +422,7 @@ Split::invalidateBlk(int asid, Addr addr) } void -Split::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) +Split::doCopy(Addr source, Addr dest, int asid, PacketList &writebacks) { if (lru->probe(asid, source)) lru->doCopy(source, dest, asid, writebacks); diff --git a/src/mem/cache/tags/split.hh b/src/mem/cache/tags/split.hh index 6f2441597d..5e03402690 100644 --- a/src/mem/cache/tags/split.hh +++ b/src/mem/cache/tags/split.hh @@ -224,7 +224,7 @@ class Split : public BaseTags * @param compress_blocks List of blocks to compress, for adaptive comp. * @return The block to place the replacement in. */ - SplitBlk* findReplacement(Packet * &pkt, PacketList* &writebacks, + SplitBlk* findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks); @@ -304,7 +304,7 @@ class Split : public BaseTags * needed when writing to a compressed block. */ void writeData(SplitBlk *blk, uint8_t *data, int size, - PacketList* & writebacks) + PacketList & writebacks) { assert(size <= blkSize); blk->size = size; @@ -317,12 +317,12 @@ class Split : public BaseTags * @param asid The address space DI. * @param writebacks List for any generated writeback requests. */ - void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks); + void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks); /** * No impl. */ - void fixCopy(Packet * &pkt, PacketList* &writebacks) + void fixCopy(Packet * &pkt, PacketList &writebacks) { } diff --git a/src/mem/cache/tags/split_lifo.cc b/src/mem/cache/tags/split_lifo.cc index c6bb91eff1..f6493fdd28 100644 --- a/src/mem/cache/tags/split_lifo.cc +++ b/src/mem/cache/tags/split_lifo.cc @@ -257,8 +257,8 @@ SplitLIFO::findBlock(Addr addr, int asid, int &lat) SplitBlk* SplitLIFO::findBlock(Packet * &pkt, int &lat) { - Addr addr = pkt->paddr; - int asid = pkt->req->asid; + Addr addr = pkt->getAddr(); + int asid = pkt->req->getAsid(); Addr tag = extractTag(addr); unsigned set = extractSet(addr); @@ -292,10 +292,10 @@ SplitLIFO::findBlock(Addr addr, int asid) const } SplitBlk* -SplitLIFO::findReplacement(Packet * &pkt, PacketList* &writebacks, +SplitLIFO::findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks) { - unsigned set = extractSet(pkt->paddr); + unsigned set = extractSet(pkt->getAddr()); SplitBlk *firstIn = sets[set].firstIn; SplitBlk *lastIn = sets[set].lastIn; @@ -315,10 +315,9 @@ SplitLIFO::findReplacement(Packet * &pkt, PacketList* &writebacks, } DPRINTF(Split, "just assigned %#x addr into LIFO, replacing %#x status %#x\n", - pkt->paddr, regenerateBlkAddr(blk->tag, set), blk->status); + pkt->getAddr(), regenerateBlkAddr(blk->tag, set), blk->status); if (blk->isValid()) { - int req->setThreadNum() = (blk->xc) ? blk->xc->getThreadNum() : 0; - replacements[req->getThreadNum()]++; + replacements[0]++; totalRefs += blk->refCount; ++sampledRefs; blk->refCount = 0; @@ -349,8 +348,10 @@ SplitLIFO::invalidateBlk(int asid, Addr addr) } void -SplitLIFO::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) +SplitLIFO::doCopy(Addr source, Addr dest, int asid, PacketList &writebacks) { +//Copy Unsuported for now +#if 0 assert(source == blkAlign(source)); assert(dest == blkAlign(dest)); SplitBlk *source_blk = findBlock(source, asid); @@ -391,6 +392,7 @@ SplitLIFO::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) if (cache->doData()) { memcpy(dest_blk->data, source_blk->data, blkSize); } +#endif } void diff --git a/src/mem/cache/tags/split_lifo.hh b/src/mem/cache/tags/split_lifo.hh index c50eaa53db..dfcaa0b679 100644 --- a/src/mem/cache/tags/split_lifo.hh +++ b/src/mem/cache/tags/split_lifo.hh @@ -224,7 +224,7 @@ public: * @param compress_blocks List of blocks to compress, for adaptive comp. * @return The block to place the replacement in. */ - SplitBlk* findReplacement(Packet * &pkt, PacketList* &writebacks, + SplitBlk* findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks); /** @@ -319,7 +319,7 @@ public: * needed when writing to a compressed block. */ void writeData(SplitBlk *blk, uint8_t *data, int size, - PacketList* & writebacks) + PacketList & writebacks) { assert(size <= blkSize); blk->size = size; @@ -332,12 +332,12 @@ public: * @param asid The address space DI. * @param writebacks List for any generated writeback requests. */ - void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks); + void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks); /** * No impl. */ - void fixCopy(Packet * &pkt, PacketList* &writebacks) + void fixCopy(Packet * &pkt, PacketList &writebacks) { } diff --git a/src/mem/cache/tags/split_lru.cc b/src/mem/cache/tags/split_lru.cc index 4b7f4c1147..7fc7420016 100644 --- a/src/mem/cache/tags/split_lru.cc +++ b/src/mem/cache/tags/split_lru.cc @@ -135,7 +135,7 @@ SplitLRU::SplitLRU(int _numSets, int _blkSize, int _assoc, int _hit_latency, int // table; won't matter because the block is invalid blk->tag = j; blk->whenReady = 0; - blk->req->asid = -1; + blk->asid = -1; blk->isTouched = false; blk->size = blkSize; sets[i].blks[j]=blk; @@ -206,8 +206,8 @@ SplitLRU::findBlock(Addr addr, int asid, int &lat) SplitBlk* SplitLRU::findBlock(Packet * &pkt, int &lat) { - Addr addr = pkt->paddr; - int asid = pkt->req->asid; + Addr addr = pkt->getAddr(); + int asid = pkt->req->getAsid(); Addr tag = extractTag(addr); unsigned set = extractSet(addr); @@ -236,16 +236,15 @@ SplitLRU::findBlock(Addr addr, int asid) const } SplitBlk* -SplitLRU::findReplacement(Packet * &pkt, PacketList* &writebacks, +SplitLRU::findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks) { - unsigned set = extractSet(pkt->paddr); + unsigned set = extractSet(pkt->getAddr()); // grab a replacement candidate SplitBlk *blk = sets[set].blks[assoc-1]; sets[set].moveToHead(blk); if (blk->isValid()) { - int req->setThreadNum() = (blk->xc) ? blk->xc->getThreadNum() : 0; - replacements[req->getThreadNum()]++; + replacements[0]++; totalRefs += blk->refCount; ++sampledRefs; blk->refCount = 0; @@ -275,8 +274,10 @@ SplitLRU::invalidateBlk(int asid, Addr addr) } void -SplitLRU::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) +SplitLRU::doCopy(Addr source, Addr dest, int asid, PacketList &writebacks) { +//Copy not supported for now +#if 0 assert(source == blkAlign(source)); assert(dest == blkAlign(dest)); SplitBlk *source_blk = findBlock(source, asid); @@ -317,6 +318,7 @@ SplitLRU::doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks) if (cache->doData()) { memcpy(dest_blk->data, source_blk->data, blkSize); } +#endif } void diff --git a/src/mem/cache/tags/split_lru.hh b/src/mem/cache/tags/split_lru.hh index 1c0fc8600a..03886b1d8d 100644 --- a/src/mem/cache/tags/split_lru.hh +++ b/src/mem/cache/tags/split_lru.hh @@ -207,7 +207,7 @@ public: * @param compress_blocks List of blocks to compress, for adaptive comp. * @return The block to place the replacement in. */ - SplitBlk* findReplacement(Packet * &pkt, PacketList* &writebacks, + SplitBlk* findReplacement(Packet * &pkt, PacketList &writebacks, BlkList &compress_blocks); /** @@ -302,7 +302,7 @@ public: * needed when writing to a compressed block. */ void writeData(SplitBlk *blk, uint8_t *data, int size, - PacketList* & writebacks) + PacketList & writebacks) { assert(size <= blkSize); blk->size = size; @@ -315,12 +315,12 @@ public: * @param asid The address space DI. * @param writebacks List for any generated writeback requests. */ - void doCopy(Addr source, Addr dest, int asid, PacketList* &writebacks); + void doCopy(Addr source, Addr dest, int asid, PacketList &writebacks); /** * No impl. */ - void fixCopy(Packet * &pkt, PacketList* &writebacks) + void fixCopy(Packet * &pkt, PacketList &writebacks) { } diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 0369745c9b..2b97ab0c1d 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -100,9 +100,6 @@ class Packet /** The size of the request or transfer. */ int size; - /** The offset within the block that represents the data. */ - int offset; - /** Device address (e.g., bus ID) of the source of the * transaction. The source is not responsible for setting this * field; it is set implicitly by the interconnect when the @@ -120,8 +117,6 @@ class Packet bool addrSizeValid; /** Is the 'src' field valid? */ bool srcValid; - /** Is the offset valid. */ - bool offsetValid; public: @@ -225,6 +220,9 @@ class Packet bool isCacheFill() { return (flags & CACHE_LINE_FILL) != 0; } bool isNoAllocate() { return (flags & NO_ALLOCATE) != 0; } + bool isCompressed() { return (flags & COMPRESSED) != 0; } + + bool nic_pkt() { assert("Unimplemented\n" && 0); } /** Possible results of a packet's request. */ enum Result @@ -249,7 +247,7 @@ class Packet Addr getAddr() const { assert(addrSizeValid); return addr; } int getSize() const { assert(addrSizeValid); return size; } - int getOffset() const { assert(offsetValid); return offset; } + Addr getOffset(int blkSize) const { return req->getPaddr() & (Addr)(blkSize - 1); } void addrOverride(Addr newAddr) { assert(addrSizeValid); addr = newAddr; } void cmdOverride(Command newCmd) { cmd = newCmd; } @@ -262,7 +260,7 @@ class Packet : data(NULL), staticData(false), dynamicData(false), arrayData(false), addr(_req->paddr), size(_req->size), dest(_dest), addrSizeValid(_req->validPaddr), - srcValid(false), offsetValid(false), + srcValid(false), req(_req), coherence(NULL), senderState(NULL), cmd(_cmd), result(Unknown) { @@ -275,8 +273,8 @@ class Packet Packet(Request *_req, Command _cmd, short _dest, int _blkSize) : data(NULL), staticData(false), dynamicData(false), arrayData(false), addr(_req->paddr & ~(_blkSize - 1)), size(_blkSize), - offset(_req->paddr & (_blkSize - 1)), dest(_dest), - addrSizeValid(_req->validPaddr), srcValid(false), offsetValid(true), + dest(_dest), + addrSizeValid(_req->validPaddr), srcValid(false), req(_req), coherence(NULL), senderState(NULL), cmd(_cmd), result(Unknown) { diff --git a/src/mem/request.hh b/src/mem/request.hh index 46d9b6fd70..a1524f8073 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -63,6 +63,8 @@ const unsigned PF_EXCLUSIVE = 0x100; const unsigned EVICT_NEXT = 0x200; /** The request should ignore unaligned access faults */ const unsigned NO_ALIGN_FAULT = 0x400; +/** The request was an instruction read. */ +const unsigned INST_READ = 0x800; class Request { @@ -228,6 +230,8 @@ class Request /** Accessor Function to Check Cacheability. */ bool isUncacheable() { return getFlags() & UNCACHEABLE; } + bool isInstRead() { return getFlags() & INST_READ; } + friend class Packet; }; From dea1a19b2de2fe031f714904c5247cf27b363237 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Fri, 30 Jun 2006 11:34:27 -0400 Subject: [PATCH 06/17] Fix the packet data allocation methods. Small fixes from changesets after my initial work. This now compiles. src/mem/cache/base_cache.cc: Fix getPort function that changed src/mem/cache/base_cache.hh: Fix get port function, provide default implementations of virtual functions in the base class src/mem/cache/cache.hh: Fix virtual function declerations src/mem/cache/cache_builder.cc: Fix params src/mem/cache/cache_impl.hh: src/mem/cache/miss/blocking_buffer.cc: src/mem/cache/miss/miss_queue.cc: src/mem/cache/miss/mshr.cc: src/mem/cache/prefetch/base_prefetcher.cc: src/mem/cache/tags/iic.cc: src/mem/cache/tags/lru.cc: Properly allocate data in packet --HG-- extra : convert_revision : dedf8b0f76ab90b06b60f8fe079c0ae361f91a48 --- src/mem/cache/base_cache.cc | 2 +- src/mem/cache/base_cache.hh | 26 ++++++++++++++++++----- src/mem/cache/cache.hh | 8 +++---- src/mem/cache/cache_builder.cc | 4 ++-- src/mem/cache/cache_impl.hh | 3 +-- src/mem/cache/miss/blocking_buffer.cc | 3 +-- src/mem/cache/miss/miss_queue.cc | 3 +-- src/mem/cache/miss/mshr.cc | 3 +-- src/mem/cache/prefetch/base_prefetcher.cc | 3 +-- src/mem/cache/tags/iic.cc | 7 +++--- src/mem/cache/tags/lru.cc | 3 ++- 11 files changed, 39 insertions(+), 26 deletions(-) diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index 89e23ce318..c1ed6d3d43 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -99,7 +99,7 @@ BaseCache::CachePort::clearBlocked() } Port* -BaseCache::getPort(const std::string &if_name) +BaseCache::getPort(const std::string &if_name, int idx) { if(if_name == "cpu_side") { diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh index 977e0ae297..2754fab5a6 100644 --- a/src/mem/cache/base_cache.hh +++ b/src/mem/cache/base_cache.hh @@ -41,6 +41,7 @@ #include #include +#include "base/misc.hh" #include "base/statistics.hh" #include "base/trace.hh" #include "mem/mem_object.hh" @@ -122,14 +123,29 @@ class BaseCache : public MemObject CachePort *memSidePort; public: - virtual Port *getPort(const std::string &if_name); + virtual Port *getPort(const std::string &if_name, int idx = -1); private: //To be defined in cache_impl.hh not in base class - virtual bool doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide); - virtual Tick doAtomicAccess(Packet *pkt, bool isCpuSide); - virtual void doFunctionalAccess(Packet *pkt, bool isCpuSide); - virtual void recvStatusChange(Port::Status status, bool isCpuSide); + virtual bool doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) + { + fatal("No implementation"); + } + + virtual Tick doAtomicAccess(Packet *pkt, bool isCpuSide) + { + fatal("No implementation"); + } + + virtual void doFunctionalAccess(Packet *pkt, bool isCpuSide) + { + fatal("No implementation"); + } + + virtual void recvStatusChange(Port::Status status, bool isCpuSide) + { + fatal("No implementation"); + } /** * Bit vector of the blocking reasons for the access path. diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index d2af1d8bf4..788715e761 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -146,16 +146,16 @@ class Cache : public BaseCache /** Instantiates a basic cache object. */ Cache(const std::string &_name, Params ¶ms); - bool doTimingAccess(Packet *pkt, CachePort *cachePort, + virtual bool doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide); - Tick doAtomicAccess(Packet *pkt, CachePort *cachePort, + virtual Tick doAtomicAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide); - void doFunctionalAccess(Packet *pkt, CachePort *cachePort, + virtual void doFunctionalAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide); - void recvStatusChange(Port::Status status, bool isCpuSide); + virtual void recvStatusChange(Port::Status status, bool isCpuSide); void regStats(); diff --git a/src/mem/cache/cache_builder.cc b/src/mem/cache/cache_builder.cc index 8758dc57a4..05a149a1cd 100644 --- a/src/mem/cache/cache_builder.cc +++ b/src/mem/cache/cache_builder.cc @@ -230,7 +230,7 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache) Cache, b, c>::Params params(tagStore, mq, coh, \ do_copy, base_params, \ /*in_bus, out_bus,*/ pf, \ - prefetch_access); \ + prefetch_access, hit_latency); \ Cache, b, c> *retval = \ new Cache, b, c>(getInstanceName(), /*hier,*/ \ params); \ @@ -242,7 +242,7 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache) retval->setMasterInterface(new MasterInterface, b, c>, Bus>(getInstanceName(), hier, retval, out_bus)); \ out_bus->rangeChange(); \ return retval; \ -*/return true; \ +*/return retval; \ } while (0) #define BUILD_CACHE_PANIC(x) do { \ diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index dbf2e49f14..f1e9c3698f 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -588,8 +588,7 @@ Cache::probe(Packet * &pkt, bool update) Packet * busPkt = new Packet(pkt->req,temp_cmd, -1, blkSize); - uint8_t* temp_data = new uint8_t[blkSize]; - busPkt->dataDynamicArray(temp_data); + busPkt->allocate(); busPkt->time = curTick; diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc index d745cb8c65..10d53b109d 100644 --- a/src/mem/cache/miss/blocking_buffer.cc +++ b/src/mem/cache/miss/blocking_buffer.cc @@ -210,8 +210,7 @@ BlockingBuffer::doWriteback(Addr addr, int asid, // Generate request Request * req = new Request(addr, size, 0); Packet * pkt = new Packet(req, Packet::Writeback, -1); - uint8_t *new_data = new uint8_t[size]; - pkt->dataDynamicArray(new_data); + pkt->allocate(); if (data) { memcpy(pkt->getPtr(), data, size); } diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc index 34290351de..99ebab0179 100644 --- a/src/mem/cache/miss/miss_queue.cc +++ b/src/mem/cache/miss/miss_queue.cc @@ -714,8 +714,7 @@ MissQueue::doWriteback(Addr addr, int asid, // Generate request Request * req = new Request(addr, size, 0); Packet * pkt = new Packet(req, Packet::Writeback, -1); - uint8_t *new_data = new uint8_t[size]; - pkt->dataDynamicArray(new_data); + pkt->allocate(); if (data) { memcpy(pkt->getPtr(), data, size); } diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index fe8cbeea45..05a2fe1c59 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -90,8 +90,7 @@ MSHR::allocateAsBuffer(Packet * &target) asid = target->req->getAsid(); threadNum = target->req->getThreadNum(); pkt = new Packet(target->req, target->cmd, -1); - uint8_t *new_data = new uint8_t[target->getSize()]; - pkt->dataDynamicArray(new_data); + pkt->allocate(); pkt->senderState = (Packet::SenderState*)this; pkt->time = curTick; } diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc index 29da537468..8975519896 100644 --- a/src/mem/cache/prefetch/base_prefetcher.cc +++ b/src/mem/cache/prefetch/base_prefetcher.cc @@ -181,8 +181,7 @@ BasePrefetcher::handleMiss(Packet * &pkt, Tick time) Request * prefetchReq = new Request(*addr, blkSize, 0); Packet * prefetch; prefetch = new Packet(prefetchReq, Packet::HardPFReq, -1); - uint8_t *new_data = new uint8_t[blkSize]; - prefetch->dataDynamicArray(new_data); + prefetch->allocate(); prefetch->req->setThreadContext(pkt->req->getCpuNum(), pkt->req->getThreadNum()); diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc index 0071ca2837..847fabc887 100644 --- a/src/mem/cache/tags/iic.cc +++ b/src/mem/cache/tags/iic.cc @@ -430,10 +430,11 @@ IIC::freeReplacementBlock(PacketList & writebacks) tag_ptr->data, tag_ptr->size); */ - Request *writebackReq = new Request(regenerateBlkAddr(tag_ptr->tag, 0), + Request *writebackReq = new Request(regenerateBlkAddr(tag_ptr->tag, 0), blkSize, 0); - Packet *writeback = new Packet(writebackReq, Packet::Writeback, -1); - writeback->dataDynamic(tag_ptr->data); + Packet *writeback = new Packet(writebackReq, Packet::Writeback, -1); + writeback->allocate(); + memcpy(writeback->getPtr(), tag_ptr->data, blkSize); writebacks.push_back(writeback); } diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc index 81b84e11e8..b7259bd3ab 100644 --- a/src/mem/cache/tags/lru.cc +++ b/src/mem/cache/tags/lru.cc @@ -280,7 +280,8 @@ LRU::doCopy(Addr source, Addr dest, int asid, PacketList &writebacks) dest_blk->set), blkSize, 0); Packet *writeback = new Packet(writebackReq, Packet::Writeback, -1); - writeback->dataDynamic(dest_blk->data); + writeback->allocate(); + memcpy(writeback->getPtr(),dest_blk->data, blkSize); writebacks.push_back(writeback); } dest_blk->tag = extractTag(dest); From 1bdc65b00f40b20dc5c7e97d3c8d8e4b311230a8 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Fri, 30 Jun 2006 16:25:35 -0400 Subject: [PATCH 07/17] First pass, now compiles with current head of tree. Compile and initialization work, still working on functionality. src/mem/cache/base_cache.cc: Temp fix for cpu's use of getPort functionality. CPU's will need to be ported to the new connector objects. Also, all packets have to have data or the delete fails. src/mem/cache/cache.hh: Fix function prototypes so overloading works src/mem/cache/cache_impl.hh: fix functions to match virtual base class src/mem/cache/miss/miss_queue.cc: Packets havve to have data, or delete fails src/python/m5/objects/BaseCache.py: Update for newmem --HG-- extra : convert_revision : 2b6ad1e9d8ae07ace9294cd257e2ccc0024b7fcb --- src/mem/cache/base_cache.cc | 21 ++++++++++++++------- src/mem/cache/cache.hh | 6 ++---- src/mem/cache/cache_impl.hh | 8 ++++---- src/mem/cache/miss/miss_queue.cc | 3 ++- src/python/m5/objects/BaseCache.py | 12 ++++++------ 5 files changed, 28 insertions(+), 22 deletions(-) diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index c1ed6d3d43..aaaf1bdefb 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -101,16 +101,21 @@ BaseCache::CachePort::clearBlocked() Port* BaseCache::getPort(const std::string &if_name, int idx) { - if(if_name == "cpu_side") + if (if_name == "") { - if(cpuSidePort != NULL) - panic("Already have a cpu side for this cache\n"); - cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true); + if(cpuSidePort == NULL) + cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true); return cpuSidePort; } - else if(if_name == "mem_side") + if (if_name == "functional") { - if(memSidePort != NULL) + if(cpuSidePort == NULL) + cpuSidePort = new CachePort(name() + "-cpu_side_port", this, true); + return cpuSidePort; + } + else if (if_name == "mem_side") + { + if (memSidePort != NULL) panic("Already have a mem side for this cache\n"); memSidePort = new CachePort(name() + "-mem_side_port", this, false); return memSidePort; @@ -121,9 +126,10 @@ BaseCache::getPort(const std::string &if_name, int idx) void BaseCache::regStats() { - Request temp_req; + Request temp_req((Addr) NULL, 4, 0); Packet::Command temp_cmd = Packet::ReadReq; Packet temp_pkt(&temp_req, temp_cmd, 0); //@todo FIx command strings so this isn't neccessary + temp_pkt.allocate(); //Temp allocate, all need data using namespace Stats; @@ -331,4 +337,5 @@ BaseCache::regStats() .name(name() + ".cache_copies") .desc("number of cache copies performed") ; + } diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 788715e761..1243c9d9ec 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -149,11 +149,9 @@ class Cache : public BaseCache virtual bool doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide); - virtual Tick doAtomicAccess(Packet *pkt, CachePort *cachePort, - bool isCpuSide); + virtual Tick doAtomicAccess(Packet *pkt, bool isCpuSide); - virtual void doFunctionalAccess(Packet *pkt, CachePort *cachePort, - bool isCpuSide); + virtual void doFunctionalAccess(Packet *pkt, bool isCpuSide); virtual void recvStatusChange(Port::Status status, bool isCpuSide); diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index f1e9c3698f..0cb33461bb 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -77,7 +77,7 @@ doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) template Tick Cache:: -doAtomicAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) +doAtomicAccess(Packet *pkt, bool isCpuSide) { if (isCpuSide) { @@ -97,18 +97,18 @@ doAtomicAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) template void Cache:: -doFunctionalAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide) +doFunctionalAccess(Packet *pkt, bool isCpuSide) { if (isCpuSide) { - probe(pkt, false); + probe(pkt, true); } else { if (pkt->isResponse()) handleResponse(pkt); else - snoopProbe(pkt, false); + snoopProbe(pkt, true); } } diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc index 99ebab0179..da0448ad3a 100644 --- a/src/mem/cache/miss/miss_queue.cc +++ b/src/mem/cache/miss/miss_queue.cc @@ -58,9 +58,10 @@ MissQueue::MissQueue(int numMSHRs, int numTargets, int write_buffers, void MissQueue::regStats(const string &name) { - Request temp_req; + Request temp_req((Addr) NULL, 4, 0); Packet::Command temp_cmd = Packet::ReadReq; Packet temp_pkt(&temp_req, temp_cmd, 0); //@todo FIx command strings so this isn't neccessary + temp_pkt.allocate(); using namespace Stats; diff --git a/src/python/m5/objects/BaseCache.py b/src/python/m5/objects/BaseCache.py index 33f44759bf..497b2b038a 100644 --- a/src/python/m5/objects/BaseCache.py +++ b/src/python/m5/objects/BaseCache.py @@ -1,29 +1,26 @@ from m5.config import * -from BaseMem import BaseMem +from MemObject import MemObject class Prefetch(Enum): vals = ['none', 'tagged', 'stride', 'ghb'] -class BaseCache(BaseMem): +class BaseCache(MemObject): type = 'BaseCache' adaptive_compression = Param.Bool(False, "Use an adaptive compression scheme") assoc = Param.Int("associativity") block_size = Param.Int("block size in bytes") + latency = Param.Int("Latency") compressed_bus = Param.Bool(False, "This cache connects to a compressed memory") compression_latency = Param.Latency('0ns', "Latency in cycles of compression algorithm") do_copy = Param.Bool(False, "perform fast copies in the cache") hash_delay = Param.Int(1, "time in cycles of hash access") - in_bus = Param.Bus(NULL, "incoming bus object") lifo = Param.Bool(False, "whether this NIC partition should use LIFO repl. policy") max_miss_count = Param.Counter(0, "number of misses to handle before calling exit") - mem_trace = Param.MemTraceWriter(NULL, - "memory trace writer to record accesses") mshrs = Param.Int("number of MSHRs (max outstanding requests)") - out_bus = Param.Bus("outgoing bus object") prioritizeRequests = Param.Bool(False, "always service demand misses first") protocol = Param.CoherenceProtocol(NULL, "coherence protocol to use") @@ -63,3 +60,6 @@ class BaseCache(BaseMem): "Use the CPU ID to seperate calculations of prefetches") prefetch_data_accesses_only = Param.Bool(False, "Only prefetch on data not on instruction accesses") + hit_latency = Param.Int(1,"Hit Latency of the cache") + cpu_side = Port("Port on side closer to CPU") + mem_side = Port("Port on side closer to MEM") From 7a4929813423c6f72827c58453cb9bd591f1801c Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Fri, 30 Jun 2006 17:21:58 -0400 Subject: [PATCH 08/17] AtomicSimpleCPU with a cache now runs the hello world! test program. Need to clean up a bunch of flags/hacks in the code. Then onto Timming mode. Functional accesses also work properly, although not exactly how we wanted them. I'll need to clean that up as well. src/cpu/simple/atomic.cc: Atomic CPU needs to set thread context so stats work in cache. Temporarily just use CPU=0 ThreadID=0 src/mem/cache/cache_impl.hh: Need to return success/failure properly still Physical memory object doesn't assert SATISFIED anymore, need to remove that flag src/mem/cache/tags/lru.cc: Doesn't work if the REQ doesn't set it's ASID. Temporary fix use 0 always --HG-- extra : convert_revision : d06a39684af593db699b64df9a29f80c61d8d050 --- src/cpu/simple/atomic.cc | 3 +++ src/mem/cache/cache_impl.hh | 10 ++++++++-- src/mem/cache/tags/lru.cc | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index be6f421b32..b7202cbbb7 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -124,15 +124,18 @@ AtomicSimpleCPU::AtomicSimpleCPU(Params *p) // @todo fix me and get the real cpu id & thread number!!! ifetch_req = new Request(); + ifetch_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast); ifetch_pkt->dataStatic(&inst); data_read_req = new Request(); + data_read_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE data_read_pkt = new Packet(data_read_req, Packet::ReadReq, Packet::Broadcast); data_read_pkt->dataStatic(&dataReg); data_write_req = new Request(); + data_write_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE data_write_pkt = new Packet(data_write_req, Packet::WriteReq, Packet::Broadcast); } diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 0cb33461bb..aae5cbf018 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -82,6 +82,8 @@ doAtomicAccess(Packet *pkt, bool isCpuSide) if (isCpuSide) { probe(pkt, true); + //TEMP ALWAYS SUCCES FOR NOW + pkt->result = Packet::Success; } else { @@ -101,7 +103,11 @@ doFunctionalAccess(Packet *pkt, bool isCpuSide) { if (isCpuSide) { + //TEMP USE CPU?THREAD 0 0 + pkt->req->setThreadContext(0,0); probe(pkt, true); + //TEMP ALWAYS SUCCESFUL FOR NOW + pkt->result = Packet::Success; } else { @@ -594,12 +600,12 @@ Cache::probe(Packet * &pkt, bool update) lat = memSidePort->sendAtomic(busPkt); - if (!(busPkt->flags & SATISFIED)) { +/* if (!(busPkt->flags & SATISFIED)) { // blocked at a higher level, just return return 0; } - misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; +*/ misses[pkt->cmdToIndex()][pkt->req->getThreadNum()]++; CacheBlk::State old_state = (blk) ? blk->status : 0; tags->handleFill(blk, busPkt, diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc index b7259bd3ab..556025a3ab 100644 --- a/src/mem/cache/tags/lru.cc +++ b/src/mem/cache/tags/lru.cc @@ -188,7 +188,7 @@ LRUBlk* LRU::findBlock(Packet * &pkt, int &lat) { Addr addr = pkt->getAddr(); - int asid = pkt->req->getAsid(); + int asid = 0;//pkt->req->getAsid(); Addr tag = extractTag(addr); unsigned set = extractSet(addr); From d9ef772e8d43ebfd2a4bece76f33cc62d71258a6 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 30 Jun 2006 19:52:08 -0400 Subject: [PATCH 09/17] Make O3CPU model independent of the ISA Use O3CPU when building instead of AlphaO3CPU. I could use some better python magic in the cpu_models.py file! AUTHORS: add middle initial SConstruct: change from AlphaO3CPU to O3CPU src/cpu/SConscript: edits to build O3CPU instead of AlphaO3CPU src/cpu/cpu_models.py: change substitution template to use proper CPU EXEC CONTEXT For O3CPU Model... Actually, some Python expertise could be used here. The 'env' variable is not passed to this file, so I had to parse through the ARGV to find the ISA... src/cpu/o3/base_dyn_inst.cc: src/cpu/o3/bpred_unit.cc: src/cpu/o3/commit.cc: src/cpu/o3/cpu.cc: src/cpu/o3/cpu.hh: src/cpu/o3/decode.cc: src/cpu/o3/fetch.cc: src/cpu/o3/iew.cc: src/cpu/o3/inst_queue.cc: src/cpu/o3/lsq.cc: src/cpu/o3/lsq_unit.cc: src/cpu/o3/mem_dep_unit.cc: src/cpu/o3/rename.cc: src/cpu/o3/rob.cc: use isa_specific.hh src/sim/process.cc: only initi NextNPC if not ALPHA src/cpu/o3/alpha/cpu.cc: alphao3cpu impl src/cpu/o3/alpha/cpu.hh: move AlphaTC to it's own file src/cpu/o3/alpha/cpu_impl.hh: Move AlphaTC to it's own file ... src/cpu/o3/alpha/dyn_inst.cc: src/cpu/o3/alpha/dyn_inst.hh: src/cpu/o3/alpha/dyn_inst_impl.hh: include paths src/cpu/o3/alpha/impl.hh: include paths, set default MaxThreads to 2 instead of 4 src/cpu/o3/alpha/params.hh: set Alpha Specific Params here src/python/m5/objects/O3CPU.py: add O3CPU class src/cpu/o3/SConscript: include isa-specific build files src/cpu/o3/alpha/thread_context.cc: NEW HOME of AlphaTC src/cpu/o3/alpha/thread_context.hh: new home of AlphaTC src/cpu/o3/isa_specific.hh: includes ISA specific files src/cpu/o3/params.hh: base o3 params src/cpu/o3/thread_context.hh: base o3 thread context src/cpu/o3/thread_context_impl.hh: base o3 thead context impl --HG-- rename : src/cpu/o3/alpha_cpu.cc => src/cpu/o3/alpha/cpu.cc rename : src/cpu/o3/alpha_cpu.hh => src/cpu/o3/alpha/cpu.hh rename : src/cpu/o3/alpha_cpu_builder.cc => src/cpu/o3/alpha/cpu_builder.cc rename : src/cpu/o3/alpha_cpu_impl.hh => src/cpu/o3/alpha/cpu_impl.hh rename : src/cpu/o3/alpha_dyn_inst.cc => src/cpu/o3/alpha/dyn_inst.cc rename : src/cpu/o3/alpha_dyn_inst.hh => src/cpu/o3/alpha/dyn_inst.hh rename : src/cpu/o3/alpha_dyn_inst_impl.hh => src/cpu/o3/alpha/dyn_inst_impl.hh rename : src/cpu/o3/alpha_impl.hh => src/cpu/o3/alpha/impl.hh rename : src/cpu/o3/alpha_params.hh => src/cpu/o3/alpha/params.hh rename : src/python/m5/objects/AlphaO3CPU.py => src/python/m5/objects/O3CPU.py extra : convert_revision : d377d6417452ac337bc502f28b2fde907d6b340e --- AUTHORS | 2 +- SConstruct | 2 +- src/cpu/SConscript | 8 +- src/cpu/cpu_models.py | 22 +- src/cpu/o3/SConscript | 79 +++ src/cpu/o3/{alpha_cpu.cc => alpha/cpu.cc} | 6 +- src/cpu/o3/alpha/cpu.hh | 204 ++++++++ .../cpu_builder.cc} | 0 .../{alpha_cpu_impl.hh => alpha/cpu_impl.hh} | 469 +---------------- .../{alpha_dyn_inst.cc => alpha/dyn_inst.cc} | 4 +- .../{alpha_dyn_inst.hh => alpha/dyn_inst.hh} | 4 +- .../dyn_inst_impl.hh} | 2 +- src/cpu/o3/{alpha_impl.hh => alpha/impl.hh} | 4 +- src/cpu/o3/alpha/params.hh | 69 +++ src/cpu/o3/alpha/thread_context.cc | 36 ++ src/cpu/o3/alpha/thread_context.hh | 70 +++ src/cpu/o3/alpha_cpu.hh | 434 ---------------- src/cpu/o3/base_dyn_inst.cc | 3 +- src/cpu/o3/bpred_unit.cc | 3 +- src/cpu/o3/commit.cc | 3 +- src/cpu/o3/cpu.cc | 3 +- src/cpu/o3/cpu.hh | 8 + src/cpu/o3/decode.cc | 3 +- src/cpu/o3/fetch.cc | 3 +- src/cpu/o3/iew.cc | 3 +- src/cpu/o3/inst_queue.cc | 3 +- src/cpu/o3/isa_specific.hh | 40 ++ src/cpu/o3/lsq.cc | 4 +- src/cpu/o3/lsq_unit.cc | 4 +- src/cpu/o3/mem_dep_unit.cc | 3 +- src/cpu/o3/{alpha_params.hh => params.hh} | 30 +- src/cpu/o3/rename.cc | 3 +- src/cpu/o3/rob.cc | 3 +- src/cpu/o3/thread_context.hh | 243 +++++++++ src/cpu/o3/thread_context_impl.hh | 488 ++++++++++++++++++ .../m5/objects/{AlphaO3CPU.py => O3CPU.py} | 4 +- src/sim/process.cc | 3 + 37 files changed, 1300 insertions(+), 972 deletions(-) create mode 100755 src/cpu/o3/SConscript rename src/cpu/o3/{alpha_cpu.cc => alpha/cpu.cc} (94%) create mode 100644 src/cpu/o3/alpha/cpu.hh rename src/cpu/o3/{alpha_cpu_builder.cc => alpha/cpu_builder.cc} (100%) rename src/cpu/o3/{alpha_cpu_impl.hh => alpha/cpu_impl.hh} (52%) rename src/cpu/o3/{alpha_dyn_inst.cc => alpha/dyn_inst.cc} (95%) rename src/cpu/o3/{alpha_dyn_inst.hh => alpha/dyn_inst.hh} (99%) rename src/cpu/o3/{alpha_dyn_inst_impl.hh => alpha/dyn_inst_impl.hh} (99%) rename src/cpu/o3/{alpha_impl.hh => alpha/impl.hh} (98%) create mode 100644 src/cpu/o3/alpha/params.hh create mode 100755 src/cpu/o3/alpha/thread_context.cc create mode 100644 src/cpu/o3/alpha/thread_context.hh delete mode 100644 src/cpu/o3/alpha_cpu.hh create mode 100755 src/cpu/o3/isa_specific.hh rename src/cpu/o3/{alpha_params.hh => params.hh} (88%) mode change 100644 => 100755 create mode 100755 src/cpu/o3/thread_context.hh create mode 100755 src/cpu/o3/thread_context_impl.hh rename src/python/m5/objects/{AlphaO3CPU.py => O3CPU.py} (98%) diff --git a/AUTHORS b/AUTHORS index 8904070d89..ec3de7bb22 100644 --- a/AUTHORS +++ b/AUTHORS @@ -28,7 +28,7 @@ Ronald G. Dreslinski Jr Gabriel Black ----------------------- -Korey Sewell +Korey L. Sewell ----------------------- David Green diff --git a/SConstruct b/SConstruct index 2dc53f7cb3..b18fe66d34 100644 --- a/SConstruct +++ b/SConstruct @@ -263,7 +263,7 @@ env['ALL_ISA_LIST'] = ['alpha', 'sparc', 'mips'] # Define the universe of supported CPU models env['ALL_CPU_LIST'] = ['AtomicSimpleCPU', 'TimingSimpleCPU', - 'FullCPU', 'AlphaO3CPU', + 'FullCPU', 'O3CPU', 'OzoneCPU'] # Sticky options get saved in the options file so they persist from diff --git a/src/cpu/SConscript b/src/cpu/SConscript index baa5d531e4..3dcc2f1ec1 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -104,7 +104,7 @@ env.Depends('static_inst_exec_sigs.hh', Value(env['CPU_MODELS'])) # List of suppported CPUs by the Checker. Errors out if USE_CHECKER=True # and one of these are not being used. -CheckerSupportedCPUList = ['AlphaO3CPU', 'OzoneCPU'] +CheckerSupportedCPUList = ['O3CPU', 'OzoneCPU'] ################################################################# # @@ -130,12 +130,10 @@ if need_simple_base: if 'FastCPU' in env['CPU_MODELS']: sources += Split('fast/cpu.cc') -if 'AlphaO3CPU' in env['CPU_MODELS']: +if 'O3CPU' in env['CPU_MODELS']: + sources += SConscript('o3/SConscript', exports = 'env') sources += Split(''' o3/2bit_local_pred.cc - o3/alpha_dyn_inst.cc - o3/alpha_cpu.cc - o3/alpha_cpu_builder.cc o3/base_dyn_inst.cc o3/bpred_unit.cc o3/btb.cc diff --git a/src/cpu/cpu_models.py b/src/cpu/cpu_models.py index e7ef9ab42b..1add327458 100644 --- a/src/cpu/cpu_models.py +++ b/src/cpu/cpu_models.py @@ -26,6 +26,10 @@ # # Authors: Steve Reinhardt +import os +import os.path +import sys + ################ # CpuModel class # @@ -47,7 +51,6 @@ class CpuModel: # Add self to dict CpuModel.dict[name] = self - # # Define CPU models. # @@ -67,9 +70,6 @@ CpuModel('TimingSimpleCPU', 'timing_simple_cpu_exec.cc', CpuModel('FullCPU', 'full_cpu_exec.cc', '#include "encumbered/cpu/full/dyn_inst.hh"', { 'CPU_exec_context': 'DynInst' }) -CpuModel('AlphaO3CPU', 'alpha_o3_exec.cc', - '#include "cpu/o3/alpha_dyn_inst.hh"', - { 'CPU_exec_context': 'AlphaDynInst' }) CpuModel('OzoneSimpleCPU', 'ozone_simple_exec.cc', '#include "cpu/ozone/dyn_inst.hh"', { 'CPU_exec_context': 'OzoneDynInst' }) @@ -80,3 +80,17 @@ CpuModel('CheckerCPU', 'checker_cpu_exec.cc', '#include "cpu/checker/cpu.hh"', { 'CPU_exec_context': 'CheckerCPU' }) +# Maybe there is a more clever way to determine ISA +# here but since the environment variable isnt passed through +# here the easiest way is this... +sub_template = 'not found' +for argument in sys.argv: + if 'ALPHA' in argument: + sub_template = 'AlphaDynInst' + +if sub_template == 'not found': + sys.exit('NO CPU_exec_context substitution defined for this ISA') + +CpuModel('O3CPU', 'o3_cpu_exec.cc', + '#include "cpu/o3/isa_specific.hh"', + { 'CPU_exec_context': sub_template }) diff --git a/src/cpu/o3/SConscript b/src/cpu/o3/SConscript new file mode 100755 index 0000000000..e65d41411c --- /dev/null +++ b/src/cpu/o3/SConscript @@ -0,0 +1,79 @@ +# -*- mode:python -*- + +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Korey Sewell + +import os +import os.path +import sys + +# Import build environment variable from SConstruct. +Import('env') + + +################################################################# +# +# Include ISA-specific files for the O3 CPU-model +# +################################################################# + +sources = [] + +if env['TARGET_ISA'] == 'alpha': + sources += Split(''' + alpha/dyn_inst.cc + alpha/cpu.cc + alpha/thread_context.cc + alpha/cpu_builder.cc + ''') +elif env['TARGET_ISA'] == 'mips': + sys.exit('O3 CPU does not support MIPS') + #sources += Split(''' + # mips/dyn_inst.cc + # mips/cpu.cc + # mips/thread_context.cc + # mips/cpu_builder.cc + # ''') +elif env['TARGET_ISA'] == 'sparc': + sys.exit('O3 CPU does not support MIPS') + #sources += Split(''' + # sparc/dyn_inst.cc + # sparc/cpu.cc + # sparc/thread_context.cc + # sparc/cpu_builder.cc + # ''') +else: + sys.exit('O3 CPU does not support the \'%s\' ISA' % env['TARGET_ISA']) + + +# Convert file names to SCons File objects. This takes care of the +# path relative to the top of the directory tree. +sources = [File(s) for s in sources] + +Return('sources') + diff --git a/src/cpu/o3/alpha_cpu.cc b/src/cpu/o3/alpha/cpu.cc similarity index 94% rename from src/cpu/o3/alpha_cpu.cc rename to src/cpu/o3/alpha/cpu.cc index e44ed00311..87a4d03a77 100644 --- a/src/cpu/o3/alpha_cpu.cc +++ b/src/cpu/o3/alpha/cpu.cc @@ -28,9 +28,9 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_impl.hh" -#include "cpu/o3/alpha_cpu_impl.hh" -#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alphaimpl.hh" +#include "cpu/o3/alpha/cpu_impl.hh" +#include "cpu/o3/alpha/dyn_inst.hh" // Force instantiation of AlphaO3CPU for all the implemntations that are // needed. Consider merging this and alpha_dyn_inst.cc, and maybe all diff --git a/src/cpu/o3/alpha/cpu.hh b/src/cpu/o3/alpha/cpu.hh new file mode 100644 index 0000000000..b961341d58 --- /dev/null +++ b/src/cpu/o3/alpha/cpu.hh @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#ifndef __CPU_O3_ALPHA_CPU_HH__ +#define __CPU_O3_ALPHA_CPU_HH__ + +#include "arch/isa_traits.hh" +#include "cpu/thread_context.hh" +#include "cpu/o3/cpu.hh" +#include "sim/byteswap.hh" + +class EndQuiesceEvent; +namespace Kernel { + class Statistics; +}; + +class TranslatingPort; + +/** + * AlphaO3CPU class. Derives from the FullO3CPU class, and + * implements all ISA and implementation specific functions of the + * CPU. This is the CPU class that is used for the SimObjects, and is + * what is given to the DynInsts. Most of its state exists in the + * FullO3CPU; the state is has is mainly for ISA specific + * functionality. + */ +template +class AlphaO3CPU : public FullO3CPU +{ + protected: + typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + typedef TheISA::MiscReg MiscReg; + typedef TheISA::RegFile RegFile; + typedef TheISA::MiscRegFile MiscRegFile; + + public: + typedef O3ThreadState ImplState; + typedef O3ThreadState Thread; + typedef typename Impl::Params Params; + + /** Constructs an AlphaO3CPU with the given parameters. */ + AlphaO3CPU(Params *params); + +#if FULL_SYSTEM + /** ITB pointer. */ + AlphaITB *itb; + /** DTB pointer. */ + AlphaDTB *dtb; +#endif + + /** Registers statistics. */ + void regStats(); + +#if FULL_SYSTEM + /** Translates instruction requestion. */ + Fault translateInstReq(RequestPtr &req, Thread *thread) + { + return itb->translate(req, thread->getTC()); + } + + /** Translates data read request. */ + Fault translateDataReadReq(RequestPtr &req, Thread *thread) + { + return dtb->translate(req, thread->getTC(), false); + } + + /** Translates data write request. */ + Fault translateDataWriteReq(RequestPtr &req, Thread *thread) + { + return dtb->translate(req, thread->getTC(), true); + } + +#else + /** Translates instruction requestion in syscall emulation mode. */ + Fault translateInstReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Translates data read request in syscall emulation mode. */ + Fault translateDataReadReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + + /** Translates data write request in syscall emulation mode. */ + Fault translateDataWriteReq(RequestPtr &req, Thread *thread) + { + return thread->getProcessPtr()->pTable->translate(req); + } + +#endif + /** Reads a miscellaneous register. */ + MiscReg readMiscReg(int misc_reg, unsigned tid); + + /** Reads a misc. register, including any side effects the read + * might have as defined by the architecture. + */ + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid); + + /** Sets a miscellaneous register. */ + Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid); + + /** Sets a misc. register, including any side effects the write + * might have as defined by the architecture. + */ + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid); + + /** Initiates a squash of all in-flight instructions for a given + * thread. The source of the squash is an external update of + * state through the TC. + */ + void squashFromTC(unsigned tid); + +#if FULL_SYSTEM + /** Posts an interrupt. */ + void post_interrupt(int int_num, int index); + /** Reads the interrupt flag. */ + int readIntrFlag(); + /** Sets the interrupt flags. */ + void setIntrFlag(int val); + /** HW return from error interrupt. */ + Fault hwrei(unsigned tid); + /** Returns if a specific PC is a PAL mode PC. */ + bool inPalMode(uint64_t PC) + { return AlphaISA::PcPAL(PC); } + + bool simPalCheck(int palFunc, unsigned tid); + + /** Processes any interrupts. */ + void processInterrupts(); + + /** Halts the CPU. */ + void halt() { panic("Halt not implemented!\n"); } +#endif + + /** Traps to handle given fault. */ + void trap(Fault fault, unsigned tid); + +#if !FULL_SYSTEM + /** Executes a syscall. + * @todo: Determine if this needs to be virtual. + */ + void syscall(int64_t callnum, int tid); + /** Gets a syscall argument. */ + IntReg getSyscallArg(int i, int tid); + + /** Used to shift args for indirect syscall. */ + void setSyscallArg(int i, IntReg val, int tid); + + /** Sets the return value of a syscall. */ + void setSyscallReturn(SyscallReturn return_value, int tid); +#endif + + /** CPU read function, forwards read to LSQ. */ + template + Fault read(RequestPtr &req, T &data, int load_idx) + { + return this->iew.ldstQueue.read(req, data, load_idx); + } + + /** CPU write function, forwards write to LSQ. */ + template + Fault write(RequestPtr &req, T &data, int store_idx) + { + return this->iew.ldstQueue.write(req, data, store_idx); + } + + Addr lockAddr; + + /** Temporary fix for the lock flag, works in the UP case. */ + bool lockFlag; +}; + +#endif // __CPU_O3_ALPHA_CPU_HH__ diff --git a/src/cpu/o3/alpha_cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc similarity index 100% rename from src/cpu/o3/alpha_cpu_builder.cc rename to src/cpu/o3/alpha/cpu_builder.cc diff --git a/src/cpu/o3/alpha_cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh similarity index 52% rename from src/cpu/o3/alpha_cpu_impl.hh rename to src/cpu/o3/alpha/cpu_impl.hh index eca6fbbcbf..2da683398b 100644 --- a/src/cpu/o3/alpha_cpu_impl.hh +++ b/src/cpu/o3/alpha/cpu_impl.hh @@ -38,8 +38,9 @@ #include "sim/sim_events.hh" #include "sim/stats.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_params.hh" +#include "cpu/o3/alpha/cpu.hh" +#include "cpu/o3/alpha/params.hh" +#include "cpu/o3/alpha/tc.hh" #include "cpu/o3/comm.hh" #include "cpu/o3/thread_state.hh" @@ -110,7 +111,8 @@ AlphaO3CPU::AlphaO3CPU(Params *params) ThreadContext *tc; // Setup the TC that will serve as the interface to the threads/CPU. - AlphaTC *alpha_tc = new AlphaTC; + AlphaTC *alpha_tc = + new AlphaTC; tc = alpha_tc; @@ -118,7 +120,7 @@ AlphaO3CPU::AlphaO3CPU(Params *params) // CheckerThreadContext. #if USE_CHECKER if (params->checker) { - tc = new CheckerThreadContext( + tc = new CheckerThreadContext>( alpha_tc, this->checker); } #endif @@ -187,465 +189,6 @@ AlphaO3CPU::regStats() this->commit.regStats(); } -#if FULL_SYSTEM -template -VirtualPort * -AlphaO3CPU::AlphaTC::getVirtPort(ThreadContext *src_tc) -{ - if (!src_tc) - return thread->getVirtPort(); - - VirtualPort *vp; - Port *mem_port; - - vp = new VirtualPort("tc-vport", src_tc); - mem_port = cpu->system->physmem->getPort("functional"); - mem_port->setPeer(vp); - vp->setPeer(mem_port); - return vp; -} - -template -void -AlphaO3CPU::AlphaTC::dumpFuncProfile() -{ - // Currently not supported -} -#endif - -template -void -AlphaO3CPU::AlphaTC::takeOverFrom(ThreadContext *old_context) -{ - // some things should already be set up -#if FULL_SYSTEM - assert(getSystemPtr() == old_context->getSystemPtr()); -#else - assert(getProcessPtr() == old_context->getProcessPtr()); -#endif - - // copy over functional state - setStatus(old_context->status()); - copyArchRegs(old_context); - setCpuId(old_context->readCpuId()); - -#if !FULL_SYSTEM - thread->funcExeInst = old_context->readFuncExeInst(); -#else - EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent(); - if (other_quiesce) { - // Point the quiesce event's TC at this TC so that it wakes up - // the proper CPU. - other_quiesce->tc = this; - } - if (thread->quiesceEvent) { - thread->quiesceEvent->tc = this; - } - - // Transfer kernel stats from one CPU to the other. - thread->kernelStats = old_context->getKernelStats(); -// storeCondFailures = 0; - cpu->lockFlag = false; -#endif - - old_context->setStatus(ThreadContext::Unallocated); - - thread->inSyscall = false; - thread->trapPending = false; -} - -#if FULL_SYSTEM -template -void -AlphaO3CPU::AlphaTC::delVirtPort(VirtualPort *vp) -{ - delete vp->getPeer(); - delete vp; -} -#endif - -template -void -AlphaO3CPU::AlphaTC::activate(int delay) -{ - DPRINTF(O3CPU, "Calling activate on AlphaTC\n"); - - if (thread->status() == ThreadContext::Active) - return; - -#if FULL_SYSTEM - thread->lastActivate = curTick; -#endif - - if (thread->status() == ThreadContext::Unallocated) { - cpu->activateWhenReady(thread->readTid()); - return; - } - - thread->setStatus(ThreadContext::Active); - - // status() == Suspended - cpu->activateContext(thread->readTid(), delay); -} - -template -void -AlphaO3CPU::AlphaTC::suspend() -{ - DPRINTF(O3CPU, "Calling suspend on AlphaTC\n"); - - if (thread->status() == ThreadContext::Suspended) - return; - -#if FULL_SYSTEM - thread->lastActivate = curTick; - thread->lastSuspend = curTick; -#endif -/* -#if FULL_SYSTEM - // Don't change the status from active if there are pending interrupts - if (cpu->check_interrupts()) { - assert(status() == ThreadContext::Active); - return; - } -#endif -*/ - thread->setStatus(ThreadContext::Suspended); - cpu->suspendContext(thread->readTid()); -} - -template -void -AlphaO3CPU::AlphaTC::deallocate() -{ - DPRINTF(O3CPU, "Calling deallocate on AlphaTC\n"); - - if (thread->status() == ThreadContext::Unallocated) - return; - - thread->setStatus(ThreadContext::Unallocated); - cpu->deallocateContext(thread->readTid()); -} - -template -void -AlphaO3CPU::AlphaTC::halt() -{ - DPRINTF(O3CPU, "Calling halt on AlphaTC\n"); - - if (thread->status() == ThreadContext::Halted) - return; - - thread->setStatus(ThreadContext::Halted); - cpu->haltContext(thread->readTid()); -} - -template -void -AlphaO3CPU::AlphaTC::regStats(const std::string &name) -{ -#if FULL_SYSTEM - thread->kernelStats = new Kernel::Statistics(cpu->system); - thread->kernelStats->regStats(name + ".kern"); -#endif -} - -template -void -AlphaO3CPU::AlphaTC::serialize(std::ostream &os) -{ -#if FULL_SYSTEM - if (thread->kernelStats) - thread->kernelStats->serialize(os); -#endif - -} - -template -void -AlphaO3CPU::AlphaTC::unserialize(Checkpoint *cp, const std::string §ion) -{ -#if FULL_SYSTEM - if (thread->kernelStats) - thread->kernelStats->unserialize(cp, section); -#endif - -} - -#if FULL_SYSTEM -template -EndQuiesceEvent * -AlphaO3CPU::AlphaTC::getQuiesceEvent() -{ - return thread->quiesceEvent; -} - -template -Tick -AlphaO3CPU::AlphaTC::readLastActivate() -{ - return thread->lastActivate; -} - -template -Tick -AlphaO3CPU::AlphaTC::readLastSuspend() -{ - return thread->lastSuspend; -} - -template -void -AlphaO3CPU::AlphaTC::profileClear() -{} - -template -void -AlphaO3CPU::AlphaTC::profileSample() -{} -#endif - -template -TheISA::MachInst -AlphaO3CPU::AlphaTC:: getInst() -{ - return thread->getInst(); -} - -template -void -AlphaO3CPU::AlphaTC::copyArchRegs(ThreadContext *tc) -{ - // This function will mess things up unless the ROB is empty and - // there are no instructions in the pipeline. - unsigned tid = thread->readTid(); - PhysRegIndex renamed_reg; - - // First loop through the integer registers. - for (int i = 0; i < AlphaISA::NumIntRegs; ++i) { - renamed_reg = cpu->renameMap[tid].lookup(i); - - DPRINTF(O3CPU, "Copying over register %i, had data %lli, " - "now has data %lli.\n", - renamed_reg, cpu->readIntReg(renamed_reg), - tc->readIntReg(i)); - - cpu->setIntReg(renamed_reg, tc->readIntReg(i)); - } - - // Then loop through the floating point registers. - for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) { - renamed_reg = cpu->renameMap[tid].lookup(i + AlphaISA::FP_Base_DepTag); - cpu->setFloatRegBits(renamed_reg, - tc->readFloatRegBits(i)); - } - - // Copy the misc regs. - copyMiscRegs(tc, this); - - // Then finally set the PC and the next PC. - cpu->setPC(tc->readPC(), tid); - cpu->setNextPC(tc->readNextPC(), tid); -#if !FULL_SYSTEM - this->thread->funcExeInst = tc->readFuncExeInst(); -#endif -} - -template -void -AlphaO3CPU::AlphaTC::clearArchRegs() -{} - -template -uint64_t -AlphaO3CPU::AlphaTC::readIntReg(int reg_idx) -{ - return cpu->readArchIntReg(reg_idx, thread->readTid()); -} - -template -FloatReg -AlphaO3CPU::AlphaTC::readFloatReg(int reg_idx, int width) -{ - switch(width) { - case 32: - return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); - case 64: - return cpu->readArchFloatRegDouble(reg_idx, thread->readTid()); - default: - panic("Unsupported width!"); - return 0; - } -} - -template -FloatReg -AlphaO3CPU::AlphaTC::readFloatReg(int reg_idx) -{ - return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); -} - -template -FloatRegBits -AlphaO3CPU::AlphaTC::readFloatRegBits(int reg_idx, int width) -{ - DPRINTF(Fault, "Reading floatint register through the TC!\n"); - return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); -} - -template -FloatRegBits -AlphaO3CPU::AlphaTC::readFloatRegBits(int reg_idx) -{ - return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); -} - -template -void -AlphaO3CPU::AlphaTC::setIntReg(int reg_idx, uint64_t val) -{ - cpu->setArchIntReg(reg_idx, val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template -void -AlphaO3CPU::AlphaTC::setFloatReg(int reg_idx, FloatReg val, int width) -{ - switch(width) { - case 32: - cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); - break; - case 64: - cpu->setArchFloatRegDouble(reg_idx, val, thread->readTid()); - break; - } - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template -void -AlphaO3CPU::AlphaTC::setFloatReg(int reg_idx, FloatReg val) -{ - cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); - - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template -void -AlphaO3CPU::AlphaTC::setFloatRegBits(int reg_idx, FloatRegBits val, - int width) -{ - DPRINTF(Fault, "Setting floatint register through the TC!\n"); - cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template -void -AlphaO3CPU::AlphaTC::setFloatRegBits(int reg_idx, FloatRegBits val) -{ - cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template -void -AlphaO3CPU::AlphaTC::setPC(uint64_t val) -{ - cpu->setPC(val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template -void -AlphaO3CPU::AlphaTC::setNextPC(uint64_t val) -{ - cpu->setNextPC(val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } -} - -template -Fault -AlphaO3CPU::AlphaTC::setMiscReg(int misc_reg, const MiscReg &val) -{ - Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } - - return ret_fault; -} - -template -Fault -AlphaO3CPU::AlphaTC::setMiscRegWithEffect(int misc_reg, - const MiscReg &val) -{ - Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val, - thread->readTid()); - - // Squash if we're not already in a state update mode. - if (!thread->trapPending && !thread->inSyscall) { - cpu->squashFromTC(thread->readTid()); - } - - return ret_fault; -} - -#if !FULL_SYSTEM - -template -TheISA::IntReg -AlphaO3CPU::AlphaTC::getSyscallArg(int i) -{ - return cpu->getSyscallArg(i, thread->readTid()); -} - -template -void -AlphaO3CPU::AlphaTC::setSyscallArg(int i, IntReg val) -{ - cpu->setSyscallArg(i, val, thread->readTid()); -} - -template -void -AlphaO3CPU::AlphaTC::setSyscallReturn(SyscallReturn return_value) -{ - cpu->setSyscallReturn(return_value, thread->readTid()); -} - -#endif // FULL_SYSTEM template MiscReg diff --git a/src/cpu/o3/alpha_dyn_inst.cc b/src/cpu/o3/alpha/dyn_inst.cc similarity index 95% rename from src/cpu/o3/alpha_dyn_inst.cc rename to src/cpu/o3/alpha/dyn_inst.cc index 0c1723eec1..97d2f3d080 100644 --- a/src/cpu/o3/alpha_dyn_inst.cc +++ b/src/cpu/o3/alpha/dyn_inst.cc @@ -28,8 +28,8 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst_impl.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha/dyn_inst_impl.hh" +#include "cpu/o3/alpha/impl.hh" // Force instantiation of AlphaDynInst for all the implementations that // are needed. diff --git a/src/cpu/o3/alpha_dyn_inst.hh b/src/cpu/o3/alpha/dyn_inst.hh similarity index 99% rename from src/cpu/o3/alpha_dyn_inst.hh rename to src/cpu/o3/alpha/dyn_inst.hh index 464e53e9da..9dee610b68 100644 --- a/src/cpu/o3/alpha_dyn_inst.hh +++ b/src/cpu/o3/alpha/dyn_inst.hh @@ -34,8 +34,8 @@ #include "arch/isa_traits.hh" #include "cpu/base_dyn_inst.hh" #include "cpu/inst_seq.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha/cpu.hh" +#include "cpu/o3/alpha/impl.hh" class Packet; diff --git a/src/cpu/o3/alpha_dyn_inst_impl.hh b/src/cpu/o3/alpha/dyn_inst_impl.hh similarity index 99% rename from src/cpu/o3/alpha_dyn_inst_impl.hh rename to src/cpu/o3/alpha/dyn_inst_impl.hh index 855ee99634..2d1b4b3098 100644 --- a/src/cpu/o3/alpha_dyn_inst_impl.hh +++ b/src/cpu/o3/alpha/dyn_inst_impl.hh @@ -28,7 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha/dyn_inst.hh" template AlphaDynInst::AlphaDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, diff --git a/src/cpu/o3/alpha_impl.hh b/src/cpu/o3/alpha/impl.hh similarity index 98% rename from src/cpu/o3/alpha_impl.hh rename to src/cpu/o3/alpha/impl.hh index 84c9e1c004..cdcdff34a8 100644 --- a/src/cpu/o3/alpha_impl.hh +++ b/src/cpu/o3/alpha/impl.hh @@ -33,7 +33,7 @@ #include "arch/alpha/isa_traits.hh" -#include "cpu/o3/alpha_params.hh" +#include "cpu/o3/alpha/params.hh" #include "cpu/o3/cpu_policy.hh" // Forward declarations. @@ -81,7 +81,7 @@ struct AlphaSimpleImpl enum { MaxWidth = 8, - MaxThreads = 4 + MaxThreads = 2 }; }; diff --git a/src/cpu/o3/alpha/params.hh b/src/cpu/o3/alpha/params.hh new file mode 100644 index 0000000000..b1f2a487d2 --- /dev/null +++ b/src/cpu/o3/alpha/params.hh @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#ifndef __CPU_O3_ALPHA_PARAMS_HH__ +#define __CPU_O3_ALPHA_PARAMS_HH__ + +#include "cpu/o3/cpu.hh" +#include "cpu/o3/params.hh" + +//Forward declarations +class AlphaDTB; +class AlphaITB; +class MemObject; +class Process; +class System; + +/** + * This file defines the parameters that will be used for the AlphaO3CPU. + * This must be defined externally so that the Impl can have a params class + * defined that it can pass to all of the individual stages. + */ + +class AlphaSimpleParams : public O3Params +{ + public: + +#if FULL_SYSTEM + AlphaITB *itb; + AlphaDTB *dtb; +#else + std::vector workload; + Process *process; +#endif // FULL_SYSTEM + + MemObject *mem; + + BaseCPU *checker; + + unsigned decodeToFetchDelay; +}; + +#endif // __CPU_O3_ALPHA_PARAMS_HH__ diff --git a/src/cpu/o3/alpha/thread_context.cc b/src/cpu/o3/alpha/thread_context.cc new file mode 100755 index 0000000000..4a02715bc6 --- /dev/null +++ b/src/cpu/o3/alpha/thread_context.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" +#include "cpu/o3/thread_context_impl.hh" + +template class O3ThreadContext; + diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh new file mode 100644 index 0000000000..890bff3ffb --- /dev/null +++ b/src/cpu/o3/alpha/thread_context.hh @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" + +template +class AlphaTC : public O3ThreadContext +{ +#if FULL_SYSTEM + /** Returns a pointer to the ITB. */ + virtual AlphaITB *getITBPtr() { return cpu->itb; } + + /** Returns a pointer to the DTB. */ + virtual AlphaDTB *getDTBPtr() { return cpu->dtb; } + + /** Returns pointer to the quiesce event. */ + virtual EndQuiesceEvent *getQuiesceEvent() + { + return thread->quiesceEvent; + } + + /** Returns if the thread is currently in PAL mode, based on + * the PC's value. */ + virtual bool inPalMode() + { return TheISA::PcPAL(cpu->readPC(thread->readTid())); } +#endif + + virtual uint64_t readNextNPC() + { + panic("Alpha has no NextNPC!"); + return 0; + } + + virtual void setNextNPC(uint64_t val) + { + panic("Alpha has no NextNPC!"); + } + + virtual void changeRegFileContext(TheISA::RegFile::ContextParam param, + TheISA::RegFile::ContextVal val) + { panic("Not supported on Alpha!"); } +}; diff --git a/src/cpu/o3/alpha_cpu.hh b/src/cpu/o3/alpha_cpu.hh deleted file mode 100644 index d7f3d5801f..0000000000 --- a/src/cpu/o3/alpha_cpu.hh +++ /dev/null @@ -1,434 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Kevin Lim - */ - -#ifndef __CPU_O3_ALPHA_CPU_HH__ -#define __CPU_O3_ALPHA_CPU_HH__ - -#include "arch/isa_traits.hh" -#include "cpu/thread_context.hh" -#include "cpu/o3/cpu.hh" -#include "sim/byteswap.hh" - -class EndQuiesceEvent; -namespace Kernel { - class Statistics; -}; - -class TranslatingPort; - -/** - * AlphaO3CPU class. Derives from the FullO3CPU class, and - * implements all ISA and implementation specific functions of the - * CPU. This is the CPU class that is used for the SimObjects, and is - * what is given to the DynInsts. Most of its state exists in the - * FullO3CPU; the state is has is mainly for ISA specific - * functionality. - */ -template -class AlphaO3CPU : public FullO3CPU -{ - protected: - typedef TheISA::IntReg IntReg; - typedef TheISA::FloatReg FloatReg; - typedef TheISA::FloatRegBits FloatRegBits; - typedef TheISA::MiscReg MiscReg; - typedef TheISA::RegFile RegFile; - typedef TheISA::MiscRegFile MiscRegFile; - - public: - typedef O3ThreadState ImplState; - typedef O3ThreadState Thread; - typedef typename Impl::Params Params; - - /** Constructs an AlphaO3CPU with the given parameters. */ - AlphaO3CPU(Params *params); - - /** - * Derived ThreadContext class for use with the AlphaO3CPU. It - * provides the interface for any external objects to access a - * single thread's state and some general CPU state. Any time - * external objects try to update state through this interface, - * the CPU will create an event to squash all in-flight - * instructions in order to ensure state is maintained correctly. - * It must be defined specifically for the AlphaO3CPU because - * not all architectural state is located within the O3ThreadState - * (such as the commit PC, and registers), and specific actions - * must be taken when using this interface (such as squashing all - * in-flight instructions when doing a write to this interface). - */ - class AlphaTC : public ThreadContext - { - public: - /** Pointer to the CPU. */ - AlphaO3CPU *cpu; - - /** Pointer to the thread state that this TC corrseponds to. */ - O3ThreadState *thread; - - /** Returns a pointer to this CPU. */ - virtual BaseCPU *getCpuPtr() { return cpu; } - - /** Sets this CPU's ID. */ - virtual void setCpuId(int id) { cpu->cpu_id = id; } - - /** Reads this CPU's ID. */ - virtual int readCpuId() { return cpu->cpu_id; } - -#if FULL_SYSTEM - /** Returns a pointer to the system. */ - virtual System *getSystemPtr() { return cpu->system; } - - /** Returns a pointer to physical memory. */ - virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; } - - /** Returns a pointer to the ITB. */ - virtual AlphaITB *getITBPtr() { return cpu->itb; } - - /** Returns a pointer to the DTB. */ - virtual AlphaDTB *getDTBPtr() { return cpu->dtb; } - - /** Returns a pointer to this thread's kernel statistics. */ - virtual Kernel::Statistics *getKernelStats() - { return thread->kernelStats; } - - virtual FunctionalPort *getPhysPort() { return thread->getPhysPort(); } - - virtual VirtualPort *getVirtPort(ThreadContext *src_tc = NULL); - - void delVirtPort(VirtualPort *vp); -#else - virtual TranslatingPort *getMemPort() { return thread->getMemPort(); } - - /** Returns a pointer to this thread's process. */ - virtual Process *getProcessPtr() { return thread->getProcessPtr(); } -#endif - /** Returns this thread's status. */ - virtual Status status() const { return thread->status(); } - - /** Sets this thread's status. */ - virtual void setStatus(Status new_status) - { thread->setStatus(new_status); } - - /** Set the status to Active. Optional delay indicates number of - * cycles to wait before beginning execution. */ - virtual void activate(int delay = 1); - - /** Set the status to Suspended. */ - virtual void suspend(); - - /** Set the status to Unallocated. */ - virtual void deallocate(); - - /** Set the status to Halted. */ - virtual void halt(); - -#if FULL_SYSTEM - /** Dumps the function profiling information. - * @todo: Implement. - */ - virtual void dumpFuncProfile(); -#endif - /** Takes over execution of a thread from another CPU. */ - virtual void takeOverFrom(ThreadContext *old_context); - - /** Registers statistics associated with this TC. */ - virtual void regStats(const std::string &name); - - /** Serializes state. */ - virtual void serialize(std::ostream &os); - /** Unserializes state. */ - virtual void unserialize(Checkpoint *cp, const std::string §ion); - -#if FULL_SYSTEM - /** Returns pointer to the quiesce event. */ - virtual EndQuiesceEvent *getQuiesceEvent(); - - /** Reads the last tick that this thread was activated on. */ - virtual Tick readLastActivate(); - /** Reads the last tick that this thread was suspended on. */ - virtual Tick readLastSuspend(); - - /** Clears the function profiling information. */ - virtual void profileClear(); - /** Samples the function profiling information. */ - virtual void profileSample(); -#endif - /** Returns this thread's ID number. */ - virtual int getThreadNum() { return thread->readTid(); } - - /** Returns the instruction this thread is currently committing. - * Only used when an instruction faults. - */ - virtual TheISA::MachInst getInst(); - - /** Copies the architectural registers from another TC into this TC. */ - virtual void copyArchRegs(ThreadContext *tc); - - /** Resets all architectural registers to 0. */ - virtual void clearArchRegs(); - - /** Reads an integer register. */ - virtual uint64_t readIntReg(int reg_idx); - - virtual FloatReg readFloatReg(int reg_idx, int width); - - virtual FloatReg readFloatReg(int reg_idx); - - virtual FloatRegBits readFloatRegBits(int reg_idx, int width); - - virtual FloatRegBits readFloatRegBits(int reg_idx); - - /** Sets an integer register to a value. */ - virtual void setIntReg(int reg_idx, uint64_t val); - - virtual void setFloatReg(int reg_idx, FloatReg val, int width); - - virtual void setFloatReg(int reg_idx, FloatReg val); - - virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width); - - virtual void setFloatRegBits(int reg_idx, FloatRegBits val); - - /** Reads this thread's PC. */ - virtual uint64_t readPC() - { return cpu->readPC(thread->readTid()); } - - /** Sets this thread's PC. */ - virtual void setPC(uint64_t val); - - /** Reads this thread's next PC. */ - virtual uint64_t readNextPC() - { return cpu->readNextPC(thread->readTid()); } - - /** Sets this thread's next PC. */ - virtual void setNextPC(uint64_t val); - - virtual uint64_t readNextNPC() - { - panic("Alpha has no NextNPC!"); - return 0; - } - - virtual void setNextNPC(uint64_t val) - { } - - /** Reads a miscellaneous register. */ - virtual MiscReg readMiscReg(int misc_reg) - { return cpu->readMiscReg(misc_reg, thread->readTid()); } - - /** Reads a misc. register, including any side-effects the - * read might have as defined by the architecture. */ - virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) - { return cpu->readMiscRegWithEffect(misc_reg, fault, thread->readTid()); } - - /** Sets a misc. register. */ - virtual Fault setMiscReg(int misc_reg, const MiscReg &val); - - /** Sets a misc. register, including any side-effects the - * write might have as defined by the architecture. */ - virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); - - /** Returns the number of consecutive store conditional failures. */ - // @todo: Figure out where these store cond failures should go. - virtual unsigned readStCondFailures() - { return thread->storeCondFailures; } - - /** Sets the number of consecutive store conditional failures. */ - virtual void setStCondFailures(unsigned sc_failures) - { thread->storeCondFailures = sc_failures; } - -#if FULL_SYSTEM - /** Returns if the thread is currently in PAL mode, based on - * the PC's value. */ - virtual bool inPalMode() - { return TheISA::PcPAL(cpu->readPC(thread->readTid())); } -#endif - // Only really makes sense for old CPU model. Lots of code - // outside the CPU still checks this function, so it will - // always return false to keep everything working. - /** Checks if the thread is misspeculating. Because it is - * very difficult to determine if the thread is - * misspeculating, this is set as false. */ - virtual bool misspeculating() { return false; } - -#if !FULL_SYSTEM - /** Gets a syscall argument by index. */ - virtual IntReg getSyscallArg(int i); - - /** Sets a syscall argument. */ - virtual void setSyscallArg(int i, IntReg val); - - /** Sets the syscall return value. */ - virtual void setSyscallReturn(SyscallReturn return_value); - - /** Executes a syscall in SE mode. */ - virtual void syscall(int64_t callnum) - { return cpu->syscall(callnum, thread->readTid()); } - - /** Reads the funcExeInst counter. */ - virtual Counter readFuncExeInst() { return thread->funcExeInst; } -#endif - virtual void changeRegFileContext(TheISA::RegFile::ContextParam param, - TheISA::RegFile::ContextVal val) - { panic("Not supported on Alpha!"); } - }; - -#if FULL_SYSTEM - /** ITB pointer. */ - AlphaITB *itb; - /** DTB pointer. */ - AlphaDTB *dtb; -#endif - - /** Registers statistics. */ - void regStats(); - -#if FULL_SYSTEM - /** Translates instruction requestion. */ - Fault translateInstReq(RequestPtr &req, Thread *thread) - { - return itb->translate(req, thread->getTC()); - } - - /** Translates data read request. */ - Fault translateDataReadReq(RequestPtr &req, Thread *thread) - { - return dtb->translate(req, thread->getTC(), false); - } - - /** Translates data write request. */ - Fault translateDataWriteReq(RequestPtr &req, Thread *thread) - { - return dtb->translate(req, thread->getTC(), true); - } - -#else - /** Translates instruction requestion in syscall emulation mode. */ - Fault translateInstReq(RequestPtr &req, Thread *thread) - { - return thread->getProcessPtr()->pTable->translate(req); - } - - /** Translates data read request in syscall emulation mode. */ - Fault translateDataReadReq(RequestPtr &req, Thread *thread) - { - return thread->getProcessPtr()->pTable->translate(req); - } - - /** Translates data write request in syscall emulation mode. */ - Fault translateDataWriteReq(RequestPtr &req, Thread *thread) - { - return thread->getProcessPtr()->pTable->translate(req); - } - -#endif - /** Reads a miscellaneous register. */ - MiscReg readMiscReg(int misc_reg, unsigned tid); - - /** Reads a misc. register, including any side effects the read - * might have as defined by the architecture. - */ - MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid); - - /** Sets a miscellaneous register. */ - Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid); - - /** Sets a misc. register, including any side effects the write - * might have as defined by the architecture. - */ - Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid); - - /** Initiates a squash of all in-flight instructions for a given - * thread. The source of the squash is an external update of - * state through the TC. - */ - void squashFromTC(unsigned tid); - -#if FULL_SYSTEM - /** Posts an interrupt. */ - void post_interrupt(int int_num, int index); - /** Reads the interrupt flag. */ - int readIntrFlag(); - /** Sets the interrupt flags. */ - void setIntrFlag(int val); - /** HW return from error interrupt. */ - Fault hwrei(unsigned tid); - /** Returns if a specific PC is a PAL mode PC. */ - bool inPalMode(uint64_t PC) - { return AlphaISA::PcPAL(PC); } - - bool simPalCheck(int palFunc, unsigned tid); - - /** Processes any interrupts. */ - void processInterrupts(); - - /** Halts the CPU. */ - void halt() { panic("Halt not implemented!\n"); } -#endif - - /** Traps to handle given fault. */ - void trap(Fault fault, unsigned tid); - -#if !FULL_SYSTEM - /** Executes a syscall. - * @todo: Determine if this needs to be virtual. - */ - void syscall(int64_t callnum, int tid); - /** Gets a syscall argument. */ - IntReg getSyscallArg(int i, int tid); - - /** Used to shift args for indirect syscall. */ - void setSyscallArg(int i, IntReg val, int tid); - - /** Sets the return value of a syscall. */ - void setSyscallReturn(SyscallReturn return_value, int tid); -#endif - - /** CPU read function, forwards read to LSQ. */ - template - Fault read(RequestPtr &req, T &data, int load_idx) - { - return this->iew.ldstQueue.read(req, data, load_idx); - } - - /** CPU write function, forwards write to LSQ. */ - template - Fault write(RequestPtr &req, T &data, int store_idx) - { - return this->iew.ldstQueue.write(req, data, store_idx); - } - - Addr lockAddr; - - /** Temporary fix for the lock flag, works in the UP case. */ - bool lockFlag; -}; - -#endif // __CPU_O3_ALPHA_CPU_HH__ diff --git a/src/cpu/o3/base_dyn_inst.cc b/src/cpu/o3/base_dyn_inst.cc index 1f7540d6a9..a0089fb8b0 100644 --- a/src/cpu/o3/base_dyn_inst.cc +++ b/src/cpu/o3/base_dyn_inst.cc @@ -29,8 +29,7 @@ */ #include "cpu/base_dyn_inst_impl.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" // Explicit instantiation template class BaseDynInst; diff --git a/src/cpu/o3/bpred_unit.cc b/src/cpu/o3/bpred_unit.cc index c35c0a0aa9..4087fa07bd 100644 --- a/src/cpu/o3/bpred_unit.cc +++ b/src/cpu/o3/bpred_unit.cc @@ -29,7 +29,6 @@ */ #include "cpu/o3/bpred_unit_impl.hh" -#include "cpu/o3/alpha_impl.hh" -#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/isa_specific.hh" template class BPredUnit; diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc index 770008a330..9bbb526dc9 100644 --- a/src/cpu/o3/commit.cc +++ b/src/cpu/o3/commit.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/commit_impl.hh" template class DefaultCommit; diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 5533990481..c2282d6177 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -41,8 +41,7 @@ #include "cpu/activity.hh" #include "cpu/simple_thread.hh" #include "cpu/thread_context.hh" -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/cpu.hh" #include "sim/root.hh" diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index b1ebcce9d0..9565bbe4f3 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -49,11 +49,14 @@ #include "cpu/o3/cpu_policy.hh" #include "cpu/o3/scoreboard.hh" #include "cpu/o3/thread_state.hh" +//#include "cpu/o3/thread_context.hh" #include "sim/process.hh" template class Checker; class ThreadContext; +template +class O3ThreadContext; class MemObject; class Process; @@ -67,6 +70,10 @@ class BaseO3CPU : public BaseCPU void regStats(); + /** Sets this CPU's ID. */ + void setCpuId(int id) { cpu_id = id; } + + /** Reads this CPU's ID. */ int readCpuId() { return cpu_id; } protected: @@ -94,6 +101,7 @@ class FullO3CPU : public BaseO3CPU typedef typename std::list::iterator ListIt; + friend class O3ThreadContext; public: enum Status { Running, diff --git a/src/cpu/o3/decode.cc b/src/cpu/o3/decode.cc index 4924f018a2..52d55983a8 100644 --- a/src/cpu/o3/decode.cc +++ b/src/cpu/o3/decode.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/decode_impl.hh" template class DefaultDecode; diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc index 5f52d0fca6..39b9879a40 100644 --- a/src/cpu/o3/fetch.cc +++ b/src/cpu/o3/fetch.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/fetch_impl.hh" template class DefaultFetch; diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc index 8145f4cc73..bf8eb61ac7 100644 --- a/src/cpu/o3/iew.cc +++ b/src/cpu/o3/iew.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/iew_impl.hh" #include "cpu/o3/inst_queue.hh" diff --git a/src/cpu/o3/inst_queue.cc b/src/cpu/o3/inst_queue.cc index f2c6b8213e..88f3f33a05 100644 --- a/src/cpu/o3/inst_queue.cc +++ b/src/cpu/o3/inst_queue.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/inst_queue_impl.hh" // Force instantiation of InstructionQueue. diff --git a/src/cpu/o3/isa_specific.hh b/src/cpu/o3/isa_specific.hh new file mode 100755 index 0000000000..f8a9dd8cc6 --- /dev/null +++ b/src/cpu/o3/isa_specific.hh @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Korey Sewell + */ + +#include "cpu/base.hh" + +#if THE_ISA == ALPHA_ISA + #include "cpu/o3/alpha/cpu.hh" + #include "cpu/o3/alpha/impl.hh" + #include "cpu/o3/alpha/params.hh" + #include "cpu/o3/alpha/dyn_inst.hh" +#else + #error "O3CPU doesnt support this ISA" +#endif diff --git a/src/cpu/o3/lsq.cc b/src/cpu/o3/lsq.cc index de0325920b..872576c32a 100644 --- a/src/cpu/o3/lsq.cc +++ b/src/cpu/o3/lsq.cc @@ -28,9 +28,7 @@ * Authors: Korey Sewell */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/lsq_impl.hh" // Force the instantiation of LDSTQ for all the implementations we care about. diff --git a/src/cpu/o3/lsq_unit.cc b/src/cpu/o3/lsq_unit.cc index e935ffa5c7..9b244ac719 100644 --- a/src/cpu/o3/lsq_unit.cc +++ b/src/cpu/o3/lsq_unit.cc @@ -29,9 +29,7 @@ * Korey Sewell */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/lsq_unit_impl.hh" // Force the instantiation of LDSTQ for all the implementations we care about. diff --git a/src/cpu/o3/mem_dep_unit.cc b/src/cpu/o3/mem_dep_unit.cc index a951032668..3edac95ac1 100644 --- a/src/cpu/o3/mem_dep_unit.cc +++ b/src/cpu/o3/mem_dep_unit.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/store_set.hh" #include "cpu/o3/mem_dep_unit_impl.hh" diff --git a/src/cpu/o3/alpha_params.hh b/src/cpu/o3/params.hh old mode 100644 new mode 100755 similarity index 88% rename from src/cpu/o3/alpha_params.hh rename to src/cpu/o3/params.hh index f0732733e2..69a1bb937a --- a/src/cpu/o3/alpha_params.hh +++ b/src/cpu/o3/params.hh @@ -28,47 +28,29 @@ * Authors: Kevin Lim */ -#ifndef __CPU_O3_ALPHA_PARAMS_HH__ -#define __CPU_O3_ALPHA_PARAMS_HH__ +#ifndef __CPU_O3_PARAMS_HH__ +#define __CPU_O3_PARAMS_HH__ #include "cpu/o3/cpu.hh" //Forward declarations -class AlphaDTB; -class AlphaITB; class FUPool; -class MemObject; -class Process; -class System; /** - * This file defines the parameters that will be used for the AlphaO3CPU. + * This file defines the parameters that will be used for the O3CPU. * This must be defined externally so that the Impl can have a params class * defined that it can pass to all of the individual stages. */ - -class AlphaSimpleParams : public BaseO3CPU::Params +class O3Params : public BaseO3CPU::Params { public: - -#if FULL_SYSTEM - AlphaITB *itb; AlphaDTB *dtb; -#else - std::vector workload; - Process *process; -#endif // FULL_SYSTEM - - MemObject *mem; - - BaseCPU *checker; - unsigned activity; // // Caches // -// MemInterface *icacheInterface; -// MemInterface *dcacheInterface; + // MemInterface *icacheInterface; + // MemInterface *dcacheInterface; unsigned cachePorts; diff --git a/src/cpu/o3/rename.cc b/src/cpu/o3/rename.cc index 9ca8e82c6e..f972190b77 100644 --- a/src/cpu/o3/rename.cc +++ b/src/cpu/o3/rename.cc @@ -28,8 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/rename_impl.hh" template class DefaultRename; diff --git a/src/cpu/o3/rob.cc b/src/cpu/o3/rob.cc index f99e5ccfd2..ccef6b1554 100644 --- a/src/cpu/o3/rob.cc +++ b/src/cpu/o3/rob.cc @@ -29,8 +29,7 @@ * Nathan Binkert */ -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/isa_specific.hh" #include "cpu/o3/rob_impl.hh" // Force instantiation of InstructionQueue. diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh new file mode 100755 index 0000000000..d60867029a --- /dev/null +++ b/src/cpu/o3/thread_context.hh @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#ifndef __CPU_O3_THREAD_CONTEXT_HH__ +#define __CPU_O3_THREAD_CONTEXT_HH__ + +#include "cpu/o3/isa_specific.hh" + +/** + * Derived ThreadContext class for use with the O3CPU. It + * provides the interface for any external objects to access a + * single thread's state and some general CPU state. Any time + * external objects try to update state through this interface, + * the CPU will create an event to squash all in-flight + * instructions in order to ensure state is maintained correctly. + * It must be defined specifically for the O3CPU because + * not all architectural state is located within the O3ThreadState + * (such as the commit PC, and registers), and specific actions + * must be taken when using this interface (such as squashing all + * in-flight instructions when doing a write to this interface). + */ +template +class O3ThreadContext : public ThreadContext +{ + public: + typedef typename Impl::O3CPU O3CPU; + + /** Pointer to the CPU. */ + O3CPU *cpu; + + /** Pointer to the thread state that this TC corrseponds to. */ + O3ThreadState *thread; + + /** Returns a pointer to this CPU. */ + virtual BaseCPU *getCpuPtr() { return cpu; } + + /** Sets this CPU's ID. */ + virtual void setCpuId(int id) { cpu->setCpuId(id); } + + /** Reads this CPU's ID. */ + virtual int readCpuId() { return cpu->readCpuId(); } + +#if FULL_SYSTEM + /** Returns a pointer to the system. */ + virtual System *getSystemPtr() { return cpu->system; } + + /** Returns a pointer to physical memory. */ + virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; } + + /** Returns a pointer to this thread's kernel statistics. */ + virtual Kernel::Statistics *getKernelStats() + { return thread->kernelStats; } + + virtual FunctionalPort *getPhysPort() { return thread->getPhysPort(); } + + virtual VirtualPort *getVirtPort(ThreadContext *src_tc = NULL); + + void delVirtPort(VirtualPort *vp); +#else + virtual TranslatingPort *getMemPort() { return thread->getMemPort(); } + + /** Returns a pointer to this thread's process. */ + virtual Process *getProcessPtr() { return thread->getProcessPtr(); } +#endif + /** Returns this thread's status. */ + virtual Status status() const { return thread->status(); } + + /** Sets this thread's status. */ + virtual void setStatus(Status new_status) + { thread->setStatus(new_status); } + + /** Set the status to Active. Optional delay indicates number of + * cycles to wait before beginning execution. */ + virtual void activate(int delay = 1); + + /** Set the status to Suspended. */ + virtual void suspend(); + + /** Set the status to Unallocated. */ + virtual void deallocate(); + + /** Set the status to Halted. */ + virtual void halt(); + +#if FULL_SYSTEM + /** Dumps the function profiling information. + * @todo: Implement. + */ + virtual void dumpFuncProfile(); +#endif + /** Takes over execution of a thread from another CPU. */ + virtual void takeOverFrom(ThreadContext *old_context); + + /** Registers statistics associated with this TC. */ + virtual void regStats(const std::string &name); + + /** Serializes state. */ + virtual void serialize(std::ostream &os); + /** Unserializes state. */ + virtual void unserialize(Checkpoint *cp, const std::string §ion); + +#if FULL_SYSTEM + /** Reads the last tick that this thread was activated on. */ + virtual Tick readLastActivate(); + /** Reads the last tick that this thread was suspended on. */ + virtual Tick readLastSuspend(); + + /** Clears the function profiling information. */ + virtual void profileClear(); + /** Samples the function profiling information. */ + virtual void profileSample(); +#endif + /** Returns this thread's ID number. */ + virtual int getThreadNum() { return thread->readTid(); } + + /** Returns the instruction this thread is currently committing. + * Only used when an instruction faults. + */ + virtual TheISA::MachInst getInst(); + + /** Copies the architectural registers from another TC into this TC. */ + virtual void copyArchRegs(ThreadContext *tc); + + /** Resets all architectural registers to 0. */ + virtual void clearArchRegs(); + + /** Reads an integer register. */ + virtual uint64_t readIntReg(int reg_idx); + + virtual FloatReg readFloatReg(int reg_idx, int width); + + virtual FloatReg readFloatReg(int reg_idx); + + virtual FloatRegBits readFloatRegBits(int reg_idx, int width); + + virtual FloatRegBits readFloatRegBits(int reg_idx); + + /** Sets an integer register to a value. */ + virtual void setIntReg(int reg_idx, uint64_t val); + + virtual void setFloatReg(int reg_idx, FloatReg val, int width); + + virtual void setFloatReg(int reg_idx, FloatReg val); + + virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width); + + virtual void setFloatRegBits(int reg_idx, FloatRegBits val); + + /** Reads this thread's PC. */ + virtual uint64_t readPC() + { return cpu->readPC(thread->readTid()); } + + /** Sets this thread's PC. */ + virtual void setPC(uint64_t val); + + /** Reads this thread's next PC. */ + virtual uint64_t readNextPC() + { return cpu->readNextPC(thread->readTid()); } + + /** Sets this thread's next PC. */ + virtual void setNextPC(uint64_t val); + + /** Reads a miscellaneous register. */ + virtual MiscReg readMiscReg(int misc_reg) + { return cpu->readMiscReg(misc_reg, thread->readTid()); } + + /** Reads a misc. register, including any side-effects the + * read might have as defined by the architecture. */ + virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { return cpu->readMiscRegWithEffect(misc_reg, fault, thread->readTid()); } + + /** Sets a misc. register. */ + virtual Fault setMiscReg(int misc_reg, const MiscReg &val); + + /** Sets a misc. register, including any side-effects the + * write might have as defined by the architecture. */ + virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); + + /** Returns the number of consecutive store conditional failures. */ + // @todo: Figure out where these store cond failures should go. + virtual unsigned readStCondFailures() + { return thread->storeCondFailures; } + + /** Sets the number of consecutive store conditional failures. */ + virtual void setStCondFailures(unsigned sc_failures) + { thread->storeCondFailures = sc_failures; } + + // Only really makes sense for old CPU model. Lots of code + // outside the CPU still checks this function, so it will + // always return false to keep everything working. + /** Checks if the thread is misspeculating. Because it is + * very difficult to determine if the thread is + * misspeculating, this is set as false. */ + virtual bool misspeculating() { return false; } + +#if !FULL_SYSTEM + /** Gets a syscall argument by index. */ + virtual IntReg getSyscallArg(int i); + + /** Sets a syscall argument. */ + virtual void setSyscallArg(int i, IntReg val); + + /** Sets the syscall return value. */ + virtual void setSyscallReturn(SyscallReturn return_value); + + /** Executes a syscall in SE mode. */ + virtual void syscall(int64_t callnum) + { return cpu->syscall(callnum, thread->readTid()); } + + /** Reads the funcExeInst counter. */ + virtual Counter readFuncExeInst() { return thread->funcExeInst; } +#endif +}; + +#endif diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh new file mode 100755 index 0000000000..fccabaf363 --- /dev/null +++ b/src/cpu/o3/thread_context_impl.hh @@ -0,0 +1,488 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + * Korey Sewell + */ + +#include "cpu/o3/thread_context.hh" + +using namespace TheISA; + +#if FULL_SYSTEM +template +VirtualPort * +O3ThreadContext::getVirtPort(ThreadContext *src_tc) +{ + if (!src_tc) + return thread->getVirtPort(); + + VirtualPort *vp; + Port *mem_port; + + vp = new VirtualPort("tc-vport", src_tc); + mem_port = cpu->system->physmem->getPort("functional"); + mem_port->setPeer(vp); + vp->setPeer(mem_port); + return vp; +} + +template +void +O3ThreadContext::dumpFuncProfile() +{ + // Currently not supported +} +#endif + +template +void +O3ThreadContext::takeOverFrom(ThreadContext *old_context) +{ + // some things should already be set up +#if FULL_SYSTEM + assert(getSystemPtr() == old_context->getSystemPtr()); +#else + assert(getProcessPtr() == old_context->getProcessPtr()); +#endif + + // copy over functional state + setStatus(old_context->status()); + copyArchRegs(old_context); + setCpuId(old_context->readCpuId()); + +#if !FULL_SYSTEM + thread->funcExeInst = old_context->readFuncExeInst(); +#else + EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent(); + if (other_quiesce) { + // Point the quiesce event's TC at this TC so that it wakes up + // the proper CPU. + other_quiesce->tc = this; + } + if (thread->quiesceEvent) { + thread->quiesceEvent->tc = this; + } + + // Transfer kernel stats from one CPU to the other. + thread->kernelStats = old_context->getKernelStats(); +// storeCondFailures = 0; + cpu->lockFlag = false; +#endif + + old_context->setStatus(ThreadContext::Unallocated); + + thread->inSyscall = false; + thread->trapPending = false; +} + +#if FULL_SYSTEM +template +void +O3ThreadContext::delVirtPort(VirtualPort *vp) +{ + delete vp->getPeer(); + delete vp; +} +#endif + +template +void +O3ThreadContext::activate(int delay) +{ + DPRINTF(O3CPU, "Calling activate on AlphaTC\n"); + + if (thread->status() == ThreadContext::Active) + return; + +#if FULL_SYSTEM + thread->lastActivate = curTick; +#endif + + if (thread->status() == ThreadContext::Unallocated) { + cpu->activateWhenReady(thread->readTid()); + return; + } + + thread->setStatus(ThreadContext::Active); + + // status() == Suspended + cpu->activateContext(thread->readTid(), delay); +} + +template +void +O3ThreadContext::suspend() +{ + DPRINTF(O3CPU, "Calling suspend on AlphaTC\n"); + + if (thread->status() == ThreadContext::Suspended) + return; + +#if FULL_SYSTEM + thread->lastActivate = curTick; + thread->lastSuspend = curTick; +#endif +/* +#if FULL_SYSTEM + // Don't change the status from active if there are pending interrupts + if (cpu->check_interrupts()) { + assert(status() == ThreadContext::Active); + return; + } +#endif +*/ + thread->setStatus(ThreadContext::Suspended); + cpu->suspendContext(thread->readTid()); +} + +template +void +O3ThreadContext::deallocate() +{ + DPRINTF(O3CPU, "Calling deallocate on AlphaTC\n"); + + if (thread->status() == ThreadContext::Unallocated) + return; + + thread->setStatus(ThreadContext::Unallocated); + cpu->deallocateContext(thread->readTid()); +} + +template +void +O3ThreadContext::halt() +{ + DPRINTF(O3CPU, "Calling halt on AlphaTC\n"); + + if (thread->status() == ThreadContext::Halted) + return; + + thread->setStatus(ThreadContext::Halted); + cpu->haltContext(thread->readTid()); +} + +template +void +O3ThreadContext::regStats(const std::string &name) +{ +#if FULL_SYSTEM + thread->kernelStats = new Kernel::Statistics(cpu->system); + thread->kernelStats->regStats(name + ".kern"); +#endif +} + +template +void +O3ThreadContext::serialize(std::ostream &os) +{ +#if FULL_SYSTEM + if (thread->kernelStats) + thread->kernelStats->serialize(os); +#endif + +} + +template +void +O3ThreadContext::unserialize(Checkpoint *cp, const std::string §ion) +{ +#if FULL_SYSTEM + if (thread->kernelStats) + thread->kernelStats->unserialize(cp, section); +#endif + +} + +#if FULL_SYSTEM +template +Tick +O3ThreadContext::readLastActivate() +{ + return thread->lastActivate; +} + +template +Tick +O3ThreadContext::readLastSuspend() +{ + return thread->lastSuspend; +} + +template +void +O3ThreadContext::profileClear() +{} + +template +void +O3ThreadContext::profileSample() +{} +#endif + +template +TheISA::MachInst +O3ThreadContext:: getInst() +{ + return thread->getInst(); +} + +template +void +O3ThreadContext::copyArchRegs(ThreadContext *tc) +{ + // This function will mess things up unless the ROB is empty and + // there are no instructions in the pipeline. + unsigned tid = thread->readTid(); + PhysRegIndex renamed_reg; + + // First loop through the integer registers. + for (int i = 0; i < TheISA::NumIntRegs; ++i) { + renamed_reg = cpu->renameMap[tid].lookup(i); + + DPRINTF(O3CPU, "Copying over register %i, had data %lli, " + "now has data %lli.\n", + renamed_reg, cpu->readIntReg(renamed_reg), + tc->readIntReg(i)); + + cpu->setIntReg(renamed_reg, tc->readIntReg(i)); + } + + // Then loop through the floating point registers. + for (int i = 0; i < TheISA::NumFloatRegs; ++i) { + renamed_reg = cpu->renameMap[tid].lookup(i + TheISA::FP_Base_DepTag); + cpu->setFloatRegBits(renamed_reg, + tc->readFloatRegBits(i)); + } + + // Copy the misc regs. + copyMiscRegs(tc, this); + + // Then finally set the PC and the next PC. + cpu->setPC(tc->readPC(), tid); + cpu->setNextPC(tc->readNextPC(), tid); +#if !FULL_SYSTEM + this->thread->funcExeInst = tc->readFuncExeInst(); +#endif +} + +template +void +O3ThreadContext::clearArchRegs() +{} + +template +uint64_t +O3ThreadContext::readIntReg(int reg_idx) +{ + return cpu->readArchIntReg(reg_idx, thread->readTid()); +} + +template +FloatReg +O3ThreadContext::readFloatReg(int reg_idx, int width) +{ + switch(width) { + case 32: + return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); + case 64: + return cpu->readArchFloatRegDouble(reg_idx, thread->readTid()); + default: + panic("Unsupported width!"); + return 0; + } +} + +template +FloatReg +O3ThreadContext::readFloatReg(int reg_idx) +{ + return cpu->readArchFloatRegSingle(reg_idx, thread->readTid()); +} + +template +FloatRegBits +O3ThreadContext::readFloatRegBits(int reg_idx, int width) +{ + DPRINTF(Fault, "Reading floatint register through the TC!\n"); + return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); +} + +template +FloatRegBits +O3ThreadContext::readFloatRegBits(int reg_idx) +{ + return cpu->readArchFloatRegInt(reg_idx, thread->readTid()); +} + +template +void +O3ThreadContext::setIntReg(int reg_idx, uint64_t val) +{ + cpu->setArchIntReg(reg_idx, val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template +void +O3ThreadContext::setFloatReg(int reg_idx, FloatReg val, int width) +{ + switch(width) { + case 32: + cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); + break; + case 64: + cpu->setArchFloatRegDouble(reg_idx, val, thread->readTid()); + break; + } + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template +void +O3ThreadContext::setFloatReg(int reg_idx, FloatReg val) +{ + cpu->setArchFloatRegSingle(reg_idx, val, thread->readTid()); + + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template +void +O3ThreadContext::setFloatRegBits(int reg_idx, FloatRegBits val, + int width) +{ + DPRINTF(Fault, "Setting floatint register through the TC!\n"); + cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template +void +O3ThreadContext::setFloatRegBits(int reg_idx, FloatRegBits val) +{ + cpu->setArchFloatRegInt(reg_idx, val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template +void +O3ThreadContext::setPC(uint64_t val) +{ + cpu->setPC(val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template +void +O3ThreadContext::setNextPC(uint64_t val) +{ + cpu->setNextPC(val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } +} + +template +Fault +O3ThreadContext::setMiscReg(int misc_reg, const MiscReg &val) +{ + Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } + + return ret_fault; +} + +template +Fault +O3ThreadContext::setMiscRegWithEffect(int misc_reg, + const MiscReg &val) +{ + Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val, + thread->readTid()); + + // Squash if we're not already in a state update mode. + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromTC(thread->readTid()); + } + + return ret_fault; +} + +#if !FULL_SYSTEM + +template +TheISA::IntReg +O3ThreadContext::getSyscallArg(int i) +{ + return cpu->getSyscallArg(i, thread->readTid()); +} + +template +void +O3ThreadContext::setSyscallArg(int i, IntReg val) +{ + cpu->setSyscallArg(i, val, thread->readTid()); +} + +template +void +O3ThreadContext::setSyscallReturn(SyscallReturn return_value) +{ + cpu->setSyscallReturn(return_value, thread->readTid()); +} + +#endif // FULL_SYSTEM + diff --git a/src/python/m5/objects/AlphaO3CPU.py b/src/python/m5/objects/O3CPU.py similarity index 98% rename from src/python/m5/objects/AlphaO3CPU.py rename to src/python/m5/objects/O3CPU.py index f14f8c88ed..4ecfa8fbd6 100644 --- a/src/python/m5/objects/AlphaO3CPU.py +++ b/src/python/m5/objects/O3CPU.py @@ -2,8 +2,8 @@ from m5 import build_env from m5.config import * from BaseCPU import BaseCPU -class DerivAlphaO3CPU(BaseCPU): - type = 'DerivAlphaO3CPU' +class DerivO3CPU(BaseCPU): + type = 'DerivO3CPU' activity = Param.Unsigned("Initial count") numThreads = Param.Unsigned("number of HW thread contexts") diff --git a/src/sim/process.cc b/src/sim/process.cc index 9cdc5b9f5d..f989300a39 100644 --- a/src/sim/process.cc +++ b/src/sim/process.cc @@ -358,7 +358,10 @@ LiveProcess::argsInit(int intSize, int pageSize) Addr prog_entry = objFile->entryPoint(); threadContexts[0]->setPC(prog_entry); threadContexts[0]->setNextPC(prog_entry + sizeof(MachInst)); + +#if THE_ISA != ALPHA_ISA //e.g. MIPS or Sparc threadContexts[0]->setNextNPC(prog_entry + (2 * sizeof(MachInst))); +#endif num_processes++; } From 51261196bde3403544631cdb4895c2d2a51c3f1e Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Fri, 30 Jun 2006 20:49:31 -0400 Subject: [PATCH 10/17] now O3CPU is totally independent of the ISA... all alpha specific stuff is the cpu/o3/alpha directory src/cpu/o3/alpha/cpu.cc: src/cpu/o3/alpha/cpu_impl.hh: src/cpu/o3/alpha/impl.hh: filenames src/cpu/o3/alpha/thread_context.hh: public src/cpu/o3/base_dyn_inst.cc: src/cpu/o3/bpred_unit.cc: src/cpu/o3/commit.cc: src/cpu/o3/cpu.cc: src/cpu/o3/decode.cc: src/cpu/o3/fetch.cc: src/cpu/o3/iew.cc: src/cpu/o3/inst_queue.cc: src/cpu/o3/lsq.cc: src/cpu/o3/lsq_unit.cc: src/cpu/o3/mem_dep_unit.cc: src/cpu/o3/rename.cc: src/cpu/o3/rob.cc: use O3CPUImpl ... not Alpha src/cpu/o3/checker_builder.cc: filename --HG-- extra : convert_revision : 6eb739909699ade1e2a9d63637b182413ceebc69 --- src/cpu/o3/alpha/cpu.cc | 2 +- src/cpu/o3/alpha/cpu_impl.hh | 4 ++-- src/cpu/o3/alpha/impl.hh | 8 +++++++- src/cpu/o3/alpha/thread_context.hh | 1 + src/cpu/o3/base_dyn_inst.cc | 4 ++-- src/cpu/o3/bpred_unit.cc | 2 +- src/cpu/o3/checker_builder.cc | 4 ++-- src/cpu/o3/commit.cc | 2 +- src/cpu/o3/cpu.cc | 2 +- src/cpu/o3/decode.cc | 2 +- src/cpu/o3/fetch.cc | 2 +- src/cpu/o3/iew.cc | 2 +- src/cpu/o3/inst_queue.cc | 2 +- src/cpu/o3/lsq.cc | 2 +- src/cpu/o3/lsq_unit.cc | 2 +- src/cpu/o3/mem_dep_unit.cc | 10 +++++----- src/cpu/o3/rename.cc | 2 +- src/cpu/o3/rob.cc | 2 +- 18 files changed, 31 insertions(+), 24 deletions(-) diff --git a/src/cpu/o3/alpha/cpu.cc b/src/cpu/o3/alpha/cpu.cc index 87a4d03a77..ed10b2fd11 100644 --- a/src/cpu/o3/alpha/cpu.cc +++ b/src/cpu/o3/alpha/cpu.cc @@ -28,7 +28,7 @@ * Authors: Kevin Lim */ -#include "cpu/o3/alphaimpl.hh" +#include "cpu/o3/alpha/impl.hh" #include "cpu/o3/alpha/cpu_impl.hh" #include "cpu/o3/alpha/dyn_inst.hh" diff --git a/src/cpu/o3/alpha/cpu_impl.hh b/src/cpu/o3/alpha/cpu_impl.hh index 2da683398b..0473e60c23 100644 --- a/src/cpu/o3/alpha/cpu_impl.hh +++ b/src/cpu/o3/alpha/cpu_impl.hh @@ -40,7 +40,7 @@ #include "cpu/o3/alpha/cpu.hh" #include "cpu/o3/alpha/params.hh" -#include "cpu/o3/alpha/tc.hh" +#include "cpu/o3/alpha/thread_context.hh" #include "cpu/o3/comm.hh" #include "cpu/o3/thread_state.hh" @@ -120,7 +120,7 @@ AlphaO3CPU::AlphaO3CPU(Params *params) // CheckerThreadContext. #if USE_CHECKER if (params->checker) { - tc = new CheckerThreadContext>( + tc = new CheckerThreadContext >( alpha_tc, this->checker); } #endif diff --git a/src/cpu/o3/alpha/impl.hh b/src/cpu/o3/alpha/impl.hh index cdcdff34a8..8cd8692c60 100644 --- a/src/cpu/o3/alpha/impl.hh +++ b/src/cpu/o3/alpha/impl.hh @@ -81,8 +81,14 @@ struct AlphaSimpleImpl enum { MaxWidth = 8, - MaxThreads = 2 + MaxThreads = 4 }; }; +/** The O3Impl to be used. */ +typedef AlphaSimpleImpl O3CPUImpl; + +/** The O3Impl to be used. */ +typedef DynInst O3DynInst; + #endif // __CPU_O3_ALPHA_IMPL_HH__ diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh index 890bff3ffb..57190d65e3 100644 --- a/src/cpu/o3/alpha/thread_context.hh +++ b/src/cpu/o3/alpha/thread_context.hh @@ -34,6 +34,7 @@ template class AlphaTC : public O3ThreadContext { + public: #if FULL_SYSTEM /** Returns a pointer to the ITB. */ virtual AlphaITB *getITBPtr() { return cpu->itb; } diff --git a/src/cpu/o3/base_dyn_inst.cc b/src/cpu/o3/base_dyn_inst.cc index a0089fb8b0..0979c5c8f1 100644 --- a/src/cpu/o3/base_dyn_inst.cc +++ b/src/cpu/o3/base_dyn_inst.cc @@ -32,8 +32,8 @@ #include "cpu/o3/isa_specific.hh" // Explicit instantiation -template class BaseDynInst; +template class BaseDynInst; template <> int -BaseDynInst::instcount = 0; +BaseDynInst::instcount = 0; diff --git a/src/cpu/o3/bpred_unit.cc b/src/cpu/o3/bpred_unit.cc index 4087fa07bd..08fd4e8eaf 100644 --- a/src/cpu/o3/bpred_unit.cc +++ b/src/cpu/o3/bpred_unit.cc @@ -31,4 +31,4 @@ #include "cpu/o3/bpred_unit_impl.hh" #include "cpu/o3/isa_specific.hh" -template class BPredUnit; +template class BPredUnit; diff --git a/src/cpu/o3/checker_builder.cc b/src/cpu/o3/checker_builder.cc index 58c40d00c9..782d963b0c 100644 --- a/src/cpu/o3/checker_builder.cc +++ b/src/cpu/o3/checker_builder.cc @@ -32,8 +32,8 @@ #include "cpu/checker/cpu_impl.hh" #include "cpu/inst_seq.hh" -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha/dyn_inst.hh" +#include "cpu/o3/alpha/impl.hh" #include "sim/builder.hh" #include "sim/process.hh" #include "sim/sim_object.hh" diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc index 9bbb526dc9..637d59f526 100644 --- a/src/cpu/o3/commit.cc +++ b/src/cpu/o3/commit.cc @@ -31,4 +31,4 @@ #include "cpu/o3/isa_specific.hh" #include "cpu/o3/commit_impl.hh" -template class DefaultCommit; +template class DefaultCommit; diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index c2282d6177..87fee83614 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -1211,4 +1211,4 @@ FullO3CPU::updateThreadPriority() } // Forward declaration of FullO3CPU. -template class FullO3CPU; +template class FullO3CPU; diff --git a/src/cpu/o3/decode.cc b/src/cpu/o3/decode.cc index 52d55983a8..896e38331c 100644 --- a/src/cpu/o3/decode.cc +++ b/src/cpu/o3/decode.cc @@ -31,4 +31,4 @@ #include "cpu/o3/isa_specific.hh" #include "cpu/o3/decode_impl.hh" -template class DefaultDecode; +template class DefaultDecode; diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc index 39b9879a40..d809b07e47 100644 --- a/src/cpu/o3/fetch.cc +++ b/src/cpu/o3/fetch.cc @@ -31,4 +31,4 @@ #include "cpu/o3/isa_specific.hh" #include "cpu/o3/fetch_impl.hh" -template class DefaultFetch; +template class DefaultFetch; diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc index bf8eb61ac7..f99be7fe0a 100644 --- a/src/cpu/o3/iew.cc +++ b/src/cpu/o3/iew.cc @@ -32,4 +32,4 @@ #include "cpu/o3/iew_impl.hh" #include "cpu/o3/inst_queue.hh" -template class DefaultIEW; +template class DefaultIEW; diff --git a/src/cpu/o3/inst_queue.cc b/src/cpu/o3/inst_queue.cc index 88f3f33a05..a539066f93 100644 --- a/src/cpu/o3/inst_queue.cc +++ b/src/cpu/o3/inst_queue.cc @@ -32,4 +32,4 @@ #include "cpu/o3/inst_queue_impl.hh" // Force instantiation of InstructionQueue. -template class InstructionQueue; +template class InstructionQueue; diff --git a/src/cpu/o3/lsq.cc b/src/cpu/o3/lsq.cc index 872576c32a..5279472815 100644 --- a/src/cpu/o3/lsq.cc +++ b/src/cpu/o3/lsq.cc @@ -32,5 +32,5 @@ #include "cpu/o3/lsq_impl.hh" // Force the instantiation of LDSTQ for all the implementations we care about. -template class LSQ; +template class LSQ; diff --git a/src/cpu/o3/lsq_unit.cc b/src/cpu/o3/lsq_unit.cc index 9b244ac719..3ca3fa6674 100644 --- a/src/cpu/o3/lsq_unit.cc +++ b/src/cpu/o3/lsq_unit.cc @@ -33,5 +33,5 @@ #include "cpu/o3/lsq_unit_impl.hh" // Force the instantiation of LDSTQ for all the implementations we care about. -template class LSQUnit; +template class LSQUnit; diff --git a/src/cpu/o3/mem_dep_unit.cc b/src/cpu/o3/mem_dep_unit.cc index 3edac95ac1..6a14dcbff4 100644 --- a/src/cpu/o3/mem_dep_unit.cc +++ b/src/cpu/o3/mem_dep_unit.cc @@ -33,17 +33,17 @@ #include "cpu/o3/mem_dep_unit_impl.hh" // Force instantation of memory dependency unit using store sets and -// AlphaSimpleImpl. -template class MemDepUnit; +// O3CPUImpl. +template class MemDepUnit; #ifdef DEBUG template <> int -MemDepUnit::MemDepEntry::memdep_count = 0; +MemDepUnit::MemDepEntry::memdep_count = 0; template <> int -MemDepUnit::MemDepEntry::memdep_insert = 0; +MemDepUnit::MemDepEntry::memdep_insert = 0; template <> int -MemDepUnit::MemDepEntry::memdep_erase = 0; +MemDepUnit::MemDepEntry::memdep_erase = 0; #endif diff --git a/src/cpu/o3/rename.cc b/src/cpu/o3/rename.cc index f972190b77..443ada0cb4 100644 --- a/src/cpu/o3/rename.cc +++ b/src/cpu/o3/rename.cc @@ -31,4 +31,4 @@ #include "cpu/o3/isa_specific.hh" #include "cpu/o3/rename_impl.hh" -template class DefaultRename; +template class DefaultRename; diff --git a/src/cpu/o3/rob.cc b/src/cpu/o3/rob.cc index ccef6b1554..9976049cdb 100644 --- a/src/cpu/o3/rob.cc +++ b/src/cpu/o3/rob.cc @@ -33,4 +33,4 @@ #include "cpu/o3/rob_impl.hh" // Force instantiation of InstructionQueue. -template class ROB; +template class ROB; From 62961a291692c960b67158f4152d480c160f8ac8 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sat, 1 Jul 2006 18:52:02 -0400 Subject: [PATCH 11/17] fix cpu builder to build the correct name... add activateThread event and functions src/cpu/o3/alpha/cpu_builder.cc: Have CPU builder build a DerivO3CPU not a DerivAlphaO3CPU src/cpu/o3/cpu.cc: add activateThread Event add activateThread function adjust activateContext to schedule a thread to activate within the CPU instead of activating thread right away. This will lead to stages trying to use threads that arent ready yet and wasting execution time & possibly performance. src/cpu/o3/cpu.hh: add activateThread Event add activateThread function add schedule/descheculed activate thread event --HG-- extra : convert_revision : 236d30dc160910507ad36f7f527ab185ed38dc04 --- src/cpu/o3/alpha/cpu_builder.cc | 26 ++++++------- src/cpu/o3/cpu.cc | 69 +++++++++++++++++++++++++++------ src/cpu/o3/cpu.hh | 53 +++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 25 deletions(-) diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc index b1e141ff4e..8190256fbf 100644 --- a/src/cpu/o3/alpha/cpu_builder.cc +++ b/src/cpu/o3/alpha/cpu_builder.cc @@ -31,21 +31,21 @@ #include #include "cpu/base.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" -#include "cpu/o3/alpha_params.hh" +#include "cpu/o3/alpha/cpu.hh" +#include "cpu/o3/alpha/impl.hh" +#include "cpu/o3/alpha/params.hh" #include "cpu/o3/fu_pool.hh" #include "sim/builder.hh" -class DerivAlphaO3CPU : public AlphaO3CPU +class DerivO3CPU : public AlphaO3CPU { public: - DerivAlphaO3CPU(AlphaSimpleParams *p) + DerivO3CPU(AlphaSimpleParams *p) : AlphaO3CPU(p) { } }; -BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaO3CPU) +BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) Param clock; Param numThreads; @@ -144,9 +144,9 @@ Param defer_registration; Param function_trace; Param function_trace_start; -END_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaO3CPU) +END_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU) -BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaO3CPU) +BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) INIT_PARAM(clock, "clock speed"), INIT_PARAM(numThreads, "number of HW thread contexts"), @@ -261,11 +261,11 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaO3CPU) INIT_PARAM(function_trace, "Enable function trace"), INIT_PARAM(function_trace_start, "Cycle to start function trace") -END_INIT_SIM_OBJECT_PARAMS(DerivAlphaO3CPU) +END_INIT_SIM_OBJECT_PARAMS(DerivO3CPU) -CREATE_SIM_OBJECT(DerivAlphaO3CPU) +CREATE_SIM_OBJECT(DerivO3CPU) { - DerivAlphaO3CPU *cpu; + DerivO3CPU *cpu; #if FULL_SYSTEM // Full-system only supports a single thread for the moment. @@ -386,10 +386,10 @@ CREATE_SIM_OBJECT(DerivAlphaO3CPU) params->functionTrace = function_trace; params->functionTraceStart = function_trace_start; - cpu = new DerivAlphaO3CPU(params); + cpu = new DerivO3CPU(params); return cpu; } -REGISTER_SIM_OBJECT("DerivAlphaO3CPU", DerivAlphaO3CPU) +REGISTER_SIM_OBJECT("DerivO3CPU", DerivO3CPU) diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 87fee83614..7ed17a91e8 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -85,6 +85,35 @@ FullO3CPU::TickEvent::description() return "FullO3CPU tick event"; } +template +FullO3CPU::ActivateThreadEvent::ActivateThreadEvent() + : Event(&mainEventQueue, CPU_Tick_Pri) +{ +} + +template +void +FullO3CPU::ActivateThreadEvent::init(int thread_num, + FullO3CPU *thread_cpu) +{ + tid = thread_num; + cpu = thread_cpu; +} + +template +void +FullO3CPU::ActivateThreadEvent::process() +{ + cpu->activateThread(tid); +} + +template +const char * +FullO3CPU::ActivateThreadEvent::description() +{ + return "FullO3CPU \"Activate Thread\" event"; +} + template FullO3CPU::FullO3CPU(Params *params) : BaseO3CPU(params), @@ -257,6 +286,8 @@ FullO3CPU::FullO3CPU(Params *params) lastRunningCycle = curTick; + lastActivatedCycle = -1; + contextSwitch = false; } @@ -574,31 +605,45 @@ FullO3CPU::activateWhenReady(int tid) template void -FullO3CPU::activateContext(int tid, int delay) +FullO3CPU::activateThread(unsigned int tid) { - // Needs to set each stage to running as well. list::iterator isActive = find( activeThreads.begin(), activeThreads.end(), tid); if (isActive == activeThreads.end()) { - //May Need to Re-code this if the delay variable is the - //delay needed for thread to activate - DPRINTF(O3CPU, "Adding Thread %i to active threads list\n", + DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n", tid); activeThreads.push_back(tid); } +} - assert(_status == Idle || _status == SwitchedOut); - scheduleTickEvent(delay); +template +void +FullO3CPU::activateContext(int tid, int delay) +{ + // Needs to set each stage to running as well. + if (delay){ + DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to activate " + "on cycle %d\n", tid, curTick + cycles(delay)); + scheduleActivateThreadEvent(tid, delay); + } else { + activateThread(tid); + } - // Be sure to signal that there's some activity so the CPU doesn't - // deschedule itself. - activityRec.activity(); - fetch.wakeFromQuiesce(); + if(lastActivatedCycle < curTick) { + scheduleTickEvent(delay); - _status = Running; + // Be sure to signal that there's some activity so the CPU doesn't + // deschedule itself. + activityRec.activity(); + fetch.wakeFromQuiesce(); + + lastActivatedCycle = curTick; + + _status = Running; + } } template diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 9565bbe4f3..2a9ecff4ed 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -102,6 +102,7 @@ class FullO3CPU : public BaseO3CPU typedef typename std::list::iterator ListIt; friend class O3ThreadContext; + public: enum Status { Running, @@ -114,6 +115,9 @@ class FullO3CPU : public BaseO3CPU /** Overall CPU status. */ Status _status; + /** Per-thread status in CPU, used for SMT. */ + Status _threadStatus[Impl::MaxThreads]; + private: class TickEvent : public Event { @@ -150,6 +154,49 @@ class FullO3CPU : public BaseO3CPU tickEvent.squash(); } + class ActivateThreadEvent : public Event + { + private: + /** Number of Thread to Activate */ + int tid; + + /** Pointer to the CPU. */ + FullO3CPU *cpu; + + public: + /** Constructs the event. */ + ActivateThreadEvent(); + + /** Initialize Event */ + void init(int thread_num, FullO3CPU *thread_cpu); + + /** Processes the event, calling activateThread() on the CPU. */ + void process(); + + /** Returns the description of the event. */ + const char *description(); + }; + + /** Schedule thread to activate , regardless of its current state. */ + void scheduleActivateThreadEvent(int tid, int delay) + { + // Schedule thread to activate, regardless of its current state. + if (activateThreadEvent[tid].squashed()) + activateThreadEvent[tid].reschedule(curTick + cycles(delay)); + else if (!activateThreadEvent[tid].scheduled()) + activateThreadEvent[tid].schedule(curTick + cycles(delay)); + } + + /** Unschedule actiavte thread event, regardless of its current state. */ + void unscheduleActivateThreadEvent(int tid) + { + if (activateThreadEvent[tid].scheduled()) + activateThreadEvent[tid].squash(); + } + + /** The tick event used for scheduling CPU ticks. */ + ActivateThreadEvent activateThreadEvent[Impl::MaxThreads]; + public: /** Constructs a CPU with the given parameters. */ FullO3CPU(Params *params); @@ -167,6 +214,9 @@ class FullO3CPU : public BaseO3CPU /** Initialize the CPU */ void init(); + /** Add Thread to Active Threads List */ + void activateThread(unsigned int tid); + /** Setup CPU to insert a thread's context */ void insertThread(unsigned tid); @@ -522,6 +572,9 @@ class FullO3CPU : public BaseO3CPU /** The cycle that the CPU was last running, used for statistics. */ Tick lastRunningCycle; + /** The cycle that the CPU was last activated by a new thread*/ + Tick lastActivatedCycle; + /** Number of Threads CPU can process */ unsigned numThreads; From 23cfd9489bdff00f926f5dcfb7dd72333fb11bfb Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sat, 1 Jul 2006 19:02:43 -0400 Subject: [PATCH 12/17] traceflag stuff src/base/traceflags.py: add BaseCPU flag, O3CPUAll flag grouping src/cpu/base.cc: Use BaseCPU flag instead of FullCPU flag --HG-- extra : convert_revision : 32f737a2f58eb936634799f1f809e07cbba90179 --- src/base/traceflags.py | 3 ++- src/cpu/base.cc | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/base/traceflags.py b/src/base/traceflags.py index c4dcb695b2..d51236c46f 100644 --- a/src/base/traceflags.py +++ b/src/base/traceflags.py @@ -115,6 +115,7 @@ baseFlags = [ 'MSHR', 'Mbox', 'MemDepUnit', + 'BaseCPU' 'O3CPU', 'OzoneCPU', 'FE', @@ -176,7 +177,7 @@ compoundFlagMap = { 'EthernetAll' : [ 'Ethernet', 'EthernetPIO', 'EthernetDMA', 'EthernetData' , 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ], 'EthernetNoData' : [ 'Ethernet', 'EthernetPIO', 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ], 'IdeAll' : [ 'IdeCtrl', 'IdeDisk' ], - 'FullCPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'LSQ', 'LSQUnit', 'StoreSet', 'MemDepUnit', 'DynInst', 'FullCPU', 'Activity','Scoreboard','Writeback'], + 'O3CPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'LSQ', 'LSQUnit', 'StoreSet', 'MemDepUnit', 'DynInst', 'O3CPU', 'Activity','Scoreboard','Writeback'], 'OzoneCPUAll' : [ 'BE', 'FE', 'IBE', 'OzoneLSQ', 'OzoneCPU'] } diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 40cec416be..9df61d2ced 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -68,12 +68,12 @@ BaseCPU::BaseCPU(Params *p) number_of_threads(p->numberOfThreads), system(p->system) #endif { - DPRINTF(FullCPU, "BaseCPU: Creating object, mem address %#x.\n", this); + DPRINTF(BaseCPU, "BaseCPU: Creating object, mem address %#x.\n", this); // add self to global list of CPUs cpuList.push_back(this); - DPRINTF(FullCPU, "BaseCPU: CPU added to cpuList, mem address %#x.\n", + DPRINTF(BaseCPU, "BaseCPU: CPU added to cpuList, mem address %#x.\n", this); if (number_of_threads > maxThreadsPerCPU) From c8b3d8a1edbab505e5f9748cfa1ee866ed1fb02f Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 2 Jul 2006 23:11:24 -0400 Subject: [PATCH 13/17] Fix default SMT configuration in O3CPU (i.e. fetch policy, workloads/numThreads) Edit Test3 for newmem src/base/traceflags.py: Add O3CPU flag src/cpu/base.cc: for some reason adding a BaseCPU flag doesnt work so just go back to old way... src/cpu/o3/alpha/cpu_builder.cc: Determine number threads by workload size instead of solely by parameter. Default SMT fetch policy to RoundRobin if it's not specified in Config file src/cpu/o3/commit.hh: only use nextNPC for !ALPHA src/cpu/o3/commit_impl.hh: add FetchTrapPending as condition for commit src/cpu/o3/cpu.cc: panic if active threads is more than Impl::MaxThreads src/cpu/o3/fetch.hh: src/cpu/o3/inst_queue.hh: src/cpu/o3/inst_queue_impl.hh: src/cpu/o3/rob.hh: src/cpu/o3/rob_impl.hh: name stuff src/cpu/o3/fetch_impl.hh: fatal if try to use SMT branch count, that's unimplemented right now src/python/m5/config.py: make it clearer that a parameter is not valid within a configuration class --HG-- extra : convert_revision : 55069847304e40e257f9225f0dc3894ce6491b34 --- src/base/traceflags.py | 1 + src/cpu/base.cc | 4 ++-- src/cpu/o3/alpha/cpu_builder.cc | 15 ++++++++++++--- src/cpu/o3/commit.hh | 2 ++ src/cpu/o3/commit_impl.hh | 3 ++- src/cpu/o3/cpu.cc | 10 ++++++++-- src/cpu/o3/fetch_impl.hh | 13 +++++++++---- src/cpu/o3/inst_queue.hh | 1 + src/cpu/o3/inst_queue_impl.hh | 5 +++-- src/cpu/o3/rob.hh | 1 + src/cpu/o3/rob_impl.hh | 1 + src/python/m5/config.py | 2 +- 12 files changed, 43 insertions(+), 15 deletions(-) diff --git a/src/base/traceflags.py b/src/base/traceflags.py index d51236c46f..327ce60757 100644 --- a/src/base/traceflags.py +++ b/src/base/traceflags.py @@ -121,6 +121,7 @@ baseFlags = [ 'FE', 'IBE', 'BE', + 'O3CPU', 'OzoneLSQ', 'PCEvent', 'PCIA', diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 9df61d2ced..40cec416be 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -68,12 +68,12 @@ BaseCPU::BaseCPU(Params *p) number_of_threads(p->numberOfThreads), system(p->system) #endif { - DPRINTF(BaseCPU, "BaseCPU: Creating object, mem address %#x.\n", this); + DPRINTF(FullCPU, "BaseCPU: Creating object, mem address %#x.\n", this); // add self to global list of CPUs cpuList.push_back(this); - DPRINTF(BaseCPU, "BaseCPU: CPU added to cpuList, mem address %#x.\n", + DPRINTF(FullCPU, "BaseCPU: CPU added to cpuList, mem address %#x.\n", this); if (number_of_threads > maxThreadsPerCPU) diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc index 8190256fbf..12d083a436 100644 --- a/src/cpu/o3/alpha/cpu_builder.cc +++ b/src/cpu/o3/alpha/cpu_builder.cc @@ -274,12 +274,12 @@ CREATE_SIM_OBJECT(DerivO3CPU) // In non-full-system mode, we infer the number of threads from // the workload if it's not explicitly specified. int actual_num_threads = - numThreads.isValid() ? numThreads : workload.size(); + (numThreads.isValid() && numThreads >= workload.size()) ? + numThreads : workload.size(); if (workload.size() == 0) { fatal("Must specify at least one workload!"); } - #endif AlphaSimpleParams *params = new AlphaSimpleParams; @@ -371,7 +371,16 @@ CREATE_SIM_OBJECT(DerivO3CPU) params->numROBEntries = numROBEntries; params->smtNumFetchingThreads = smtNumFetchingThreads; - params->smtFetchPolicy = smtFetchPolicy; + + // Default smtFetchPolicy to "RoundRobin", if necessary. + std::string round_robin_policy = "RoundRobin"; + std::string single_thread = "SingleThread"; + + if (actual_num_threads > 1 && single_thread.compare(smtFetchPolicy) == 0) + params->smtFetchPolicy = single_thread; + else + params->smtFetchPolicy = smtFetchPolicy; + params->smtIQPolicy = smtIQPolicy; params->smtLSQPolicy = smtLSQPolicy; params->smtLSQThreshold = smtLSQThreshold; diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 8603262833..60b555269e 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -406,8 +406,10 @@ class DefaultCommit /** The next PC of each thread. */ Addr nextPC[Impl::MaxThreads]; +#if THE_ISA != ALPHA_ISA /** The next NPC of each thread. */ Addr nextNPC[Impl::MaxThreads]; +#endif /** The sequence number of the youngest valid instruction in the ROB. */ InstSeqNum youngestSeqNum[Impl::MaxThreads]; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index cd7dd47d48..06b8e8a954 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -1221,7 +1221,8 @@ DefaultCommit::roundRobin() unsigned tid = *pri_iter; if (commitStatus[tid] == Running || - commitStatus[tid] == Idle) { + commitStatus[tid] == Idle || + commitStatus[tid] == FetchTrapPending) { if (rob->isHeadReady(tid)) { priority_list.erase(pri_iter); diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 7ed17a91e8..feca4cdf2f 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -127,7 +127,7 @@ FullO3CPU::FullO3CPU(Params *params) regFile(params->numPhysIntRegs, params->numPhysFloatRegs), - freeList(params->numberOfThreads,//number of activeThreads + freeList(params->numberOfThreads, TheISA::NumIntRegs, params->numPhysIntRegs, TheISA::NumFloatRegs, params->numPhysFloatRegs), @@ -135,7 +135,7 @@ FullO3CPU::FullO3CPU(Params *params) params->smtROBPolicy, params->smtROBThreshold, params->numberOfThreads), - scoreboard(params->numberOfThreads,//number of activeThreads + scoreboard(params->numberOfThreads, TheISA::NumIntRegs, params->numPhysIntRegs, TheISA::NumFloatRegs, params->numPhysFloatRegs, TheISA::NumMiscRegs * number_of_threads, @@ -221,6 +221,12 @@ FullO3CPU::FullO3CPU(Params *params) #if !FULL_SYSTEM int active_threads = params->workload.size(); + + if (active_threads > Impl::MaxThreads) { + panic("Workload Size too large. Increase the 'MaxThreads'" + "constant in your O3CPU impl. file (e.g. o3/alpha/impl.hh) or " + "edit your workload size."); + } #else int active_threads = 1; #endif diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index e570dbb18d..60eb76d178 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -114,8 +114,6 @@ DefaultFetch::DefaultFetch(Params *params) if (numThreads > Impl::MaxThreads) fatal("numThreads is not a valid value\n"); - DPRINTF(Fetch, "Fetch constructor called\n"); - // Set fetch stage's status to inactive. _status = Inactive; @@ -128,6 +126,8 @@ DefaultFetch::DefaultFetch(Params *params) // Figure out fetch policy if (policy == "singlethread") { fetchPolicy = SingleThread; + if (numThreads > 1) + panic("Invalid Fetch Policy for a SMT workload."); } else if (policy == "roundrobin") { fetchPolicy = RoundRobin; DPRINTF(Fetch, "Fetch policy set to Round Robin\n"); @@ -559,7 +559,7 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid return false; } - DPRINTF(Fetch, "Doing cache access.\n"); + DPRINTF(Fetch, "[tid:%i]: Doing cache access.\n", tid); lastIcacheStall[tid] = curTick; @@ -724,12 +724,15 @@ DefaultFetch::tick() // Reset the number of the instruction we're fetching. numInst = 0; +#if FULL_SYSTEM if (fromCommit->commitInfo[0].interruptPending) { interruptPending = true; } + if (fromCommit->commitInfo[0].clearInterrupt) { interruptPending = false; } +#endif for (threadFetched = 0; threadFetched < numFetchingThreads; threadFetched++) { @@ -903,6 +906,8 @@ DefaultFetch::fetch(bool &status_change) return; } + DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid); + // The current PC. Addr &fetch_PC = PC[tid]; @@ -1279,6 +1284,6 @@ int DefaultFetch::branchCount() { list::iterator threads = (*activeThreads).begin(); - warn("Branch Count Fetch policy unimplemented\n"); + panic("Branch Count Fetch policy unimplemented\n"); return *threads; } diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index d745faf7bd..4c69ca3844 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_INST_QUEUE_HH__ diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 1ef1b2cffe..b99bd09000 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #include @@ -125,7 +126,7 @@ InstructionQueue::InstructionQueue(Params *params) maxEntries[i] = part_amt; } - DPRINTF(Fetch, "IQ sharing policy set to Partitioned:" + DPRINTF(IQ, "IQ sharing policy set to Partitioned:" "%i entries per thread.\n",part_amt); } else if (policy == "threshold") { @@ -140,7 +141,7 @@ InstructionQueue::InstructionQueue(Params *params) maxEntries[i] = thresholdIQ; } - DPRINTF(Fetch, "IQ sharing policy set to Threshold:" + DPRINTF(IQ, "IQ sharing policy set to Threshold:" "%i entries per thread.\n",thresholdIQ); } else { assert(0 && "Invalid IQ Sharing Policy.Options Are:{Dynamic," diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh index b98d7c4c2a..6f8080ef44 100644 --- a/src/cpu/o3/rob.hh +++ b/src/cpu/o3/rob.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #ifndef __CPU_O3_ROB_HH__ diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh index 6277dd68be..d9978b17f7 100644 --- a/src/cpu/o3/rob_impl.hh +++ b/src/cpu/o3/rob_impl.hh @@ -26,6 +26,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Kevin Lim + * Korey Sewell */ #include "config/full_system.hh" diff --git a/src/python/m5/config.py b/src/python/m5/config.py index adabe07439..6f2873d40b 100644 --- a/src/python/m5/config.py +++ b/src/python/m5/config.py @@ -274,7 +274,7 @@ class MetaSimObject(type): cls._values[attr] = value else: raise AttributeError, \ - "Class %s has no parameter %s" % (cls.__name__, attr) + "Class %s has no parameter \'%s\'" % (cls.__name__, attr) def __getattr__(cls, attr): if cls._values.has_key(attr): From cf58578ba120186947f382893d2e370bd6e436ce Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Sun, 2 Jul 2006 23:27:13 -0400 Subject: [PATCH 14/17] typo ... change 'single_thread' to 'round_robin_policy' --HG-- extra : convert_revision : a4a5cb90557f786d42c6178bc6e268312c5ecbee --- src/cpu/o3/alpha/cpu_builder.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpu/o3/alpha/cpu_builder.cc b/src/cpu/o3/alpha/cpu_builder.cc index 12d083a436..490305cbf2 100644 --- a/src/cpu/o3/alpha/cpu_builder.cc +++ b/src/cpu/o3/alpha/cpu_builder.cc @@ -377,7 +377,7 @@ CREATE_SIM_OBJECT(DerivO3CPU) std::string single_thread = "SingleThread"; if (actual_num_threads > 1 && single_thread.compare(smtFetchPolicy) == 0) - params->smtFetchPolicy = single_thread; + params->smtFetchPolicy = round_robin_policy; else params->smtFetchPolicy = smtFetchPolicy; From 19083bc4ce379c03b39ba941c18b11a88b141e18 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Mon, 3 Jul 2006 01:10:19 -0400 Subject: [PATCH 15/17] Added hook to check for SMT workloads. SMT is identified by adding a semicolon between the workloads. Now SMT on the O3CPU can be invoked by "/ALPHA_SE/m5.debug ../configs/test/test.py -d --cmd="hello;hello" -i="file1;file2" I think I am a novice python magician now!!!!.... configs/test/test.py: Added hook to check for SMT workloads. SMT is identified by adding a semicolon between the workloads. Now SMT on the O3CPU can be invoked by "/ALPHA_SE/m5.debug ../configs/test/test.py -d --cmd="hello;hello" --input="file1;file2" (btw, We are back to working for this double hello world case) I am a novice python magician now!!!!.... --HG-- extra : convert_revision : b55e10dce33f5a9dc4c78f90409ec0912bad4292 --- configs/test/test.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/configs/test/test.py b/configs/test/test.py index 3095cd1d12..625304a08a 100644 --- a/configs/test/test.py +++ b/configs/test/test.py @@ -51,6 +51,25 @@ if options.timing and options.detailed: if options.timing: cpu = TimingSimpleCPU() elif options.detailed: + #check for SMT workload + workloads = options.cmd.split(';') + if len(workloads) > 1: + process = [] + smt_idx = 0 + inputs = [] + + if options.input != "": + inputs = options.input.split(';') + + for wrkld in workloads: + smt_process = LiveProcess() + smt_process.executable = os.path.join(this_dir, wrkld) + smt_process.cmd = wrkld + " " + options.options + if inputs and inputs[smt_idx]: + smt_process.input = inputs[smt_idx] + process += [smt_process, ] + smt_idx += 1 + cpu = DetailedO3CPU() else: cpu = AtomicSimpleCPU() From f4c5609988731f52f9c5bd84ee2db364bbf6fd97 Mon Sep 17 00:00:00 2001 From: Korey Sewell Date: Mon, 3 Jul 2006 12:19:35 -0400 Subject: [PATCH 16/17] Fix for FS O3CPU compile ... missing forward class declaration/header file after files got split for ISA-independence src/cpu/o3/alpha/thread_context.hh: Use 'this' when accessing cpu src/cpu/o3/cpu.hh: add numActiveThreds function src/cpu/o3/thread_context.hh: forward class declarations src/cpu/o3/thread_context_impl.hh: add quiesce event header file src/cpu/thread_context.hh: add exit() function to thread context (read comments in file) src/sim/syscall_emul.cc: adjust exitFunc syscall --HG-- extra : convert_revision : 323dc871e2b4f4ee5036be388ceb6634cd85a83e --- src/cpu/o3/alpha/thread_context.hh | 24 ++++++++++++++++++++---- src/cpu/o3/cpu.hh | 4 ++++ src/cpu/o3/thread_context.hh | 7 +++++++ src/cpu/o3/thread_context_impl.hh | 1 + src/cpu/thread_context.hh | 5 +++++ src/sim/syscall_emul.cc | 5 +++-- 6 files changed, 40 insertions(+), 6 deletions(-) diff --git a/src/cpu/o3/alpha/thread_context.hh b/src/cpu/o3/alpha/thread_context.hh index 57190d65e3..78b0ee7882 100644 --- a/src/cpu/o3/alpha/thread_context.hh +++ b/src/cpu/o3/alpha/thread_context.hh @@ -37,21 +37,21 @@ class AlphaTC : public O3ThreadContext public: #if FULL_SYSTEM /** Returns a pointer to the ITB. */ - virtual AlphaITB *getITBPtr() { return cpu->itb; } + virtual AlphaITB *getITBPtr() { return this->cpu->itb; } /** Returns a pointer to the DTB. */ - virtual AlphaDTB *getDTBPtr() { return cpu->dtb; } + virtual AlphaDTB *getDTBPtr() { return this->cpu->dtb; } /** Returns pointer to the quiesce event. */ virtual EndQuiesceEvent *getQuiesceEvent() { - return thread->quiesceEvent; + return this->thread->quiesceEvent; } /** Returns if the thread is currently in PAL mode, based on * the PC's value. */ virtual bool inPalMode() - { return TheISA::PcPAL(cpu->readPC(thread->readTid())); } + { return TheISA::PcPAL(this->cpu->readPC(this->thread->readTid())); } #endif virtual uint64_t readNextNPC() @@ -68,4 +68,20 @@ class AlphaTC : public O3ThreadContext virtual void changeRegFileContext(TheISA::RegFile::ContextParam param, TheISA::RegFile::ContextVal val) { panic("Not supported on Alpha!"); } + + + // This function exits the thread context in the CPU and returns + // 1 if the CPU has no more active threads (meaning it's OK to exit); + // Used in syscall-emulation mode when a thread executes the 'exit' + // syscall. + virtual int exit() + { + this->cpu->deallocateContext(this->thread->readTid()); + + // If there are still threads executing in the system + if (this->cpu->numActiveThreads()) + return 0; + else + return 1; + } }; diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 2a9ecff4ed..1cff6142d3 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -214,6 +214,10 @@ class FullO3CPU : public BaseO3CPU /** Initialize the CPU */ void init(); + /** Returns the Number of Active Threads in the CPU */ + int numActiveThreads() + { return activeThreads.size(); } + /** Add Thread to Active Threads List */ void activateThread(unsigned int tid); diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh index d60867029a..d097ee63eb 100755 --- a/src/cpu/o3/thread_context.hh +++ b/src/cpu/o3/thread_context.hh @@ -34,6 +34,13 @@ #include "cpu/o3/isa_specific.hh" +class EndQuiesceEvent; +namespace Kernel { + class Statistics; +}; + +class TranslatingPort; + /** * Derived ThreadContext class for use with the O3CPU. It * provides the interface for any external objects to access a diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index fccabaf363..cfb71f6238 100755 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -30,6 +30,7 @@ */ #include "cpu/o3/thread_context.hh" +#include "cpu/quiesce_event.hh" using namespace TheISA; diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh index 3c79e11168..70d7051441 100644 --- a/src/cpu/thread_context.hh +++ b/src/cpu/thread_context.hh @@ -247,6 +247,11 @@ class ThreadContext // Same with st cond failures. virtual Counter readFuncExeInst() = 0; + + // This function exits the thread context in the CPU and returns + // 1 if the CPU has no more active threads (meaning it's OK to exit); + // Used in syscall-emulation mode when a thread calls the exit syscall. + virtual int exit() { return 1; }; #endif virtual void changeRegFileContext(RegFile::ContextParam param, diff --git a/src/sim/syscall_emul.cc b/src/sim/syscall_emul.cc index 848b6f8696..e72890612d 100644 --- a/src/sim/syscall_emul.cc +++ b/src/sim/syscall_emul.cc @@ -27,7 +27,6 @@ * * Authors: Steve Reinhardt * Ali Saidi - * Korey Sewell */ #include @@ -92,7 +91,9 @@ SyscallReturn exitFunc(SyscallDesc *desc, int callnum, Process *process, ThreadContext *tc) { - exitSimLoop("target called exit()", tc->getSyscallArg(0) & 0xff); + if (tc->exit()) { + exitSimLoop("target called exit()", tc->getSyscallArg(0) & 0xff); + } return 1; } From 4201ec84b2dd7d96148bf661124dd7b5d0e7204b Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Wed, 5 Jul 2006 15:13:27 -0400 Subject: [PATCH 17/17] Fix some unset values in the request in the timing CPU. Properly implement the MSHR allocate function. src/cpu/simple/timing.cc: Set the thread context in the CPU. Need to do this properly, currently I just set it to Cpu=0 Thread=0. This will just cause all the stats in the cache based on these to just yield totals and not a distribution. src/mem/cache/miss/mshr.cc: Properly implement the allocate function for the MSHR. --HG-- extra : convert_revision : bcece518e54ed1404db3196f996a77b4dd5c1c1e --- src/cpu/simple/timing.cc | 4 +++- src/mem/cache/miss/mshr.cc | 36 ++++++++++++++++++------------------ 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc index 0729f94898..d5bdcfa9b1 100644 --- a/src/cpu/simple/timing.cc +++ b/src/cpu/simple/timing.cc @@ -207,7 +207,7 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags) { // need to fill in CPU & thread IDs here Request *data_read_req = new Request(); - + data_read_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE data_read_req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); if (traceData) { @@ -288,6 +288,7 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) { // need to fill in CPU & thread IDs here Request *data_write_req = new Request(); + data_write_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE data_write_req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); // translate to physical address @@ -371,6 +372,7 @@ TimingSimpleCPU::fetch() // need to fill in CPU & thread IDs here Request *ifetch_req = new Request(); + ifetch_req->setThreadContext(0,0); //Need CPU/Thread IDS HERE Fault fault = setupFetchRequest(ifetch_req); ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast); diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc index 05a2fe1c59..1a85d3018c 100644 --- a/src/mem/cache/miss/mshr.cc +++ b/src/mem/cache/miss/mshr.cc @@ -57,26 +57,26 @@ void MSHR::allocate(Packet::Command cmd, Addr _addr, int _asid, int size, Packet * &target) { - assert("NEED TO FIX YET\n" && 0); -#if 0 - assert(targets.empty()); - addr = _addr; - asid = _asid; - - pkt = new Packet(); // allocate new memory request - pkt->addr = addr; //picked physical address for now - pkt->cmd = cmd; - pkt->size = size; - pkt->data = new uint8_t[size]; - pkt->senderState = this; - //Set the time here for latency calculations - pkt->time = curTick; - - if (target) { - pkt->req = target->req; + if (target) + { + //Have a request, just use it + pkt = new Packet(target->req, cmd, Packet::Broadcast, size); + pkt->time = curTick; + pkt->allocate(); + pkt->senderState = (Packet::SenderState *)this; allocateTarget(target); } -#endif + else + { + //need a request first + Request * req = new Request(); + req->setPhys(addr, size, 0); + //Thread context?? + pkt = new Packet(req, cmd, Packet::Broadcast, size); + pkt->time = curTick; + pkt->allocate(); + pkt->senderState = (Packet::SenderState *)this; + } } // Since we aren't sure if data is being used, don't copy here.