Merge ktlim@zizzer:/bk/newmem

into zamp.eecs.umich.edu:/z/ktlim2/clean/o3-merge/newmem src/cpu/memtest/memtest.cc: src/cpu/memtest/memtest.hh: src/cpu/simple/timing.hh: tests/configs/o3-timing-mp.py: Hand merge. --HG-- extra : convert_revision : a58cc439eb5e8f900d175ed8b5a85b6c8723e558
2006-10-09 22:59:56 -04:00
parent a9ae6c8656 5448517da4
commit bdde892d66
138 changed files with 2120 additions and 1244 deletions
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -67,6 +67,47 @@ Bus::init()
        (*intIter)->sendStatusChange(Port::RangeChange);
 }

+Bus::BusFreeEvent::BusFreeEvent(Bus *_bus) : Event(&mainEventQueue), bus(_bus)
+{
+    assert(!scheduled());
+}
+
+void Bus::BusFreeEvent::process()
+{
+    bus->recvRetry(0);
+}
+
+const char * Bus::BusFreeEvent::description()
+{
+    return "bus became available";
+}
+
+void
+Bus::occupyBus(int numCycles)
+{
+    //Move up when the bus will next be free
+    //We avoid the use of divide by adding repeatedly
+    //This should be faster if the value is updated frequently, but should
+    //be may be slower otherwise.
+
+    //Bring tickNextIdle up to the present tick
+    //There is some potential ambiguity where a cycle starts, which might make
+    //a difference when devices are acting right around a cycle boundary. Using
+    //a < allows things which happen exactly on a cycle boundary to take up only
+    //the following cycle. Anthing that happens later will have to "wait" for the
+    //end of that cycle, and then start using the bus after that.
+    while (tickNextIdle < curTick)
+        tickNextIdle += clock;
+    //Advance it numCycles bus cycles.
+    //XXX Should this use the repeating add trick as well?
+    tickNextIdle += (numCycles * clock);
+    if (!busIdle.scheduled()) {
+        busIdle.schedule(tickNextIdle);
+    } else {
+        busIdle.reschedule(tickNextIdle);
+    }
+    DPRINTF(Bus, "The bus is now occupied from tick %d to %d\n", curTick, tickNextIdle);
+}

 /** Function called by the port when the bus is receiving a Timing
 * transaction.*/
@@ -77,17 +118,26 @@ Bus::recvTiming(Packet *pkt)
    DPRINTF(Bus, "recvTiming: packet src %d dest %d addr 0x%x cmd %s\n",
            pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());

+    Port *pktPort = interfaces[pkt->getSrc()];
+
    short dest = pkt->getDest();
    if (dest == Packet::Broadcast) {
-        if ( timingSnoopPhase1(pkt) )
-        {
-            timingSnoopPhase2(pkt);
+        if (timingSnoop(pkt)) {
+            pkt->flags |= SNOOP_COMMIT;
+            bool success = timingSnoop(pkt);
+            assert(success);
+            if (pkt->flags & SATISFIED) {
+                //Cache-Cache transfer occuring
+                if (retryingPort) {
+                    retryList.pop_front();
+                    retryingPort = NULL;
+                }
+                return true;
+            }
            port = findPort(pkt->getAddr(), pkt->getSrc());
-        }
-        else
-        {
+        } else {
            //Snoop didn't succeed
-            retryList.push_back(interfaces[pkt->getSrc()]);
+            addToRetryList(pktPort);
            return false;
        }
    } else {
@@ -95,35 +145,61 @@ Bus::recvTiming(Packet *pkt)
        assert(dest != pkt->getSrc()); // catch infinite loops
        port = interfaces[dest];
    }
+
+    // The packet will be sent. Figure out how long it occupies the bus.
+    int numCycles = 0;
+    // Requests need one cycle to send an address
+    if (pkt->isRequest())
+        numCycles++;
+    else if (pkt->isResponse() || pkt->hasData()) {
+        // If a packet has data, it needs ceil(size/width) cycles to send it
+        // We're using the "adding instead of dividing" trick again here
+        if (pkt->hasData()) {
+            int dataSize = pkt->getSize();
+            for (int transmitted = 0; transmitted < dataSize;
+                    transmitted += width) {
+                numCycles++;
+            }
+        } else {
+            // If the packet didn't have data, it must have been a response.
+            // Those use the bus for one cycle to send their data.
+            numCycles++;
+        }
+    }
+
+    occupyBus(numCycles);
+
    if (port->sendTiming(pkt))  {
-        // packet was successfully sent, just return true.
+        // Packet was successfully sent. Return true.
+        // Also take care of retries
+        if (retryingPort) {
+            retryList.pop_front();
+            retryingPort = NULL;
+        }
        return true;
    }

-    // packet not successfully sent
-    retryList.push_back(interfaces[pkt->getSrc()]);
+    // Packet not successfully sent. Leave or put it on the retry list.
+    addToRetryList(pktPort);
    return false;
 }

 void
 Bus::recvRetry(int id)
 {
-    // Go through all the elements on the list calling sendRetry on each
-    // This is not very efficient at all but it works. Ultimately we should end
-    // up with something that is more intelligent.
-    int initialSize = retryList.size();
-    int i;
-    Port *p;
-
-    for (i = 0; i < initialSize; i++) {
-        assert(retryList.size() > 0);
-        p = retryList.front();
-        retryList.pop_front();
-        p->sendRetry();
+    // If there's anything waiting...
+    if (retryList.size()) {
+        retryingPort = retryList.front();
+        retryingPort->sendRetry();
+        // If the retryingPort pointer isn't null, sendTiming wasn't called
+        if (retryingPort) {
+            warn("sendRetry didn't call sendTiming\n");
+            retryList.pop_front();
+            retryingPort = NULL;
+        }
    }
 }

-
 Port *
 Bus::findPort(Addr addr, int id)
 {
@@ -194,43 +270,33 @@ Bus::atomicSnoop(Packet *pkt)
    }
 }

+void
+Bus::functionalSnoop(Packet *pkt)
+{
+    std::vector<int> ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc());
+
+    while (!ports.empty())
+    {
+        interfaces[ports.back()]->sendFunctional(pkt);
+        ports.pop_back();
+    }
+}
+
 bool
-Bus::timingSnoopPhase1(Packet *pkt)
+Bus::timingSnoop(Packet *pkt)
 {
    std::vector<int> ports = findSnoopPorts(pkt->getAddr(), pkt->getSrc());
    bool success = true;

    while (!ports.empty() && success)
    {
-        snoopCallbacks.push_back(ports.back());
        success = interfaces[ports.back()]->sendTiming(pkt);
        ports.pop_back();
    }
-    if (!success)
-    {
-        while (!snoopCallbacks.empty())
-        {
-            interfaces[snoopCallbacks.back()]->sendStatusChange(Port::SnoopSquash);
-            snoopCallbacks.pop_back();
-        }
-        return false;
-    }
-    return true;
+
+    return success;
 }

-void
-Bus::timingSnoopPhase2(Packet *pkt)
-{
-    bool success;
-    pkt->flags |= SNOOP_COMMIT;
-    while (!snoopCallbacks.empty())
-    {
-        success = interfaces[snoopCallbacks.back()]->sendTiming(pkt);
-        //We should not fail on snoop callbacks
-        assert(success);
-        snoopCallbacks.pop_back();
-    }
-}

 /** Function called by the port when the bus is receiving a Atomic
 * transaction.*/
@@ -252,7 +318,7 @@ Bus::recvFunctional(Packet *pkt)
    DPRINTF(Bus, "recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n",
            pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
    assert(pkt->getDest() == Packet::Broadcast);
-    atomicSnoop(pkt);
+    functionalSnoop(pkt);
    findPort(pkt->getAddr(), pkt->getSrc())->sendFunctional(pkt);
 }

@@ -381,16 +447,20 @@ Bus::addressRanges(AddrRangeList &resp, AddrRangeList &snoop, int id)
 BEGIN_DECLARE_SIM_OBJECT_PARAMS(Bus)

    Param<int> bus_id;
+    Param<int> clock;
+    Param<int> width;

 END_DECLARE_SIM_OBJECT_PARAMS(Bus)

 BEGIN_INIT_SIM_OBJECT_PARAMS(Bus)
-    INIT_PARAM(bus_id, "a globally unique bus id")
+    INIT_PARAM(bus_id, "a globally unique bus id"),
+    INIT_PARAM(clock, "bus clock speed"),
+    INIT_PARAM(width, "width of the bus (bits)")
 END_INIT_SIM_OBJECT_PARAMS(Bus)

 CREATE_SIM_OBJECT(Bus)
 {
-    return new Bus(getInstanceName(), bus_id);
+    return new Bus(getInstanceName(), bus_id, clock, width);
 }

 REGISTER_SIM_OBJECT("Bus", Bus)
--- a/src/mem/bus.hh
+++ b/src/mem/bus.hh
@@ -46,11 +46,18 @@
 #include "mem/packet.hh"
 #include "mem/port.hh"
 #include "mem/request.hh"
+#include "sim/eventq.hh"

 class Bus : public MemObject
 {
    /** a globally unique id for this bus. */
    int busId;
+    /** the clock speed for the bus */
+    int clock;
+    /** the width of the bus in bytes */
+    int width;
+    /** the next tick at which the bus will be idle */
+    Tick tickNextIdle;

    static const int defaultId = -1;

@@ -62,9 +69,6 @@ class Bus : public MemObject
    AddrRangeList defaultRange;
    std::vector<DevMap> portSnoopList;

-    std::vector<int> snoopCallbacks;
-
-
    /** Function called by the port when the bus is recieving a Timing
      transaction.*/
    bool recvTiming(Packet *pkt);
@@ -105,16 +109,14 @@ class Bus : public MemObject
    /** Snoop all relevant ports atomicly. */
    void atomicSnoop(Packet *pkt);

-    /** Snoop for NACK and Blocked in phase 1
+    /** Snoop all relevant ports functionally. */
+    void functionalSnoop(Packet *pkt);
+
+    /** Call snoop on caches, be sure to set SNOOP_COMMIT bit if you want
+     * the snoop to happen
     * @return True if succeds.
     */
-    bool timingSnoopPhase1(Packet *pkt);
-
-    /** @todo Don't need to commit all snoops just those that need it
-     *(register somehow). */
-    /** Commit all snoops now that we know if any of them would have blocked.
-     */
-    void timingSnoopPhase2(Packet *pkt);
+    bool timingSnoop(Packet *pkt);

    /** Process address range request.
     * @param resp addresses that we can respond to
@@ -181,6 +183,22 @@ class Bus : public MemObject

    };

+    class BusFreeEvent : public Event
+    {
+        Bus * bus;
+
+      public:
+        BusFreeEvent(Bus * _bus);
+        void process();
+        const char *description();
+    };
+
+    BusFreeEvent busIdle;
+
+    void occupyBus(int numCycles);
+
+    Port * retryingPort;
+
    /** An array of pointers to the peer port interfaces
        connected to this bus.*/
    std::vector<Port*> interfaces;
@@ -189,6 +207,23 @@ class Bus : public MemObject
     * original send failed for whatever reason.*/
    std::list<Port*> retryList;

+    void addToRetryList(Port * port)
+    {
+        if (!retryingPort) {
+            // The device wasn't retrying a packet, or wasn't at an appropriate
+            // time.
+            retryList.push_back(port);
+        } else {
+            // The device was retrying a packet. It didn't work, so we'll leave
+            // it at the head of the retry list.
+            retryingPort = NULL;
+
+            // We shouldn't be receiving a packet from one port when a different
+            // one is retrying.
+            assert(port == retryingPort);
+        }
+    }
+
    /** Port that handles requests that don't match any of the interfaces.*/
    Port *defaultPort;

@@ -199,8 +234,14 @@ class Bus : public MemObject

    virtual void init();

-    Bus(const std::string &n, int bus_id)
-        : MemObject(n), busId(bus_id), defaultPort(NULL)  {}
+    Bus(const std::string &n, int bus_id, int _clock, int _width)
+        : MemObject(n), busId(bus_id), clock(_clock), width(_width),
+        tickNextIdle(0), busIdle(this), retryingPort(NULL), defaultPort(NULL)
+    {
+        //Both the width and clock period must be positive
+        assert(width);
+        assert(clock);
+    }

 };

--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -71,7 +71,7 @@ BaseCache::CachePort::deviceBlockSize()
 bool
 BaseCache::CachePort::recvTiming(Packet *pkt)
 {
-    if (blocked)
+    if (pkt->isRequest() && blocked)
    {
        DPRINTF(Cache,"Scheduling a retry while blocked\n");
        mustSendRetry = true;
@@ -105,14 +105,14 @@ BaseCache::CachePort::recvRetry()
                drainList.pop_front();
        }
    }
-
-    if (!isCpuSide)
+    else if (!isCpuSide)
    {
        pkt = cache->getPacket();
+        MSHR* mshr = (MSHR*)pkt->senderState;
        bool success = sendTiming(pkt);
        DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
                pkt->getAddr(), success ? "succesful" : "unsuccesful");
-        cache->sendResult(pkt, success);
+        cache->sendResult(pkt, mshr, success);
        if (success && cache->doMasterRequest())
        {
            //Still more to issue, rerequest in 1 cycle
@@ -123,7 +123,9 @@ BaseCache::CachePort::recvRetry()
    }
    else
    {
-        pkt = cache->getCoherencePacket();
+        //pkt = cache->getCoherencePacket();
+        //We save the packet, no reordering on CSHRS
+        pkt = cshrRetry;
        bool success = sendTiming(pkt);
        if (success && cache->doSlaveRequest())
        {
@@ -182,10 +184,11 @@ BaseCache::CacheEvent::process()
        {
            //MSHR
            pkt = cachePort->cache->getPacket();
+            MSHR* mshr = (MSHR*) pkt->senderState;
            bool success = cachePort->sendTiming(pkt);
            DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
                    pkt->getAddr(), success ? "succesful" : "unsuccesful");
-            cachePort->cache->sendResult(pkt, success);
+            cachePort->cache->sendResult(pkt, mshr, success);
            if (success && cachePort->cache->doMasterRequest())
            {
                //Still more to issue, rerequest in 1 cycle
@@ -198,7 +201,11 @@ BaseCache::CacheEvent::process()
            //CSHR
            pkt = cachePort->cache->getCoherencePacket();
            bool success = cachePort->sendTiming(pkt);
-            if (success && cachePort->cache->doSlaveRequest())
+            if (!success) {
+                //Need to send on a retry
+                cachePort->cshrRetry = pkt;
+            }
+            else if (cachePort->cache->doSlaveRequest())
            {
                //Still more to issue, rerequest in 1 cycle
                pkt = NULL;
@@ -209,7 +216,10 @@ BaseCache::CacheEvent::process()
    }
    //Response
    //Know the packet to send
-    pkt->result = Packet::Success;
+    if (pkt->flags & NACKED_LINE)
+        pkt->result = Packet::Nacked;
+    else
+        pkt->result = Packet::Success;
    pkt->makeTimingResponse();
    if (!cachePort->drainList.empty()) {
        //Already blocked waiting for bus, just append
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -72,6 +72,7 @@ enum RequestCause{
    Request_PF
 };

+class MSHR;
 /**
 * A basic cache interface. Implements some common functions for speed.
 */
@@ -112,6 +113,8 @@ class BaseCache : public MemObject
        bool isCpuSide;

        std::list<Packet *> drainList;
+
+        Packet *cshrRetry;
    };

    struct CacheEvent : public Event
@@ -156,7 +159,7 @@ class BaseCache : public MemObject
        if (status == Port::RangeChange){
            if (!isCpuSide) {
                cpuSidePort->sendStatusChange(Port::RangeChange);
-                if (topLevelCache && !snoopRangesSent) {
+                if (!snoopRangesSent) {
                    snoopRangesSent = true;
                    memSidePort->sendStatusChange(Port::RangeChange);
                }
@@ -165,10 +168,6 @@ class BaseCache : public MemObject
                memSidePort->sendStatusChange(Port::RangeChange);
            }
        }
-        else if (status == Port::SnoopSquash) {
-            assert(snoopPhase2);
-            snoopPhase2 = false;
-        }
    }

    virtual Packet *getPacket()
@@ -181,7 +180,7 @@ class BaseCache : public MemObject
        fatal("No implementation");
    }

-    virtual void sendResult(Packet* &pkt, bool success)
+    virtual void sendResult(Packet* &pkt, MSHR* mshr, bool success)
    {

        fatal("No implementation");
@@ -215,9 +214,6 @@ class BaseCache : public MemObject
    bool topLevelCache;


-    /** True if we are now in phase 2 of the snoop process. */
-    bool snoopPhase2;
-
    /** Stores time the cache blocked for statistics. */
    Tick blockedCycle;

@@ -523,8 +519,10 @@ class BaseCache : public MemObject
     */
    void respond(Packet *pkt, Tick time)
    {
-        CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
-        reqCpu->schedule(time);
+        if (pkt->needsResponse()) {
+            CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
+            reqCpu->schedule(time);
+        }
    }

    /**
@@ -537,8 +535,10 @@ class BaseCache : public MemObject
        if (!pkt->req->isUncacheable()) {
            missLatency[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += time - pkt->time;
        }
-        CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
-        reqCpu->schedule(time);
+        if (pkt->needsResponse()) {
+            CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
+            reqCpu->schedule(time);
+        }
    }

    /**
@@ -549,6 +549,7 @@ class BaseCache : public MemObject
    {
 //        assert("Implement\n" && 0);
 //	mi->respond(pkt,curTick + hitLatency);
+        assert (pkt->needsResponse());
        CacheEvent *reqMem = new CacheEvent(memSidePort, pkt);
        reqMem->schedule(time);
    }
@@ -570,14 +571,14 @@ class BaseCache : public MemObject
        {
            //This is where snoops get updated
            AddrRangeList dummy;
-            if (!topLevelCache)
-            {
+//            if (!topLevelCache)
+//            {
                cpuSidePort->getPeerAddressRanges(dummy, snoop);
-            }
-            else
-            {
-                snoop.push_back(RangeSize(0,-1));
-            }
+//            }
+//            else
+//            {
+//                snoop.push_back(RangeSize(0,-1));
+//            }

            return;
        }
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -175,7 +175,7 @@ class Cache : public BaseCache
     * @param pkt The request.
     * @param success True if the request was sent successfully.
     */
-    virtual void sendResult(Packet * &pkt, bool success);
+    virtual void sendResult(Packet * &pkt, MSHR* mshr, bool success);

    /**
     * Handles a response (cache line fill/write ack) from the bus.
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -60,28 +60,20 @@ doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide)
 {
    if (isCpuSide)
    {
-        if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) {
+        if (pkt->isWrite() && (pkt->req->isLocked())) {
            pkt->req->setScResult(1);
        }
-        if (!(pkt->flags & SATISFIED)) {
-            access(pkt);
-        }
+        access(pkt);
+
    }
    else
    {
        if (pkt->isResponse())
            handleResponse(pkt);
        else {
-            //Check if we are in phase1
-            if (!snoopPhase2) {
-                snoopPhase2 = true;
-            }
-            else {
-                //Check if we should do the snoop
-                if (pkt->flags && SNOOP_COMMIT)
-                    snoop(pkt);
-                snoopPhase2 = false;
-            }
+            //Check if we should do the snoop
+            if (pkt->flags & SNOOP_COMMIT)
+                snoop(pkt);
        }
    }
    return true;
@@ -95,7 +87,7 @@ doAtomicAccess(Packet *pkt, bool isCpuSide)
    if (isCpuSide)
    {
        //Temporary solution to LL/SC
-        if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) {
+        if (pkt->isWrite() && (pkt->req->isLocked())) {
            pkt->req->setScResult(1);
        }

@@ -125,7 +117,7 @@ doFunctionalAccess(Packet *pkt, bool isCpuSide)
        pkt->req->setThreadContext(0,0);

        //Temporary solution to LL/SC
-        if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) {
+        if (pkt->isWrite() && (pkt->req->isLocked())) {
            assert("Can't handle LL/SC on functional path\n");
        }

@@ -211,9 +203,8 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt)
                    pkt->getAddr() & (((ULL(1))<<48)-1),
                    pkt->getAddr() & ~((Addr)blkSize - 1));

-            //@todo Should this return latency have the hit latency in it?
-//	    respond(pkt,curTick+lat);
            pkt->flags |= SATISFIED;
+            //Invalidates/Upgrades need no response if they get the bus
 //            return MA_HIT; //@todo, return values
            return true;
        }
@@ -243,9 +234,9 @@ Cache<TagStore,Buffering,Coherence>::access(PacketPtr &pkt)
        missQueue->doWriteback(writebacks.front());
        writebacks.pop_front();
    }
-    DPRINTF(Cache, "%s %x %s blk_addr: %x pc %x\n", pkt->cmdString(),
+    DPRINTF(Cache, "%s %x %s blk_addr: %x\n", pkt->cmdString(),
            pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss",
-            pkt->getAddr() & ~((Addr)blkSize - 1), pkt->req->getPC());
+            pkt->getAddr() & ~((Addr)blkSize - 1));
    if (blk) {
        // Hit
        hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
@@ -294,10 +285,10 @@ Cache<TagStore,Buffering,Coherence>::getPacket()

 template<class TagStore, class Buffering, class Coherence>
 void
-Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, bool success)
+Cache<TagStore,Buffering,Coherence>::sendResult(PacketPtr &pkt, MSHR* mshr, bool success)
 {
    if (success) {
-        missQueue->markInService(pkt);
+              missQueue->markInService(pkt, mshr);
          //Temp Hack for UPGRADES
          if (pkt->cmd == Packet::UpgradeReq) {
              handleResponse(pkt);
@@ -313,6 +304,13 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt)
 {
    BlkType *blk = NULL;
    if (pkt->senderState) {
+        if (pkt->result == Packet::Nacked) {
+            pkt->reinitFromRequest();
+            panic("Unimplemented NACK of packet\n");
+        }
+        if (pkt->result == Packet::BadAddress) {
+            //Make the response a Bad address and send it
+        }
 //	MemDebug::cacheResponse(pkt);
        DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->getAddr(),
                pkt->getAddr() & (((ULL(1))<<48)-1));
@@ -321,9 +319,11 @@ Cache<TagStore,Buffering,Coherence>::handleResponse(Packet * &pkt)
            blk = tags->findBlock(pkt);
            CacheBlk::State old_state = (blk) ? blk->status : 0;
            PacketList writebacks;
+            CacheBlk::State new_state = coherence->getNewState(pkt,old_state);
+            DPRINTF(Cache, "Block for blk addr %x moving from state %i to %i\n",
+                    pkt->getAddr() & (((ULL(1))<<48)-1), old_state, new_state);
            blk = tags->handleFill(blk, (MSHR*)pkt->senderState,
-                                   coherence->getNewState(pkt,old_state),
-                                   writebacks, pkt);
+                                   new_state, writebacks, pkt);
            while (!writebacks.empty()) {
                    missQueue->doWriteback(writebacks.front());
                    writebacks.pop_front();
@@ -394,9 +394,9 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
                    //If the outstanding request was an invalidate (upgrade,readex,..)
                    //Then we need to ACK the request until we get the data
                    //Also NACK if the outstanding request is not a cachefill (writeback)
+                    assert(!(pkt->flags & SATISFIED));
                    pkt->flags |= SATISFIED;
                    pkt->flags |= NACKED_LINE;
-                    assert("Don't detect these on the other side yet\n");
                    respondToSnoop(pkt, curTick + hitLatency);
                    return;
                }
@@ -410,7 +410,7 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
                    //@todo Make it so that a read to a pending read can't be exclusive now.

                    //Set the address so find match works
-                    assert("Don't have invalidates yet\n");
+                    panic("Don't have invalidates yet\n");
                    invalidatePkt->addrOverride(pkt->getAddr());

                    //Append the invalidate on
@@ -433,6 +433,7 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
                    if (pkt->isRead()) {
                        //Only Upgrades don't get here
                        //Supply the data
+                        assert(!(pkt->flags & SATISFIED));
                        pkt->flags |= SATISFIED;

                        //If we are in an exclusive protocol, make it ask again
@@ -451,7 +452,7 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)

                    if (pkt->isInvalidate()) {
                        //This must be an upgrade or other cache will take ownership
-                        missQueue->markInService(mshr->pkt);
+                        missQueue->markInService(mshr->pkt, mshr);
                    }
                    return;
                }
@@ -461,10 +462,16 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
    CacheBlk::State new_state;
    bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
    if (satisfy) {
+        DPRINTF(Cache, "Cache snooped a %s request and now supplying data,"
+                "new state is %i\n",
+                pkt->cmdString(), new_state);
+
        tags->handleSnoop(blk, new_state, pkt);
        respondToSnoop(pkt, curTick + hitLatency);
        return;
    }
+    if (blk) DPRINTF(Cache, "Cache snooped a %s request, new state is %i\n",
+                     pkt->cmdString(), new_state);
    tags->handleSnoop(blk, new_state);
 }

@@ -610,7 +617,7 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort
            // update the cache state and statistics
            if (mshr || !writes.empty()){
                // Can't handle it, return pktuest unsatisfied.
-                return 0;
+                panic("Atomic access ran into outstanding MSHR's or WB's!");
            }
            if (!pkt->req->isUncacheable()) {
                // Fetch the cache block to fill
@@ -627,7 +634,9 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort
                lat = memSidePort->sendAtomic(busPkt);

                //Be sure to flip the response to a request for coherence
-                busPkt->makeAtomicResponse();
+                if (busPkt->needsResponse()) {
+                    busPkt->makeAtomicResponse();
+                }

 /*		if (!(busPkt->flags & SATISFIED)) {
                    // blocked at a higher level, just return
@@ -662,7 +671,7 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update, CachePort
            hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
        } else if (pkt->isWrite()) {
            // Still need to change data in all locations.
-            return otherSidePort->sendAtomic(pkt);
+            otherSidePort->sendFunctional(pkt);
        }
        return curTick + lat;
    }
@@ -680,9 +689,15 @@ Cache<TagStore,Buffering,Coherence>::snoopProbe(PacketPtr &pkt)
        CacheBlk::State new_state = 0;
        bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
        if (satisfy) {
+            DPRINTF(Cache, "Cache snooped a %s request and now supplying data,"
+                    "new state is %i\n",
+                    pkt->cmdString(), new_state);
+
            tags->handleSnoop(blk, new_state, pkt);
            return hitLatency;
        }
+        if (blk) DPRINTF(Cache, "Cache snooped a %s request, new state is %i\n",
+                     pkt->cmdString(), new_state);
        tags->handleSnoop(blk, new_state);
        return 0;
 }
--- a/src/mem/cache/coherence/coherence_protocol.cc
+++ b/src/mem/cache/coherence/coherence_protocol.cc
@@ -271,7 +271,7 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
    }

    Packet::Command writeToSharedCmd = doUpgrades ? Packet::UpgradeReq : Packet::ReadExReq;
-    Packet::Command writeToSharedResp = doUpgrades ? Packet::UpgradeResp : Packet::ReadExResp;
+    Packet::Command writeToSharedResp = doUpgrades ? Packet::UpgradeReq : Packet::ReadExResp;

 //@todo add in hardware prefetch to this list
    if (protocol == "msi") {
--- a/src/mem/cache/miss/blocking_buffer.cc
+++ b/src/mem/cache/miss/blocking_buffer.cc
@@ -123,12 +123,12 @@ BlockingBuffer::restoreOrigCmd(Packet * &pkt)
 }

 void
-BlockingBuffer::markInService(Packet * &pkt)
+BlockingBuffer::markInService(Packet * &pkt, MSHR* mshr)
 {
    if (!pkt->isCacheFill() && pkt->isWrite()) {
        // Forwarding a write/ writeback, don't need to change
        // the command
-        assert((MSHR*)pkt->senderState == &wb);
+        assert(mshr == &wb);
        cache->clearMasterRequest(Request_WB);
        if (!pkt->needsResponse()) {
            assert(wb.getNumTargets() == 0);
@@ -138,7 +138,7 @@ BlockingBuffer::markInService(Packet * &pkt)
            wb.inService = true;
        }
    } else {
-        assert((MSHR*)pkt->senderState == &miss);
+        assert(mshr == &miss);
        cache->clearMasterRequest(Request_MSHR);
        if (!pkt->needsResponse()) {
            assert(miss.getNumTargets() == 0);
--- a/src/mem/cache/miss/blocking_buffer.hh
+++ b/src/mem/cache/miss/blocking_buffer.hh
@@ -152,7 +152,7 @@ public:
     * are successfully sent.
     * @param pkt The request that was sent on the bus.
     */
-    void markInService(Packet * &pkt);
+    void markInService(Packet * &pkt, MSHR* mshr);

    /**
     * Frees the resources of the pktuest and unblock the cache.
--- a/src/mem/cache/miss/miss_queue.cc
+++ b/src/mem/cache/miss/miss_queue.cc
@@ -372,7 +372,7 @@ MissQueue::allocateMiss(Packet * &pkt, int size, Tick time)
 MSHR*
 MissQueue::allocateWrite(Packet * &pkt, int size, Tick time)
 {
-    MSHR* mshr = wb.allocate(pkt,blkSize);
+    MSHR* mshr = wb.allocate(pkt,size);
    mshr->order = order++;

 //REMOVING COMPRESSION FOR NOW
@@ -446,7 +446,7 @@ MissQueue::handleMiss(Packet * &pkt, int blkSize, Tick time)
        /**
         * @todo Add write merging here.
         */
-        mshr = allocateWrite(pkt, blkSize, time);
+        mshr = allocateWrite(pkt, pkt->getSize(), time);
        return;
    }

@@ -526,9 +526,8 @@ MissQueue::restoreOrigCmd(Packet * &pkt)
 }

 void
-MissQueue::markInService(Packet * &pkt)
+MissQueue::markInService(Packet * &pkt, MSHR* mshr)
 {
-    assert(pkt->senderState != 0);
    bool unblock = false;
    BlockedCause cause = NUM_BLOCKED_CAUSES;

@@ -540,7 +539,7 @@ MissQueue::markInService(Packet * &pkt)
        // Forwarding a write/ writeback, don't need to change
        // the command
        unblock = wb.isFull();
-        wb.markInService((MSHR*)pkt->senderState);
+        wb.markInService(mshr);
        if (!wb.havePending()){
            cache->clearMasterRequest(Request_WB);
        }
@@ -551,11 +550,11 @@ MissQueue::markInService(Packet * &pkt)
        }
    } else {
        unblock = mq.isFull();
-        mq.markInService((MSHR*)pkt->senderState);
+        mq.markInService(mshr);
        if (!mq.havePending()){
            cache->clearMasterRequest(Request_MSHR);
        }
-        if (((MSHR*)(pkt->senderState))->originalCmd == Packet::HardPFReq) {
+        if (mshr->originalCmd == Packet::HardPFReq) {
            DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n",
                    cache->name());
            //Also clear pending if need be
--- a/src/mem/cache/miss/miss_queue.hh
+++ b/src/mem/cache/miss/miss_queue.hh
@@ -256,7 +256,7 @@ class MissQueue
     * are successfully sent.
     * @param pkt The request that was sent on the bus.
     */
-    void markInService(Packet * &pkt);
+    void markInService(Packet * &pkt, MSHR* mshr);

    /**
     * Collect statistics and free resources of a satisfied pktuest.
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -58,10 +58,6 @@ typedef std::list<PacketPtr> PacketList;
 #define NO_ALLOCATE 1 << 5
 #define SNOOP_COMMIT 1 << 6

-//For statistics we need max number of commands, hard code it at
-//20 for now.  @todo fix later
-#define NUM_MEM_CMDS 1 << 9
-
 /**
 * A Packet is used to encapsulate a transfer between two objects in
 * the memory system (e.g., the L1 and L2 cache).  (In contrast, a
@@ -164,6 +160,8 @@ class Packet

  private:
    /** List of command attributes. */
+    // If you add a new CommandAttribute, make sure to increase NUM_MEM_CMDS
+    // as well.
    enum CommandAttribute
    {
        IsRead		= 1 << 0,
@@ -174,30 +172,37 @@ class Packet
        IsResponse 	= 1 << 5,
        NeedsResponse	= 1 << 6,
        IsSWPrefetch    = 1 << 7,
-        IsHWPrefetch    = 1 << 8
+        IsHWPrefetch    = 1 << 8,
+        HasData		= 1 << 9
    };

+//For statistics we need max number of commands, hard code it at
+//20 for now.  @todo fix later
+#define NUM_MEM_CMDS 1 << 10
+
  public:
    /** List of all commands associated with a packet. */
    enum Command
    {
        InvalidCmd      = 0,
        ReadReq		= IsRead  | IsRequest | NeedsResponse,
-        WriteReq	= IsWrite | IsRequest | NeedsResponse,
-        WriteReqNoAck	= IsWrite | IsRequest,
-        ReadResp	= IsRead  | IsResponse | NeedsResponse,
+        WriteReq	= IsWrite | IsRequest | NeedsResponse | HasData,
+        WriteReqNoAck	= IsWrite | IsRequest | HasData,
+        ReadResp	= IsRead  | IsResponse | NeedsResponse | HasData,
        WriteResp	= IsWrite | IsResponse | NeedsResponse,
-        Writeback       = IsWrite | IsRequest,
+        Writeback       = IsWrite | IsRequest | HasData,
        SoftPFReq       = IsRead  | IsRequest | IsSWPrefetch | NeedsResponse,
        HardPFReq       = IsRead  | IsRequest | IsHWPrefetch | NeedsResponse,
-        SoftPFResp      = IsRead  | IsResponse | IsSWPrefetch | NeedsResponse,
-        HardPFResp      = IsRead  | IsResponse | IsHWPrefetch | NeedsResponse,
+        SoftPFResp      = IsRead  | IsResponse | IsSWPrefetch
+                                | NeedsResponse | HasData,
+        HardPFResp      = IsRead  | IsResponse | IsHWPrefetch
+                                | NeedsResponse | HasData,
        InvalidateReq   = IsInvalidate | IsRequest,
-        WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest,
-        UpgradeReq      = IsInvalidate | IsRequest | NeedsResponse,
-        UpgradeResp     = IsInvalidate | IsResponse | NeedsResponse,
+        WriteInvalidateReq = IsWrite | IsInvalidate | IsRequest | HasData,
+        UpgradeReq      = IsInvalidate | IsRequest,
        ReadExReq       = IsRead | IsInvalidate | IsRequest | NeedsResponse,
-        ReadExResp      = IsRead | IsInvalidate | IsResponse | NeedsResponse
+        ReadExResp      = IsRead | IsInvalidate | IsResponse
+                                | NeedsResponse | HasData
    };

    /** Return the string name of the cmd field (for debugging and
@@ -219,6 +224,7 @@ class Packet
    bool isResponse()	 { return (cmd & IsResponse) != 0; }
    bool needsResponse() { return (cmd & NeedsResponse) != 0; }
    bool isInvalidate()  { return (cmd & IsInvalidate) != 0; }
+    bool hasData()	 { return (cmd & HasData) != 0; }

    bool isCacheFill() { return (flags & CACHE_LINE_FILL) != 0; }
    bool isNoAllocate() { return (flags & NO_ALLOCATE) != 0; }
--- a/src/mem/physical.cc
+++ b/src/mem/physical.cc
@@ -110,28 +110,112 @@ PhysicalMemory::calculateLatency(Packet *pkt)
    return lat;
 }

+
+
+// Add load-locked to tracking list.  Should only be called if the
+// operation is a load and the LOCKED flag is set.
+void
+PhysicalMemory::trackLoadLocked(Request *req)
+{
+    Addr paddr = LockedAddr::mask(req->getPaddr());
+
+    // first we check if we already have a locked addr for this
+    // xc.  Since each xc only gets one, we just update the
+    // existing record with the new address.
+    list<LockedAddr>::iterator i;
+
+    for (i = lockedAddrList.begin(); i != lockedAddrList.end(); ++i) {
+        if (i->matchesContext(req)) {
+            DPRINTF(LLSC, "Modifying lock record: cpu %d thread %d addr %#x\n",
+                    req->getCpuNum(), req->getThreadNum(), paddr);
+            i->addr = paddr;
+            return;
+        }
+    }
+
+    // no record for this xc: need to allocate a new one
+    DPRINTF(LLSC, "Adding lock record: cpu %d thread %d addr %#x\n",
+            req->getCpuNum(), req->getThreadNum(), paddr);
+    lockedAddrList.push_front(LockedAddr(req));
+}
+
+
+// Called on *writes* only... both regular stores and
+// store-conditional operations.  Check for conventional stores which
+// conflict with locked addresses, and for success/failure of store
+// conditionals.
+bool
+PhysicalMemory::checkLockedAddrList(Request *req)
+{
+    Addr paddr = LockedAddr::mask(req->getPaddr());
+    bool isLocked = req->isLocked();
+
+    // Initialize return value.  Non-conditional stores always
+    // succeed.  Assume conditional stores will fail until proven
+    // otherwise.
+    bool success = !isLocked;
+
+    // Iterate over list.  Note that there could be multiple matching
+    // records, as more than one context could have done a load locked
+    // to this location.
+    list<LockedAddr>::iterator i = lockedAddrList.begin();
+
+    while (i != lockedAddrList.end()) {
+
+        if (i->addr == paddr) {
+            // we have a matching address
+
+            if (isLocked && i->matchesContext(req)) {
+                // it's a store conditional, and as far as the memory
+                // system can tell, the requesting context's lock is
+                // still valid.
+                DPRINTF(LLSC, "StCond success: cpu %d thread %d addr %#x\n",
+                        req->getCpuNum(), req->getThreadNum(), paddr);
+                success = true;
+            }
+
+            // Get rid of our record of this lock and advance to next
+            DPRINTF(LLSC, "Erasing lock record: cpu %d thread %d addr %#x\n",
+                    i->cpuNum, i->threadNum, paddr);
+            i = lockedAddrList.erase(i);
+        }
+        else {
+            // no match: advance to next record
+            ++i;
+        }
+    }
+
+    if (isLocked) {
+        req->setScResult(success ? 1 : 0);
+    }
+
+    return success;
+}
+
 void
 PhysicalMemory::doFunctionalAccess(Packet *pkt)
 {
    assert(pkt->getAddr() + pkt->getSize() <= params()->addrRange.size());

-    switch (pkt->cmd) {
-      case Packet::ReadReq:
+    if (pkt->isRead()) {
+        if (pkt->req->isLocked()) {
+            trackLoadLocked(pkt->req);
+        }
        memcpy(pkt->getPtr<uint8_t>(),
               pmemAddr + pkt->getAddr() - params()->addrRange.start,
               pkt->getSize());
-        break;
-      case Packet::WriteReq:
-        memcpy(pmemAddr + pkt->getAddr() - params()->addrRange.start,
-               pkt->getPtr<uint8_t>(),
-               pkt->getSize());
-        // temporary hack: will need to add real LL/SC implementation
-        // for cacheless systems later.
-        if (pkt->req->getFlags() & LOCKED) {
-            pkt->req->setScResult(1);
+    }
+    else if (pkt->isWrite()) {
+        if (writeOK(pkt->req)) {
+            memcpy(pmemAddr + pkt->getAddr() - params()->addrRange.start,
+                   pkt->getPtr<uint8_t>(), pkt->getSize());
        }
-        break;
-      default:
+    }
+    else if (pkt->isInvalidate()) {
+        //upgrade or invalidate
+        pkt->flags |= SATISFIED;
+    }
+    else {
        panic("unimplemented");
    }

@@ -147,7 +231,7 @@ PhysicalMemory::getPort(const std::string &if_name, int idx)
        port = new MemoryPort(name() + "-port", this);
        return port;
    } else if (if_name == "functional") {
-        /* special port for functional writes at startup. */
+        /* special port for functional writes at startup. And for memtester */
        return new MemoryPort(name() + "-funcport", this);
    } else {
        panic("PhysicalMemory::getPort: unknown port %s requested", if_name);
--- a/src/mem/physical.hh
+++ b/src/mem/physical.hh
@@ -78,6 +78,68 @@ class PhysicalMemory : public MemObject
    const PhysicalMemory &operator=(const PhysicalMemory &specmem);

  protected:
+
+    class LockedAddr {
+      public:
+        // on alpha, minimum LL/SC granularity is 16 bytes, so lower
+        // bits need to masked off.
+        static const Addr Addr_Mask = 0xf;
+
+        static Addr mask(Addr paddr) { return (paddr & ~Addr_Mask); }
+
+        Addr addr; 	// locked address
+        int cpuNum;	// locking CPU
+        int threadNum;	// locking thread ID within CPU
+
+        // check for matching execution context
+        bool matchesContext(Request *req)
+        {
+            return (cpuNum == req->getCpuNum() &&
+                    threadNum == req->getThreadNum());
+        }
+
+        LockedAddr(Request *req)
+            : addr(mask(req->getPaddr())),
+              cpuNum(req->getCpuNum()),
+              threadNum(req->getThreadNum())
+        {
+        }
+    };
+
+    std::list<LockedAddr> lockedAddrList;
+
+    // helper function for checkLockedAddrs(): we really want to
+    // inline a quick check for an empty locked addr list (hopefully
+    // the common case), and do the full list search (if necessary) in
+    // this out-of-line function
+    bool checkLockedAddrList(Request *req);
+
+    // Record the address of a load-locked operation so that we can
+    // clear the execution context's lock flag if a matching store is
+    // performed
+    void trackLoadLocked(Request *req);
+
+    // Compare a store address with any locked addresses so we can
+    // clear the lock flag appropriately.  Return value set to 'false'
+    // if store operation should be suppressed (because it was a
+    // conditional store and the address was no longer locked by the
+    // requesting execution context), 'true' otherwise.  Note that
+    // this method must be called on *all* stores since even
+    // non-conditional stores must clear any matching lock addresses.
+    bool writeOK(Request *req) {
+        if (lockedAddrList.empty()) {
+            // no locked addrs: nothing to check, store_conditional fails
+            bool isLocked = req->isLocked();
+            if (isLocked) {
+                req->setScResult(0);
+            }
+            return !isLocked; // only do write if not an sc
+        } else {
+            // iterate over list...
+            return checkLockedAddrList(req);
+        }
+    }
+
    uint8_t *pmemAddr;
    MemoryPort *port;
    int pagePtr;
--- a/src/mem/port.hh
+++ b/src/mem/port.hh
@@ -106,8 +106,7 @@ class Port
    /** Holds the ports status.  Currently just that a range recomputation needs
     * to be done. */
    enum Status {
-        RangeChange,
-        SnoopSquash
+        RangeChange
    };

    void setName(const std::string &name)
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -232,9 +232,11 @@ class Request
    Addr getPC() { assert(validPC); return pc; }

    /** Accessor Function to Check Cacheability. */
-    bool isUncacheable() { return getFlags() & UNCACHEABLE; }
+    bool isUncacheable() { return (getFlags() & UNCACHEABLE) != 0; }

-    bool isInstRead() { return getFlags() & INST_READ; }
+    bool isInstRead() { return (getFlags() & INST_READ) != 0; }
+
+    bool isLocked() { return (getFlags() & LOCKED) != 0; }

    friend class Packet;
 };
--- a/src/mem/tport.cc
+++ b/src/mem/tport.cc
@@ -47,9 +47,11 @@ SimpleTimingPort::recvTiming(Packet *pkt)
    // if we ever added it back.
    assert(pkt->result != Packet::Nacked);
    Tick latency = recvAtomic(pkt);
-    // turn packet around to go back to requester
-    pkt->makeTimingResponse();
-    sendTimingLater(pkt, latency);
+    // turn packet around to go back to requester if response expected
+    if (pkt->needsResponse()) {
+        pkt->makeTimingResponse();
+        sendTimingLater(pkt, latency);
+    }
    return true;
 }