mem: Snoop filter support for large systems

Changed SnoopMask to use std::bitset instead of uint64 so we can simulate
larger systems without having to workaround limitations on the number of
ports. No noticeable performance drop was observed after this change.
The size of the bitset is currently set to 256 which should fit most
needs.

Change-Id: I216882300500e2dcb789889756e73a1033271621
Signed-off-by: Tiago Muck <tiago.muck@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/18791
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Tiago Muck
2019-02-07 13:37:40 -06:00
committed by Tiago Mück
parent cdc03f96a7
commit c73b7e50d0
2 changed files with 34 additions and 28 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2017 ARM Limited
* Copyright (c) 2013-2017,2019 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -49,11 +49,13 @@
#include "debug/SnoopFilter.hh"
#include "sim/system.hh"
const int SnoopFilter::SNOOP_MASK_SIZE;
void
SnoopFilter::eraseIfNullEntry(SnoopFilterCache::iterator& sf_it)
{
SnoopItem& sf_item = sf_it->second;
if (!(sf_item.requested | sf_item.holder)) {
if ((sf_item.requested | sf_item.holder).none()) {
cachedLocations.erase(sf_it);
DPRINTF(SnoopFilter, "%s: Removed SF entry.\n",
__func__);
@@ -96,8 +98,7 @@ SnoopFilter::lookupRequest(const Packet* cpkt, const SlavePort& slave_port)
totRequests++;
if (is_hit) {
// Single bit set -> value is a power of two
if (isPow2(interested))
if (interested.count() == 1)
hitSingleRequests++;
else
hitMultiRequests++;
@@ -114,8 +115,9 @@ SnoopFilter::lookupRequest(const Packet* cpkt, const SlavePort& slave_port)
if (cpkt->needsResponse()) {
if (!cpkt->cacheResponding()) {
// Max one request per address per port
panic_if(sf_item.requested & req_port, "double request :( " \
"SF value %x.%x\n", sf_item.requested, sf_item.holder);
panic_if((sf_item.requested & req_port).any(),
"double request :( SF value %x.%x\n",
sf_item.requested, sf_item.holder);
// Mark in-flight requests to distinguish later on
sf_item.requested |= req_port;
@@ -126,7 +128,8 @@ SnoopFilter::lookupRequest(const Packet* cpkt, const SlavePort& slave_port)
// to the CPU, already -> the response will not be seen by this
// filter -> we do not need to keep the in-flight request, but make
// sure that we know that that cluster has a copy
panic_if(!(sf_item.holder & req_port), "Need to hold the value!");
panic_if((sf_item.holder & req_port).none(),
"Need to hold the value!");
DPRINTF(SnoopFilter,
"%s: not marking request. SF value %x.%x\n",
__func__, sf_item.requested, sf_item.holder);
@@ -134,7 +137,7 @@ SnoopFilter::lookupRequest(const Packet* cpkt, const SlavePort& slave_port)
} else { // if (!cpkt->needsResponse())
assert(cpkt->isEviction());
// make sure that the sender actually had the line
panic_if(!(sf_item.holder & req_port), "requester %x is not a " \
panic_if((sf_item.holder & req_port).none(), "requester %x is not a " \
"holder :( SF value %x.%x\n", req_port,
sf_item.requested, sf_item.holder);
// CleanEvicts and Writebacks -> the sender and all caches above
@@ -206,8 +209,8 @@ SnoopFilter::lookupSnoop(const Packet* cpkt)
SnoopMask interested = (sf_item.holder | sf_item.requested);
totSnoops++;
// Single bit set -> value is a power of two
if (isPow2(interested))
if (interested.count() == 1)
hitSingleSnoops++;
else
hitMultiSnoops++;
@@ -222,7 +225,7 @@ SnoopFilter::lookupSnoop(const Packet* cpkt)
assert(cpkt->isWriteback() || cpkt->req->isUncacheable() ||
(cpkt->isInvalidate() == cpkt->needsWritable()) ||
cpkt->req->isCacheMaintenance());
if (cpkt->isInvalidate() && !sf_item.requested) {
if (cpkt->isInvalidate() && sf_item.requested.none()) {
// Early clear of the holder, if no other request is currently going on
// @todo: This should possibly be updated even though we do not filter
// upward snoops
@@ -266,11 +269,12 @@ SnoopFilter::updateSnoopResponse(const Packet* cpkt,
__func__, sf_item.requested, sf_item.holder);
// The source should have the line
panic_if(!(sf_item.holder & rsp_mask), "SF value %x.%x does not have "\
"the line\n", sf_item.requested, sf_item.holder);
panic_if((sf_item.holder & rsp_mask).none(),
"SF value %x.%x does not have the line\n",
sf_item.requested, sf_item.holder);
// The destination should have had a request in
panic_if(!(sf_item.requested & req_mask), "SF value %x.%x missing "\
panic_if((sf_item.requested & req_mask).none(), "SF value %x.%x missing "\
"the original request\n", sf_item.requested, sf_item.holder);
// If the snoop response has no sharers the line is passed in
@@ -287,7 +291,7 @@ SnoopFilter::updateSnoopResponse(const Packet* cpkt,
// @todo Deal with invalidating responses
sf_item.holder |= req_mask;
sf_item.requested &= ~req_mask;
assert(sf_item.requested | sf_item.holder);
assert((sf_item.requested | sf_item.holder).any());
DPRINTF(SnoopFilter, "%s: new SF value %x.%x\n",
__func__, sf_item.requested, sf_item.holder);
}
@@ -359,8 +363,9 @@ SnoopFilter::updateResponse(const Packet* cpkt, const SlavePort& slave_port)
__func__, sf_item.requested, sf_item.holder);
// Make sure we have seen the actual request, too
panic_if(!(sf_item.requested & slave_mask), "SF value %x.%x missing "\
"request bit\n", sf_item.requested, sf_item.holder);
panic_if((sf_item.requested & slave_mask).none(),
"SF value %x.%x missing request bit\n",
sf_item.requested, sf_item.holder);
sf_item.requested &= ~slave_mask;
// Update the residency of the cache line.
@@ -376,7 +381,7 @@ SnoopFilter::updateResponse(const Packet* cpkt, const SlavePort& slave_port)
// Any other response implies that a cache above will have the
// block.
sf_item.holder |= slave_mask;
assert(sf_item.holder | sf_item.requested);
assert((sf_item.holder | sf_item.requested).any());
}
DPRINTF(SnoopFilter, "%s: new SF value %x.%x\n",
__func__, sf_item.requested, sf_item.holder);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2016 ARM Limited
* Copyright (c) 2013-2016,2019 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -45,6 +45,7 @@
#ifndef __MEM_SNOOP_FILTER_HH__
#define __MEM_SNOOP_FILTER_HH__
#include <bitset>
#include <unordered_map>
#include <utility>
@@ -86,6 +87,10 @@
*/
class SnoopFilter : public SimObject {
public:
// Change for systems with more than 256 ports tracked by this object
static const int SNOOP_MASK_SIZE = 256;
typedef std::vector<QueuedSlavePort*> SnoopList;
SnoopFilter (const SnoopFilterParams *p) :
@@ -114,9 +119,9 @@ class SnoopFilter : public SimObject {
}
// make sure we can deal with this many ports
fatal_if(id > 8 * sizeof(SnoopMask),
fatal_if(id > SNOOP_MASK_SIZE,
"Snoop filter only supports %d snooping ports, got %d\n",
8 * sizeof(SnoopMask), id);
SNOOP_MASK_SIZE, id);
}
/**
@@ -198,13 +203,9 @@ class SnoopFilter : public SimObject {
/**
* The underlying type for the bitmask we use for tracking. This
* limits the number of snooping ports supported per crossbar. For
* the moment it is an uint64_t to offer maximum
* scalability. However, it is possible to use e.g. a uint16_t or
* uint32_to slim down the footprint of the hash map (and
* ultimately improve the simulation performance).
* limits the number of snooping ports supported per crossbar.
*/
typedef uint64_t SnoopMask;
typedef std::bitset<SNOOP_MASK_SIZE> SnoopMask;
/**
* Per cache line item tracking a bitmask of SlavePorts who have an
@@ -314,7 +315,7 @@ SnoopFilter::maskToPortList(SnoopMask port_mask) const
{
SnoopList res;
for (const auto& p : slavePorts)
if (port_mask & portToMask(*p))
if ((port_mask & portToMask(*p)).any())
res.push_back(p);
return res;
}