Files
gem5/configs/common/cores/arm/ex5_big.py
Pranith 50f652a2ee Implement BTB using the cache library (#1537)
This enables the BTB to be associative and use various replacement
policies.
2024-10-10 17:05:22 +01:00

217 lines
6.4 KiB
Python

# Copyright (c) 2012 The Regents of The University of Michigan
# Copyright (c) 2016 Centre National de la Recherche Scientifique
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from m5.objects import *
# -----------------------------------------------------------------------
# ex5 big core (based on the ARM Cortex-A15)
# -----------------------------------------------------------------------
# Simple ALU Instructions have a latency of 1
class ex5_big_Simple_Int(FUDesc):
opList = [OpDesc(opClass="IntAlu", opLat=1)]
count = 2
# Complex ALU instructions have a variable latencies
class ex5_big_Complex_Int(FUDesc):
opList = [
OpDesc(opClass="IntMult", opLat=4, pipelined=True),
OpDesc(opClass="IntDiv", opLat=11, pipelined=False),
OpDesc(opClass="IprAccess", opLat=3, pipelined=True),
]
count = 1
# Floating point and SIMD instructions
class ex5_big_FP(FUDesc):
opList = [
OpDesc(opClass="SimdAdd", opLat=3),
OpDesc(opClass="SimdAddAcc", opLat=4),
OpDesc(opClass="SimdAlu", opLat=4),
OpDesc(opClass="SimdCmp", opLat=4),
OpDesc(opClass="SimdCvt", opLat=3),
OpDesc(opClass="SimdMisc", opLat=3),
OpDesc(opClass="SimdMult", opLat=6),
OpDesc(opClass="SimdMultAcc", opLat=5),
OpDesc(opClass="SimdMatMultAcc", opLat=5),
OpDesc(opClass="SimdShift", opLat=3),
OpDesc(opClass="SimdShiftAcc", opLat=3),
OpDesc(opClass="SimdSqrt", opLat=9),
OpDesc(opClass="SimdFloatAdd", opLat=6),
OpDesc(opClass="SimdFloatAlu", opLat=5),
OpDesc(opClass="SimdFloatCmp", opLat=3),
OpDesc(opClass="SimdFloatCvt", opLat=3),
OpDesc(opClass="SimdFloatDiv", opLat=21),
OpDesc(opClass="SimdFloatMisc", opLat=3),
OpDesc(opClass="SimdFloatMult", opLat=6),
OpDesc(opClass="SimdFloatMultAcc", opLat=1),
OpDesc(opClass="SimdFloatMatMultAcc", opLat=1),
OpDesc(opClass="SimdFloatSqrt", opLat=9),
OpDesc(opClass="FloatAdd", opLat=6),
OpDesc(opClass="FloatCmp", opLat=5),
OpDesc(opClass="FloatCvt", opLat=5),
OpDesc(opClass="FloatDiv", opLat=12, pipelined=False),
OpDesc(opClass="FloatSqrt", opLat=33, pipelined=False),
OpDesc(opClass="FloatMult", opLat=8),
]
count = 2
# Load/Store Units
class ex5_big_Load(FUDesc):
opList = [OpDesc(opClass="MemRead", opLat=2)]
count = 1
class ex5_big_Store(FUDesc):
opList = [OpDesc(opClass="MemWrite", opLat=2)]
count = 1
# Functional Units for this CPU
class ex5_big_FUP(FUPool):
FUList = [
ex5_big_Simple_Int(),
ex5_big_Complex_Int(),
ex5_big_Load(),
ex5_big_Store(),
ex5_big_FP(),
]
class ex5_big_BTB(SimpleBTB):
numEntries = 4096
tagBits = 18
associativity = 1
instShiftAmt = 2
btbReplPolicy = LRURP()
btbIndexingPolicy = BTBSetAssociative(
num_entries=Parent.numEntries,
set_shift=Parent.instShiftAmt,
assoc=Parent.associativity,
tag_bits=Parent.tagBits,
)
# Bi-Mode Branch Predictor
class ex5_big_BP(BiModeBP):
btb = ex5_big_BTB()
ras = ReturnAddrStack(numEntries=48)
globalPredictorSize = 4096
globalCtrBits = 2
choicePredictorSize = 1024
choiceCtrBits = 3
instShiftAmt = 2
class ex5_big(ArmO3CPU):
LQEntries = 16
SQEntries = 16
LSQDepCheckShift = 0
LFSTSize = 1024
SSITSize = 1024
decodeToFetchDelay = 1
renameToFetchDelay = 1
iewToFetchDelay = 1
commitToFetchDelay = 1
renameToDecodeDelay = 1
iewToDecodeDelay = 1
commitToDecodeDelay = 1
iewToRenameDelay = 1
commitToRenameDelay = 1
commitToIEWDelay = 1
fetchWidth = 3
fetchBufferSize = 16
fetchToDecodeDelay = 3
decodeWidth = 3
decodeToRenameDelay = 2
renameWidth = 3
renameToIEWDelay = 1
issueToExecuteDelay = 1
dispatchWidth = 6
issueWidth = 8
wbWidth = 8
fuPool = ex5_big_FUP()
iewToCommitDelay = 1
renameToROBDelay = 1
commitWidth = 8
squashWidth = 8
trapLatency = 13
backComSize = 5
forwardComSize = 5
numPhysIntRegs = 90
numPhysFloatRegs = 256
numIQEntries = 48
numROBEntries = 60
switched_out = False
branchPred = ex5_big_BP()
class L1Cache(Cache):
tag_latency = 2
data_latency = 2
response_latency = 2
tgts_per_mshr = 8
# Consider the L2 a victim cache also for clean lines
writeback_clean = True
# Instruction Cache
class L1I(L1Cache):
mshrs = 2
size = "32KiB"
assoc = 2
is_read_only = True
# Data Cache
class L1D(L1Cache):
mshrs = 6
size = "32KiB"
assoc = 2
write_buffers = 16
# L2 Cache
class L2(Cache):
tag_latency = 15
data_latency = 15
response_latency = 15
mshrs = 16
tgts_per_mshr = 8
size = "2MiB"
assoc = 16
write_buffers = 8
clusivity = "mostly_excl"
# Simple stride prefetcher
prefetcher = StridePrefetcher(degree=8, latency=1, prefetch_on_access=True)
tags = BaseSetAssoc()
replacement_policy = RandomRP()