# Copyright (c) 2021 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder.  You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

'''
Definitions for CHI nodes and controller types. These are used by
create_system in configs/ruby/CHI.py or may be used in custom configuration
scripts. When used with create_system, the user may provide an additional
configuration file as the --chi-config parameter to specialize the classes
defined here.

When using the CustomMesh topology, --chi-config must be provided with
specialization of the NoC_Param classes defining the NoC dimensions and
node to router binding. See configs/example/noc_config/2x4.py for an example.
'''

import math
import m5
from m5.objects import *

class Versions:
    '''
    Helper class to obtain unique ids for a given controller class.
    These are passed as the 'version' parameter when creating the controller.
    '''
    _seqs = 0
    @classmethod
    def getSeqId(cls):
        val = cls._seqs
        cls._seqs += 1
        return val

    _version = {}
    @classmethod
    def getVersion(cls, tp):
        if tp not in cls._version:
            cls._version[tp] = 0
        val = cls._version[tp]
        cls._version[tp] = val + 1
        return val


class NoC_Params:
    '''
    Default parameters for the interconnect. The value of data_width is
    also used to set the data_channel_size for all CHI controllers.
    (see configs/ruby/CHI.py)
    '''
    router_link_latency = 1
    node_link_latency = 1
    router_latency = 1
    router_buffer_size = 4
    cntrl_msg_size = 8
    data_width = 32
    cross_links = []
    cross_link_latency = 0

class CHI_Node(SubSystem):
    '''
    Base class with common functions for setting up Cache or Memory
    controllers that are part of a CHI RNF, RNFI, HNF, or SNF nodes.
    Notice getNetworkSideControllers and getAllControllers must be implemented
    in the derived classes.
    '''

    class NoC_Params:
        '''
        NoC config. parameters and bindings required for CustomMesh topology.

        Maps 'num_nodes_per_router' CHI nodes to each router provided in
        'router_list'. This assumes len(router_list)*num_nodes_per_router
        equals the number of nodes
        If 'num_nodes_per_router' is left undefined, we circulate around
        'router_list' until all nodes are mapped.
        See 'distributeNodes' in configs/topologies/CustomMesh.py
        '''
        num_nodes_per_router = None
        router_list = None

    def __init__(self, ruby_system):
        super(CHI_Node, self).__init__()
        self._ruby_system = ruby_system
        self._network = ruby_system.network

    def getNetworkSideControllers(self):
        '''
        Returns all ruby controllers that need to be connected to the
        network
        '''
        raise NotImplementedError()

    def getAllControllers(self):
        '''
        Returns all ruby controllers associated with this node
        '''
        raise NotImplementedError()

    def setDownstream(self, cntrls):
        '''
        Sets cntrls as the downstream list of all controllers in this node
        '''
        for c in self.getNetworkSideControllers():
            c.downstream_destinations = cntrls

    def connectController(self, cntrl):
        '''
        Creates and configures the messages buffers for the CHI input/output
        ports that connect to the network
        '''
        cntrl.reqOut = MessageBuffer()
        cntrl.rspOut = MessageBuffer()
        cntrl.snpOut = MessageBuffer()
        cntrl.datOut = MessageBuffer()
        cntrl.reqIn = MessageBuffer()
        cntrl.rspIn = MessageBuffer()
        cntrl.snpIn = MessageBuffer()
        cntrl.datIn = MessageBuffer()

        # All CHI ports are always connected to the network.
        # Controllers that are not part of the getNetworkSideControllers list
        # still communicate using internal routers, thus we need to wire-up the
        # ports
        cntrl.reqOut.out_port = self._network.in_port
        cntrl.rspOut.out_port = self._network.in_port
        cntrl.snpOut.out_port = self._network.in_port
        cntrl.datOut.out_port = self._network.in_port
        cntrl.reqIn.in_port = self._network.out_port
        cntrl.rspIn.in_port = self._network.out_port
        cntrl.snpIn.in_port = self._network.out_port
        cntrl.datIn.in_port = self._network.out_port


class TriggerMessageBuffer(MessageBuffer):
    '''
    MessageBuffer for triggering internal controller events.
    These buffers should not be affected by the Ruby tester randomization
    and allow poping messages enqueued in the same cycle.
    '''
    randomization = 'disabled'
    allow_zero_latency = True

class OrderedTriggerMessageBuffer(TriggerMessageBuffer):
    ordered = True

class CHI_Cache_Controller(Cache_Controller):
    '''
    Default parameters for a Cache controller
    The Cache_Controller can also be used as a DMA requester or as
    a pure directory if all cache allocation policies are disabled.
    '''

    def __init__(self, ruby_system):
        super(CHI_Cache_Controller, self).__init__(
            version = Versions.getVersion(Cache_Controller),
            ruby_system = ruby_system,
            mandatoryQueue = MessageBuffer(),
            prefetchQueue = MessageBuffer(),
            triggerQueue = TriggerMessageBuffer(),
            retryTriggerQueue = OrderedTriggerMessageBuffer(),
            replTriggerQueue = OrderedTriggerMessageBuffer(),
            reqRdy = TriggerMessageBuffer(),
            snpRdy = TriggerMessageBuffer())
        # Set somewhat large number since we really a lot on internal
        # triggers. To limit the controller performance, tweak other
        # params such as: input port buffer size, cache banks, and output
        # port latency
        self.transitions_per_cycle = 128
        # This should be set to true in the data cache controller to enable
        # timeouts on unique lines when a store conditional fails
        self.sc_lock_enabled = False

class CHI_L1Controller(CHI_Cache_Controller):
    '''
    Default parameters for a L1 Cache controller
    '''

    def __init__(self, ruby_system, sequencer, cache, prefetcher):
        super(CHI_L1Controller, self).__init__(ruby_system)
        self.sequencer = sequencer
        self.cache = cache
        self.use_prefetcher = False
        self.send_evictions = True
        self.is_HN = False
        self.enable_DMT = False
        self.enable_DCT = False
        # Strict inclusive MOESI
        self.allow_SD = True
        self.alloc_on_seq_acc = True
        self.alloc_on_seq_line_write = False
        self.alloc_on_readshared = True
        self.alloc_on_readunique = True
        self.alloc_on_readonce = True
        self.alloc_on_writeback = True
        self.dealloc_on_unique = False
        self.dealloc_on_shared = False
        self.dealloc_backinv_unique = True
        self.dealloc_backinv_shared = True
        # Some reasonable default TBE params
        self.number_of_TBEs = 16
        self.number_of_repl_TBEs = 16
        self.number_of_snoop_TBEs = 4
        self.unify_repl_TBEs = False

class CHI_L2Controller(CHI_Cache_Controller):
    '''
    Default parameters for a L2 Cache controller
    '''

    def __init__(self, ruby_system, cache, prefetcher):
        super(CHI_L2Controller, self).__init__(ruby_system)
        self.sequencer = NULL
        self.cache = cache
        self.use_prefetcher = False
        self.allow_SD = True
        self.is_HN = False
        self.enable_DMT = False
        self.enable_DCT = False
        self.send_evictions = False
        # Strict inclusive MOESI
        self.alloc_on_seq_acc = False
        self.alloc_on_seq_line_write = False
        self.alloc_on_readshared = True
        self.alloc_on_readunique = True
        self.alloc_on_readonce = True
        self.alloc_on_writeback = True
        self.dealloc_on_unique = False
        self.dealloc_on_shared = False
        self.dealloc_backinv_unique = True
        self.dealloc_backinv_shared = True
        # Some reasonable default TBE params
        self.number_of_TBEs = 32
        self.number_of_repl_TBEs = 32
        self.number_of_snoop_TBEs = 16
        self.unify_repl_TBEs = False

class CHI_HNFController(CHI_Cache_Controller):
    '''
    Default parameters for a coherent home node (HNF) cache controller
    '''

    def __init__(self, ruby_system, cache, prefetcher, addr_ranges):
        super(CHI_HNFController, self).__init__(ruby_system)
        self.sequencer = NULL
        self.cache = cache
        self.use_prefetcher = False
        self.addr_ranges = addr_ranges
        self.allow_SD = True
        self.is_HN = True
        self.enable_DMT = True
        self.enable_DCT = True
        self.send_evictions = False
        # MOESI / Mostly inclusive for shared / Exclusive for unique
        self.alloc_on_seq_acc = False
        self.alloc_on_seq_line_write = False
        self.alloc_on_readshared = True
        self.alloc_on_readunique = False
        self.alloc_on_readonce = True
        self.alloc_on_writeback = True
        self.dealloc_on_unique = True
        self.dealloc_on_shared = False
        self.dealloc_backinv_unique = False
        self.dealloc_backinv_shared = False
        # Some reasonable default TBE params
        self.number_of_TBEs = 32
        self.number_of_repl_TBEs = 32
        self.number_of_snoop_TBEs = 1 # should not receive any snoop
        self.unify_repl_TBEs = False

class CHI_DMAController(CHI_Cache_Controller):
    '''
    Default parameters for a DMA controller
    '''

    def __init__(self, ruby_system, sequencer):
        super(CHI_DMAController, self).__init__(ruby_system)
        self.sequencer = sequencer
        class DummyCache(RubyCache):
            dataAccessLatency = 0
            tagAccessLatency = 1
            size = "128"
            assoc = 1
        self.use_prefetcher = False
        self.cache = DummyCache()
        self.sequencer.dcache = NULL
        # All allocations are false
        # Deallocations are true (don't really matter)
        self.allow_SD = False
        self.is_HN = False
        self.enable_DMT = False
        self.enable_DCT = False
        self.alloc_on_seq_acc = False
        self.alloc_on_seq_line_write = False
        self.alloc_on_readshared = False
        self.alloc_on_readunique = False
        self.alloc_on_readonce = False
        self.alloc_on_writeback = False
        self.dealloc_on_unique = False
        self.dealloc_on_shared = False
        self.dealloc_backinv_unique = False
        self.dealloc_backinv_shared = False
        self.send_evictions = False
        self.number_of_TBEs = 16
        self.number_of_repl_TBEs = 1
        self.number_of_snoop_TBEs = 1 # should not receive any snoop
        self.unify_repl_TBEs = False

class CPUSequencerWrapper:
    '''
    Other generic configuration scripts assume a matching number of sequencers
    and cpus. This wraps the instruction and data sequencer so they are
    compatible with the other scripts. This assumes all scripts are using
    connectCpuPorts/connectIOPorts to bind ports
    '''

    def __init__(self, iseq, dseq):
        # use this style due to __setattr__ override below
        self.__dict__['inst_seq'] = iseq
        self.__dict__['data_seq'] = dseq
        self.__dict__['support_data_reqs'] = True
        self.__dict__['support_inst_reqs'] = True
        # Compatibility with certain scripts that wire up ports
        # without connectCpuPorts
        self.__dict__['slave'] = dseq.in_ports
        self.__dict__['in_ports'] = dseq.in_ports

    def connectCpuPorts(self, cpu):
        assert(isinstance(cpu, BaseCPU))
        cpu.icache_port = self.inst_seq.in_ports
        for p in cpu._cached_ports:
            if str(p) != 'icache_port':
                exec('cpu.%s = self.data_seq.in_ports' % p)
        cpu.connectUncachedPorts(self.data_seq)

    def connectIOPorts(self, piobus):
        self.data_seq.connectIOPorts(piobus)

    def __setattr__(self, name, value):
        setattr(self.inst_seq, name, value)
        setattr(self.data_seq, name, value)

class CHI_RNF(CHI_Node):
    '''
    Defines a CHI request node.
    Notice all contollers and sequencers are set as children of the cpus, so
    this object acts more like a proxy for seting things up and has no topology
    significance unless the cpus are set as its children at the top level
    '''

    def __init__(self, cpus, ruby_system,
                 l1Icache_type, l1Dcache_type,
                 cache_line_size,
                 l1Iprefetcher_type=None, l1Dprefetcher_type=None):
        super(CHI_RNF, self).__init__(ruby_system)

        self._block_size_bits = int(math.log(cache_line_size, 2))

        # All sequencers and controllers
        self._seqs = []
        self._cntrls = []

        # Last level controllers in this node, i.e., the ones that will send
        # requests to the home nodes
        self._ll_cntrls = []

        self._cpus = cpus

        # First creates L1 caches and sequencers
        for cpu in self._cpus:
            cpu.inst_sequencer = RubySequencer(version = Versions.getSeqId(),
                                         ruby_system = ruby_system)
            cpu.data_sequencer = RubySequencer(version = Versions.getSeqId(),
                                         ruby_system = ruby_system)

            self._seqs.append(CPUSequencerWrapper(cpu.inst_sequencer,
                                                  cpu.data_sequencer))

            # caches
            l1i_cache = l1Icache_type(start_index_bit = self._block_size_bits,
                                      is_icache = True)

            l1d_cache = l1Dcache_type(start_index_bit = self._block_size_bits,
                                      is_icache = False)

            # Placeholders for future prefetcher support
            if l1Iprefetcher_type != None or l1Dprefetcher_type != None:
                m5.fatal('Prefetching not supported yet')
            l1i_pf = NULL
            l1d_pf = NULL

            # cache controllers
            cpu.l1i = CHI_L1Controller(ruby_system, cpu.inst_sequencer,
                                       l1i_cache, l1i_pf)

            cpu.l1d = CHI_L1Controller(ruby_system, cpu.data_sequencer,
                                       l1d_cache, l1d_pf)

            cpu.inst_sequencer.dcache = NULL
            cpu.data_sequencer.dcache = cpu.l1d.cache

            cpu.l1d.sc_lock_enabled = True

            cpu._ll_cntrls = [cpu.l1i, cpu.l1d]
            for c in cpu._ll_cntrls:
                self._cntrls.append(c)
                self.connectController(c)
                self._ll_cntrls.append(c)

    def getSequencers(self):
        return self._seqs

    def getAllControllers(self):
        return self._cntrls

    def getNetworkSideControllers(self):
        return self._cntrls

    def setDownstream(self, cntrls):
        for c in self._ll_cntrls:
            c.downstream_destinations = cntrls

    def getCpus(self):
        return self._cpus

    # Adds a private L2 for each cpu
    def addPrivL2Cache(self, cache_type, pf_type=None):
        self._ll_cntrls = []
        for cpu in self._cpus:
            l2_cache = cache_type(start_index_bit = self._block_size_bits,
                                  is_icache = False)
            if pf_type != None:
                m5.fatal('Prefetching not supported yet')
            l2_pf = NULL

            cpu.l2 = CHI_L2Controller(self._ruby_system, l2_cache, l2_pf)

            self._cntrls.append(cpu.l2)
            self.connectController(cpu.l2)

            self._ll_cntrls.append(cpu.l2)

            for c in cpu._ll_cntrls:
                c.downstream_destinations = [cpu.l2]
            cpu._ll_cntrls = [cpu.l2]


class CHI_HNF(CHI_Node):
    '''
    Encapsulates an HNF cache/directory controller.
    Before the first controller is created, the class method
    CHI_HNF.createAddrRanges must be called before creating any CHI_HNF object
    to set-up the interleaved address ranges used by the HNFs
    '''

    class NoC_Params(CHI_Node.NoC_Params):
        '''HNFs may also define the 'pairing' parameter to allow pairing'''
        pairing = None

    _addr_ranges = []
    @classmethod
    def createAddrRanges(cls, sys_mem_ranges, cache_line_size, num_hnfs):
        # Create the HNFs interleaved addr ranges
        block_size_bits = int(math.log(cache_line_size, 2))
        cls._addr_ranges = []
        llc_bits = int(math.log(num_hnfs, 2))
        numa_bit = block_size_bits + llc_bits - 1
        for i in range(num_hnfs):
            ranges = []
            for r in sys_mem_ranges:
                addr_range = AddrRange(r.start, size = r.size(),
                                        intlvHighBit = numa_bit,
                                        intlvBits = llc_bits,
                                        intlvMatch = i)
                ranges.append(addr_range)
            cls._addr_ranges.append((ranges, numa_bit, i))

    @classmethod
    def getAddrRanges(cls, hnf_idx):
        assert(len(cls._addr_ranges) != 0)
        return cls._addr_ranges[hnf_idx]

    # The CHI controller can be a child of this object or another if
    # 'parent' if specified
    def __init__(self, hnf_idx, ruby_system, llcache_type, parent):
        super(CHI_HNF, self).__init__(ruby_system)

        addr_ranges,intlvHighBit,intlvMatch = self.getAddrRanges(hnf_idx)
        # All ranges should have the same interleaving
        assert(len(addr_ranges) >= 1)
        assert(intlvMatch == hnf_idx)

        ll_cache = llcache_type(start_index_bit = intlvHighBit + 1)
        self._cntrl = CHI_HNFController(ruby_system, ll_cache, NULL,
                                        addr_ranges)

        if parent == None:
            self.cntrl = self._cntrl
        else:
            parent.cntrl = self._cntrl

        self.connectController(self._cntrl)

    def getAllControllers(self):
        return [self._cntrl]

    def getNetworkSideControllers(self):
        return [self._cntrl]


class CHI_SNF_Base(CHI_Node):
    '''
    Creates CHI node controllers for the memory controllers
    '''

    # The CHI controller can be a child of this object or another if
    # 'parent' if specified
    def __init__(self, ruby_system, parent):
        super(CHI_SNF_Base, self).__init__(ruby_system)

        self._cntrl = Memory_Controller(
                          version = Versions.getVersion(Memory_Controller),
                          ruby_system = ruby_system,
                          triggerQueue = TriggerMessageBuffer(),
                          responseFromMemory = MessageBuffer(),
                          requestToMemory = MessageBuffer(ordered = True),
                          reqRdy = TriggerMessageBuffer())

        self.connectController(self._cntrl)

        if parent:
            parent.cntrl = self._cntrl
        else:
            self.cntrl = self._cntrl

    def getAllControllers(self):
        return [self._cntrl]

    def getNetworkSideControllers(self):
        return [self._cntrl]

    def getMemRange(self, mem_ctrl):
        # TODO need some kind of transparent API for
        # MemCtrl+DRAM vs SimpleMemory
        if hasattr(mem_ctrl, 'range'):
            return mem_ctrl.range
        else:
            return mem_ctrl.dram.range

class CHI_SNF_BootMem(CHI_SNF_Base):
    '''
    Create the SNF for the boot memory
    '''

    def __init__(self, ruby_system, parent, bootmem):
        super(CHI_SNF_BootMem, self).__init__(ruby_system, parent)
        self._cntrl.memory_out_port = bootmem.port
        self._cntrl.addr_ranges = self.getMemRange(bootmem)

class CHI_SNF_MainMem(CHI_SNF_Base):
    '''
    Create the SNF for a list main memory controllers
    '''

    def __init__(self, ruby_system, parent, mem_ctrl = None):
        super(CHI_SNF_MainMem, self).__init__(ruby_system, parent)
        if mem_ctrl:
            self._cntrl.memory_out_port = mem_ctrl.port
            self._cntrl.addr_ranges = self.getMemRange(mem_ctrl)
        # else bind ports and range later

class CHI_RNI_Base(CHI_Node):
    '''
    Request node without cache / DMA
    '''

    # The CHI controller can be a child of this object or another if
    # 'parent' if specified
    def __init__(self, ruby_system, parent):
        super(CHI_RNI_Base, self).__init__(ruby_system)

        self._sequencer = RubySequencer(version = Versions.getSeqId(),
                                         ruby_system = ruby_system,
                                         clk_domain = ruby_system.clk_domain)
        self._cntrl = CHI_DMAController(ruby_system, self._sequencer)

        if parent:
            parent.cntrl = self._cntrl
        else:
            self.cntrl = self._cntrl

        self.connectController(self._cntrl)

    def getAllControllers(self):
        return [self._cntrl]

    def getNetworkSideControllers(self):
        return [self._cntrl]

class CHI_RNI_DMA(CHI_RNI_Base):
    '''
    DMA controller wiredup to a given dma port
    '''

    def __init__(self, ruby_system, dma_port, parent):
        super(CHI_RNI_DMA, self).__init__(ruby_system, parent)
        assert(dma_port != None)
        self._sequencer.in_ports = dma_port

class CHI_RNI_IO(CHI_RNI_Base):
    '''
    DMA controller wiredup to ruby_system IO port
    '''

    def __init__(self, ruby_system, parent):
        super(CHI_RNI_IO, self).__init__(ruby_system, parent)
        ruby_system._io_port = self._sequencer