DRAMSys/src/libdramsys/DRAMSys/simulation/AddressDecoder.cpp

/*
 * Copyright (c) 2025, RPTU Kaiserslautern-Landau
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors:
 *    Johannes Feldmann
 *    Lukas Steiner
 *    Luiza Correa
 *    Derek Christ
 *    Thomas Zimmermann
 */

#include "AddressDecoder.h"
#include "DRAMSys/config/AddressMapping.h"

#include <bit>
#include <bitset>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <cstdint>
#include <set>

namespace DRAMSys
{
/********************/
/* Helper Functions */
/********************/
/**
 * @brief Creates a bitmask of consecutive 1-bits inside a uint64_t.
 *
 * Bits that would be shifted beyond bit 63 are dropped. Requests that cannot
 * be expressed with a plain shift (numBits >= 64 or startIndex >= 64) are
 * handled explicitly, because shifting a 64-bit value by 64 or more positions
 * is undefined behaviour in C++.
 *
 * @param numBits The number of bits to set to 1.
 * @param startIndex The index of the first (lowest) bit to set to 1.
 * @return The resulting bitmask.
 */
uint64_t createBitmask(unsigned numBits, unsigned startIndex) {
    constexpr unsigned maskWidth = 64;

    // Nothing to set, or the whole run lies above the most significant bit.
    if (numBits == 0 || startIndex >= maskWidth)
        return 0;

    // Build the run of ones without ever shifting by the full width.
    const uint64_t ones =
        (numBits >= maskWidth) ? ~UINT64_C(0) : ((UINT64_C(1) << numBits) - 1);

    // Move the run to the requested position
    return ones << startIndex;
}

/**
 * @brief Transposes a bit matrix whose rows are stored as bitsets.
 *
 * Bit j of input row i becomes bit i of output row j. The result has at least
 * as many rows as the input; it is enlarged when an input row has a bit set in
 * a column at or beyond matrix.size(), so that every set bit has a row to land
 * in (previously such a bit caused an out-of-bounds write).
 *
 * @param matrix The matrix to transpose, one bitset per row.
 * @return The transposed matrix.
 */
std::vector<std::bitset<AddressDecoder::ADDRESS_WIDTH>> AddressDecoder::transposeMatrix(const std::vector<std::bitset<ADDRESS_WIDTH>>& matrix) {
    const size_t rowCount = matrix.size();

    // Find the number of output rows: the highest used column + 1, but never
    // fewer rows than the input has (keeps the result unchanged for square input).
    size_t transposedRowCount = rowCount;
    for (const auto& row : matrix) {
        for (size_t column = ADDRESS_WIDTH; column > transposedRowCount; --column) {
            if (row.test(column - 1)) {
                transposedRowCount = column;
                break;
            }
        }
    }

    std::vector<std::bitset<ADDRESS_WIDTH>> transposedMatrix(transposedRowCount);

    for (size_t i = 0; i < rowCount; ++i) {
        for (size_t j = 0; j < ADDRESS_WIDTH; ++j) {
            if (matrix[i].test(j))
                transposedMatrix[j].set(i);
        }
    }
    return transposedMatrix;
}

/**
 * @brief Multiplies a bit vector with a bit matrix over GF(2).
 *
 * Output bit i is the parity (XOR of all bits) of the bitwise AND between the
 * input vector and row i of the matrix.
 *
 * @param inputVec The input bit vector.
 * @param matrix The matrix, one bitset per row.
 * @return The product vector; bit i belongs to matrix row i.
 */
uint64_t AddressDecoder::gf2Multiplication(const uint64_t& inputVec, const std::vector<std::bitset<ADDRESS_WIDTH>>& matrix) const
{
    // Debugging aid: to inspect a multiplication, print the input as
    // std::bitset<ADDRESS_WIDTH>(inputVec), followed by every matrix row
    // together with the output bit computed for it.

    const size_t rowCount = matrix.size();

#if defined(__clang__) || defined(__GNUC__)
    // Fast path: the compiler builtin yields the parity of a 64-bit word.
    uint64_t product = 0;
    for (size_t rowIdx = 0; rowIdx < rowCount; ++rowIdx) {
        const uint64_t selectedBits = inputVec & matrix[rowIdx].to_ullong();
        if (__builtin_parityll(selectedBits))
            product |= (uint64_t(1) << rowIdx);
    }
    return product;
#else
    // Portable path: an odd population count means parity 1.
    const std::bitset<ADDRESS_WIDTH> inputBits(inputVec);
    std::bitset<ADDRESS_WIDTH> productBits;

    for (size_t rowIdx = 0; rowIdx < rowCount; ++rowIdx) {
        const bool oddParity = ((inputBits & matrix[rowIdx]).count() % 2) != 0;
        productBits[rowIdx] = oddParity;
    }
    return productBits.to_ullong();
#endif
}


/****************************/
/* AddressDecoder Functions */
/****************************/

/**
 * @brief Builds the mapping matrix from the configured address mapping.
 *
 * Every configured bit entry becomes one row of the mapping matrix; the bits
 * listed in an entry are set in that row. Rows are appended component by
 * component in the order byte, column, bank group, bank, row, pseudochannel,
 * channel, rank, stack. For each component the index of its first row and its
 * number of rows are remembered in an AddressComponent.
 *
 * @param addressMapping The address mapping configuration.
 * @throws std::out_of_range if the mapping has more rows than
 *         (highest bit + 1) or references a bit >= ADDRESS_WIDTH.
 */
AddressDecoder::AddressDecoder(const DRAMSys::Config::AddressMapping& addressMapping) :
    highestBitValue(addressMapping.getHighestBit())
{
    mappingMatrix = std::vector<std::bitset<ADDRESS_WIDTH>>(highestBitValue + 1);

    // Highest mappable address is 2^(highestBit + 1) - 1. Computed with integer
    // arithmetic: a double cannot represent 2^n - 1 exactly for n > 53, and a
    // shift by 64 would be undefined behaviour.
    const uint64_t addressBitCount = static_cast<uint64_t>(highestBitValue) + 1;
    upperBoundAddress = (addressBitCount >= 64) ? ~UINT64_C(0)
                                                : ((UINT64_C(1) << addressBitCount) - 1);

    // Appends the rows of one component to the matrix, starting at *rowIndex,
    // and advances *rowIndex past them. A component without configured bits is
    // represented by index -1 and length 0.
    auto addBitsToMatrix = [&](const std::optional<std::vector<Config::AddressMapping::BitEntry>>& bits, int *rowIndex, std::string_view name)
    {
        if (!bits.has_value())
            return AddressComponent(-1, 0, name);
        for (auto row : bits.value()) {
            for (unsigned int bit : row) {
                // Checked accesses: a malformed mapping raises std::out_of_range
                // instead of silently writing outside the matrix.
                mappingMatrix.at(*rowIndex).set(bit);
            }
            (*rowIndex)++;
        }
        // Care: The rowIndex has been changed. We want the lowest bit, so we must subtract the length!
        return AddressComponent(*rowIndex - bits.value().size(), bits.value().size(), name);
    };


    int rowIndex = 0;
    byteBits          = addBitsToMatrix(addressMapping.BYTE_BIT, &rowIndex, "By");
    columnBits        = addBitsToMatrix(addressMapping.COLUMN_BIT, &rowIndex, "Co");
    bankGroupBits     = addBitsToMatrix(addressMapping.BANKGROUP_BIT, &rowIndex, "BG");
    bankBits          = addBitsToMatrix(addressMapping.BANK_BIT, &rowIndex, "Ba");
    rowBits           = addBitsToMatrix(addressMapping.ROW_BIT, &rowIndex, "Ro");
    pseudochannelBits = addBitsToMatrix(addressMapping.PSEUDOCHANNEL_BIT, &rowIndex, "PC");
    channelBits       = addBitsToMatrix(addressMapping.CHANNEL_BIT, &rowIndex, "Ch");
    rankBits          = addBitsToMatrix(addressMapping.RANK_BIT, &rowIndex, "Ra");
    stackBits         = addBitsToMatrix(addressMapping.STACK_BIT, &rowIndex, "St");
    transposedMappingMatrix = transposeMatrix(mappingMatrix);

    // Number of bank groups / banks addressable by the respective component bits
    bankgroupsPerRank = std::lround(std::pow(2, bankGroupBits.length));
    banksPerGroup = std::lround(std::pow(2, bankBits.length));
}


void AddressDecoder::plausibilityCheck(const MemSpec& memSpec)
{
    (*this).memSpec = &memSpec;
    np2Flag = not allComponentsArePowerOfTwo(memSpec);

    // Check if all address bits are used
    // TODO: Check if every bit occurs ~exactly~ once or just at least once?
    std::bitset<ADDRESS_WIDTH> orBitset(0);
    for (auto bitset: mappingMatrix) {
        orBitset |= bitset;
    }


    std::bitset<ADDRESS_WIDTH> mask((1ULL << (highestBitValue + 1)) - 1);
    if (orBitset != mask) {
        SC_REPORT_FATAL("AddressDecoder", "Not all address bits are used");
    }

    // Check if the byte bits are continous and starting from 0
    uint64_t row = 0;
    for (size_t i = 0; i < byteBits.length; i++) {
        row |= mappingMatrix[byteBits.idx + i].to_ullong();
    }
    if (row != ((1ULL << byteBits.length) - 1)) {
        SC_REPORT_FATAL("AddressDecoder", "Not all address bits occur exactly once");
    }

    // Check if the addresss mapping is capable of matching the requirements of the memSpec
    checkMemSpecCompatibility(memSpec);
    checkMemorySize(memSpec);
    checkByteBits(memSpec);
    checkBurstLengthBits(memSpec);
}

/**
 * @brief Tells whether the checked MemSpec quantities are all powers of two.
 *
 * Checked are: channels, ranks per channel, bank groups per channel, banks
 * per channel, devices per rank and columns per row.
 *
 * @param memSpec The memory specification to inspect.
 * @return true if every checked quantity is a power of two.
 */
bool AddressDecoder::allComponentsArePowerOfTwo(const MemSpec& memSpec) const {
    // TODO: What parts do we need to check?
    if (!isPowerOfTwo(memSpec.numberOfChannels))
        return false;
    if (!isPowerOfTwo(memSpec.ranksPerChannel))
        return false;
    if (!isPowerOfTwo(memSpec.bankGroupsPerChannel))
        return false;
    if (!isPowerOfTwo(memSpec.banksPerChannel))
        return false;
    if (!isPowerOfTwo(memSpec.devicesPerRank))
        return false;
    return isPowerOfTwo(memSpec.columnsPerRow);
}

/**
 * @brief Verifies that the mapped address range fits the memory size.
 *
 * A memory larger than the mapped range is always an error. A memory smaller
 * than the mapped range is only tolerated when the NP2 flag is set.
 *
 * @param memSpec The memory specification providing the memory size in bytes.
 */
void AddressDecoder::checkMemorySize(const MemSpec& memSpec) {
    const auto memorySize = memSpec.getSimMemSizeInBytes();
    const auto mappedSize = upperBoundAddress + 1;

    bool isMemorySizeMismatch = false;
    if (memorySize > mappedSize) {
        isMemorySizeMismatch = true;
    } else if (memorySize < mappedSize) {
        // Unused address space is only acceptable for non-power-of-two memories
        isMemorySizeMismatch = !np2Flag;
    }

    if (!isMemorySizeMismatch)
        return;

    SC_REPORT_FATAL("AddressDecoder", "The mapped bits do not match the memory size");
}

/**
 * @brief Compares the element counts addressable by the mapping with the MemSpec.
 *
 * With the NP2 flag set, the MemSpec may request fewer elements than the
 * mapping can address, but never more. Without it, all counts must match
 * exactly. Any violation is reported as a fatal error.
 *
 * @param memSpec The memory specification to compare against.
 */
void AddressDecoder::checkMemSpecCompatibility(const MemSpec& memSpec) {
    // Number of elements addressable with the given number of bits
    const auto addressable = [](unsigned bitCount) -> unsigned {
        return std::lround(std::pow(2, bitCount));
    };

    const unsigned channels = addressable(channelBits.length);
    const unsigned ranks = addressable(rankBits.length);
    const unsigned rows = addressable(rowBits.length);
    const unsigned columns = addressable(columnBits.length);
    const unsigned pseudochannels = addressable(pseudochannelBits.length);

    // Ranks and pseudochannels are treated as one combined dimension
    const unsigned ranksAndPseudochannels = ranks * pseudochannels;
    const unsigned absoluteBankGroups = bankgroupsPerRank * ranksAndPseudochannels;
    const unsigned absoluteBanks = banksPerGroup * absoluteBankGroups;

    // Depending on the NP2 flag we must adapt the strictness of this check
    bool mismatch = false;
    if (np2Flag) {
        mismatch = memSpec.numberOfChannels > channels ||
                   memSpec.ranksPerChannel > ranksAndPseudochannels ||
                   memSpec.bankGroupsPerChannel > absoluteBankGroups ||
                   memSpec.banksPerChannel > absoluteBanks ||
                   memSpec.rowsPerBank > rows ||
                   memSpec.columnsPerRow > columns;
    } else {
        mismatch = memSpec.numberOfChannels != channels ||
                   memSpec.ranksPerChannel != ranksAndPseudochannels ||
                   memSpec.bankGroupsPerChannel != absoluteBankGroups ||
                   memSpec.banksPerChannel != absoluteBanks ||
                   memSpec.rowsPerBank != rows ||
                   memSpec.columnsPerRow != columns;
    }

    if (mismatch)
        SC_REPORT_FATAL("AddressDecoder", "Memspec and address mapping do not match");
}

void AddressDecoder::checkAddressableLimits(const MemSpec& memSpec) {
    validateAddressableLimit(memSpec.numberOfChannels, calculateAddressableElements(channelBits.length), "Channel");
    validateAddressableLimit(memSpec.ranksPerChannel, calculateAddressableElements(bankBits.length), "Rank");
    unsigned addressableBankGroups = calculateAddressableElements(bankGroupBits.length) * calculateAddressableElements(rankBits.length);
    unsigned absoluteBanks = calculateAddressableElements(bankBits.length) * addressableBankGroups;
    validateAddressableLimit(memSpec.bankGroupsPerChannel, addressableBankGroups, "Bank group");
    validateAddressableLimit(memSpec.banksPerChannel, absoluteBanks, "Bank");
    validateAddressableLimit(memSpec.rowsPerBank, calculateAddressableElements(rowBits.length), "Row");
    validateAddressableLimit(memSpec.columnsPerRow, calculateAddressableElements(columnBits.length), "Column");
}

/**
 * @brief Computes how many elements can be addressed with a number of bits.
 *
 * @param bitSize The number of address bits.
 * @return 2 to the power of bitSize, rounded to the nearest integer.
 */
unsigned AddressDecoder::calculateAddressableElements(unsigned bitSize) const {
    const double elementCount = std::pow(2, bitSize);
    return std::lround(elementCount);
}

/**
 * @brief Reports a fatal error if a MemSpec value does not fit its mapping bits.
 *
 * The value is accepted only if it lies in the range
 * (addressableValue / 2, addressableValue], i.e. it neither exceeds what the
 * bits can address nor could be addressed with one bit less.
 *
 * @param memSpecValue The element count requested by the MemSpec.
 * @param addressableValue The element count addressable by the mapping bits.
 * @param name The component name used in the error message.
 */
void AddressDecoder::validateAddressableLimit(unsigned memSpecValue, unsigned addressableValue, const std::string& name) {
    const bool exceedsAddressable = memSpecValue > addressableValue;
    const bool fitsInFewerBits = memSpecValue <= (addressableValue >> 1);

    if (!exceedsAddressable && !fitsInFewerBits)
        return;

    const std::string message = name + " bit mapping does not match the memspec configuration";
    SC_REPORT_FATAL("AddressDecoder", message.c_str());
}

/**
 * @brief Tells whether a value is a power of two.
 *
 * @param value The value to test.
 * @return true if exactly one bit of value is set; false for 0.
 */
bool AddressDecoder::isPowerOfTwo(unsigned value) const {
    if (value == 0)
        return false;

    // Clearing the lowest set bit leaves nothing behind for a power of two
    const unsigned withoutLowestBit = value & (value - 1);
    return withoutLowestBit == 0;
}

unsigned AddressDecoder::checkByteBits(const MemSpec& memSpec) {
    unsigned bytesPerBeat = memSpec.dataBusWidth / 8;
    unsigned numOfByteBits = std::ceil(std::log2(memSpec.dataBusWidth / 8.0));

    if (!isPowerOfTwo(bytesPerBeat)) {
        SC_REPORT_WARNING("AddressDecoder",
            ("Bytes per beat are not power of two! \nAssuming " +
                std::to_string(numOfByteBits) + " reserved byte bits.").c_str());
    }

    if (byteBits.length < numOfByteBits) {
        SC_REPORT_FATAL("AddressDecoder",
            ("Byte bits are not continuous starting from 0. (bytesPerBeat: " +
            std::to_string(bytesPerBeat) +
            "B -> number of byte-bits: " +
            std::to_string(numOfByteBits) + ")").c_str());
    }

    return numOfByteBits;
}


/**
 * @brief Checks that the lowest column bits can address a maximum-length burst.
 *
 * Computes the number of burst bits as ceil(log2(maxBurstLength)), stores the
 * burst bit mask (located directly above the byte bits) and reports a fatal
 * error if the column component has too few bits or if the address bits used
 * by the column rows do not include the complete burst range.
 *
 * @param memSpec The memory specification providing the maximum burst length.
 */
void AddressDecoder::checkBurstLengthBits(const MemSpec& memSpec) {
    unsigned numOfMaxBurstLengthBits = std::ceil(std::log2(memSpec.maxBurstLength));
    burstBitMask = createBitmask(numOfMaxBurstLengthBits, byteBits.length);

    if (!isPowerOfTwo(memSpec.maxBurstLength)) {
        SC_REPORT_WARNING("AddressDecoder",
            ("Maximum burst length (" + std::to_string(memSpec.maxBurstLength) +
            ") is not power of two! \nAssuming " +
            std::to_string(numOfMaxBurstLengthBits) +
            " reserved burst bits.").c_str());
    }

    // Mask of the burst bits, starting at the index of the first column row.
    // Built with 64-bit arithmetic: the previous "1 << n" was evaluated as int
    // and shifting by a negative index (no column bits configured) is undefined.
    const unsigned firstColumnIndex = columnBits.idx < 0 ? 0U : static_cast<unsigned>(columnBits.idx);
    const std::bitset<ADDRESS_WIDTH> burstBitset(createBitmask(numOfMaxBurstLengthBits, firstColumnIndex));

    // Collect all address bits that contribute to any column row
    std::bitset<ADDRESS_WIDTH> columnBitset;
    for (size_t i = 0; i < columnBits.length; i++) {
        columnBitset |= mappingMatrix[columnBits.idx + i];
    }
    if ((columnBits.length < numOfMaxBurstLengthBits) || ((columnBitset & burstBitset) != burstBitset)) {
        SC_REPORT_FATAL("AddressDecoder",
            ("No continuous column bits for maximum burst length (maximumBurstLength: " +
            std::to_string(memSpec.maxBurstLength) +
            " -> required number of burst bits: " +
            std::to_string(numOfMaxBurstLengthBits) + ")").c_str());
    }
}


/**
 * @brief Decodes an encoded address into its components.
 *
 * The address is multiplied with the mapping matrix over GF(2); the components
 * are then cut out of the product at the positions recorded for them. Rank and
 * pseudochannel are merged into the rank. With the NP2 flag set the decoded
 * address is validated, and finally bank group and bank are converted from
 * relative to absolute indices.
 *
 * @param address The encoded address.
 * @return The decoded address. Out-of-range or invalid addresses only cause a
 *         warning; decoding continues regardless.
 */
DecodedAddress AddressDecoder::decodeAddress(uint64_t address) const
{
    if (address > upperBoundAddress)
    {
        const std::string outOfRange = "Address " + std::to_string(address) +
                                       " out of range (maximum address is " +
                                       std::to_string(upperBoundAddress) + ")";
        SC_REPORT_WARNING("AddressDecoder", outOfRange.c_str());
    }

    const uint64_t mapped = gf2Multiplication(address, mappingMatrix);

    // Cuts one component out of the mapped address; absent components yield 0.
    const auto extract = [mapped](const AddressComponent& field) -> unsigned {
        const bool absent = field.idx < 0 || field.length <= 0;
        if (absent)
            return 0U;

        const uint64_t fieldMask = (1ULL << field.length) - 1;
        return static_cast<unsigned>((mapped >> field.idx) & fieldMask);
    };

    DecodedAddress decAddr;
    decAddr.byte = extract(byteBits);
    decAddr.column = extract(columnBits);
    decAddr.row = extract(rowBits);
    decAddr.bank = extract(bankBits);
    decAddr.bankgroup = extract(bankGroupBits);
    decAddr.stack = extract(stackBits);
    decAddr.rank = extract(rankBits);
    decAddr.rank |= extract(pseudochannelBits);
    decAddr.channel = extract(channelBits);

    const bool invalid = np2Flag && !isAddressValid(decAddr);
    if (invalid)
    {
        const std::string invalidAddress = "Address " + std::to_string(address) + " invalid)";
        SC_REPORT_WARNING("AddressDecoder", invalidAddress.c_str());
    }

    // Important: These offsets must be added after(!) the address validation,
    // because the validation works on the relative bank group and bank.
    decAddr.bankgroup += decAddr.rank * bankgroupsPerRank;
    decAddr.bank += decAddr.bankgroup * banksPerGroup;

    return decAddr;
}

/**
 * @brief Checks a decoded address against the limits of the MemSpec.
 *
 * First the burst part of the column (the column bits selected by the burst
 * bit mask, shifted down to bit 0) must be smaller than the maximum burst
 * length. Then channel, rank, bank group, bank, row and column must each be
 * below the corresponding MemSpec limit. Every violation issues a warning.
 *
 * @param decAddr The decoded address; this check is run on it before the
 *        absolute bank group / bank offsets are applied.
 * @return true if the address is valid, false otherwise.
 */
bool AddressDecoder::isAddressValid(const DecodedAddress& decAddr) const
{
    // Check if burst address is within limits.
    // An empty mask means there are no burst bits (e.g. maximum burst length 1):
    // nothing to check, and searching for the lowest set bit would never end.
    if (burstBitMask != 0)
    {
        // Find the position of the lowest set mask bit
        unsigned shift = 0;
        while (((burstBitMask >> shift) & 1) == 0)
        {
            ++shift;
        }

        const auto burstAddress = decAddr.column & (burstBitMask >> shift);
        if (burstAddress >= memSpec->maxBurstLength)
        {
            SC_REPORT_WARNING("AddressDecoder", ("Burst address out of bounds (given: " +
                std::to_string(burstAddress) +
                ", MemSpec: " + std::to_string(memSpec->maxBurstLength) + ")")
                .c_str()
            );
            return false;
        }
    }

    // Check all address components for validity
    if ((decAddr.channel >= memSpec->numberOfChannels) ||
        (decAddr.rank >= memSpec->ranksPerChannel) ||
        (decAddr.bankgroup >= memSpec->bankGroupsPerChannel) ||
        (decAddr.bank >= memSpec->banksPerGroup) ||
        (decAddr.row >= memSpec->rowsPerBank) ||
        (decAddr.column >= memSpec->columnsPerRow))
    {
        SC_REPORT_WARNING("AddressDecoder",
            "Invalid address: channel, rank, bankgroup, bank, row or column exeeds memSpec limits.");
        return false;
    }

    return true;
}

/**
 * @brief Decodes only the channel of an encoded address.
 *
 * @param encAddr The encoded address. An address above the upper bound only
 *        causes a warning; decoding continues regardless.
 * @return The channel, or 0 if the mapping has no channel bits.
 */
unsigned AddressDecoder::decodeChannel(uint64_t encAddr) const
{
    if (encAddr > upperBoundAddress)
    {
        const std::string outOfRange = "Address " + std::to_string(encAddr) +
                                       " out of range (maximum address is " +
                                       std::to_string(upperBoundAddress) + ")";
        SC_REPORT_WARNING("AddressDecoder", outOfRange.c_str());
    }

    const uint64_t mapped = gf2Multiplication(encAddr, mappingMatrix);

    // Without channel bits every address belongs to channel 0
    const bool channelAbsent = channelBits.idx < 0 || channelBits.length <= 0;
    if (channelAbsent)
        return 0U;

    // Shift the channel field down to bit 0 and mask away everything above it
    const uint64_t channelMask = (1ULL << channelBits.length) - 1;
    return static_cast<unsigned>((mapped >> channelBits.idx) & channelMask);
}

/**
 * @brief Encodes a decoded address back into a physical address.
 *
 * Bank group and bank are converted from absolute to relative indices, all
 * components are placed at their positions in a 64-bit intermediate address,
 * and the intermediate address is multiplied with the transposed mapping
 * matrix over GF(2).
 *
 * The intermediate address is assembled with 64-bit arithmetic; it used to be
 * truncated to 32 bits, which corrupted every component located at bit 32 or
 * above. Each value is masked to the width of its component so that an
 * oversized value cannot spill into a neighbouring component.
 *
 * NOTE(review): the transposed matrix is the inverse of the mapping matrix
 * only if the mapping matrix is a permutation matrix — confirm that this is
 * intended for mappings with XOR-ed bits.
 *
 * @param decAddr The decoded address (absolute bank group and bank indices).
 * @return The encoded address.
 */
uint64_t AddressDecoder::encodeAddress(DecodedAddress decAddr) const
{
    // Convert absolute addressing for bank, bankgroup to relative
    decAddr.bankgroup = decAddr.bankgroup % bankgroupsPerRank;
    decAddr.bank = decAddr.bank % banksPerGroup;

    uint64_t mappedAddr = 0;

    /**
     * @brief Inserts a specific AddressComponent to the mappedAddress.
     */
    auto set_component = [&mappedAddr](const AddressComponent& component, const unsigned int value) {
        if (component.idx < 0 || component.length <= 0) {
            return;
        }
        // Restrict the value to the bits that belong to this component
        const uint64_t fieldMask =
            (component.length >= 64) ? ~UINT64_C(0) : ((UINT64_C(1) << component.length) - 1);
        // Widen before shifting, then add to mappedAddress
        mappedAddr |= (static_cast<uint64_t>(value) & fieldMask) << component.idx;
    };

    set_component(channelBits, decAddr.channel);
    set_component(rankBits, decAddr.rank);
    // The decoder merges rank and pseudochannel into the rank
    set_component(pseudochannelBits, decAddr.rank);
    set_component(stackBits, decAddr.stack);
    set_component(bankGroupBits, decAddr.bankgroup);
    set_component(bankBits, decAddr.bank);
    set_component(rowBits, decAddr.row);
    set_component(columnBits, decAddr.column);
    set_component(byteBits, decAddr.byte);

    return gf2Multiplication(mappedAddr, transposedMappingMatrix);
}

void AddressDecoder::print() const
{
    std::cout << headline << std::endl;
    std::cout << "Used Address Mapping:" << std::endl;
    std::cout << std::endl;

    auto printBits = [&](const AddressComponent& component) {
        int startIdx = component.idx;
        int length = component.length;
        if (startIdx < 0) return;

        for (int i = 0; i<length; ++i) {
            std::cout << " " << component.name << " " << std::setw(2) << mappingMatrix[startIdx + i] << std::endl;
        }
    };

    printBits(byteBits);
    printBits(columnBits);
    printBits(rowBits);
    printBits(bankBits);
    printBits(bankGroupBits);
    printBits(stackBits);
    printBits(rankBits);
    printBits(pseudochannelBits);
    printBits(channelBits);
}
} // namespace DRAMSys