arch-arm: Add matrix register support for SME
We add support for the matrix registers to the Arm architecture. This will be used to implement support for Arm's Scalable Matrix Extension (SME) in subsequent commits. We add an implementation of a matrix register for the Arm architecture. These are akin to 2D vector registers in the sense that they can be dynamically viewed as a variety of element sizes. As widening the element size would reduce the matrix size by a factor of element size, we instead layer multiple tiles of wider elements onto the underlying matrix storage in order to retain square matrices. We separate the storage of the matrix from the different views one can have. The potential views are: * Tiles: View the matrix as one or more tiles using a specified element size. As the element size increases the number of indexable tiles increases. When using the smallest granularity element size (bytes) there is a single tile. As an example, using 32-bit elements yields 4 tiles. Tiles are interleaved onto the underlaying matrix modulo element size. A tile supports 2D indexing ([][]), with the first index specifying the row index, and the second the column (element index within the row). * A Horizontal/Vertical slice (row or a column) of a tile: Take the aforementioned tile, and extract a specified row or column slice from it. A slice supports standard []-based indexing. A tile slice must use the same underlying element type as is used for the tile. * A Horizontal/Vertical slice (row or column) of the underlying matrix storage: Treat the matrix register as an array of vectors (rows or columns, rows preferred due to them being indepependent of the element size being used). On simulator start-up the matrix registers are initialised to a maximum size. At run-time the used size can by dynamically adjusted. However, please note that as the matrix register class doesn't know if a smaller size is being used, the class itself doesn't do any bounds checking itself. This is left to the user. Jira Issue: https://gem5.atlassian.net/browse/GEM5-1289 Change-Id: I6a6a05154846e4802e9822bbbac00ab2c39538ed Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64334 Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com> Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
committed by
Giacomo Travaglini
parent
fed81f3408
commit
5c43523d53
@@ -54,6 +54,7 @@ if env['USE_ARM_ISA']:
|
||||
'../../cpu/reg_class.cc',
|
||||
'../../sim/bufval.cc', '../../sim/cur_tick.cc',
|
||||
'regs/int.cc')
|
||||
GTest('matrix.test', 'matrix.test.cc')
|
||||
Source('decoder.cc', tags='arm isa')
|
||||
Source('faults.cc', tags='arm isa')
|
||||
Source('htm.cc', tags='arm isa')
|
||||
|
||||
@@ -76,7 +76,6 @@ namespace
|
||||
|
||||
/* Not applicable to ARM */
|
||||
RegClass floatRegClass(FloatRegClass, FloatRegClassName, 0, debug::FloatRegs);
|
||||
RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs);
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
@@ -561,6 +560,12 @@ ISA::copyRegsFrom(ThreadContext *src)
|
||||
for (auto &id: vecElemClass)
|
||||
tc->setReg(id, src->getReg(id));
|
||||
|
||||
ArmISA::MatRegContainer mc;
|
||||
for (auto &id: matRegClass) {
|
||||
src->getReg(id, &mc);
|
||||
tc->setReg(id, &mc);
|
||||
}
|
||||
|
||||
// setMiscReg "with effect" will set the misc register mapping correctly.
|
||||
// e.g. updateRegMap(val)
|
||||
tc->setMiscReg(MISCREG_CPSR, src->readMiscRegNoEffect(MISCREG_CPSR));
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2010, 2012-2021 ARM Limited
|
||||
* Copyright (c) 2010, 2012-2022 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
@@ -45,6 +45,7 @@
|
||||
#include "arch/arm/mmu.hh"
|
||||
#include "arch/arm/pcstate.hh"
|
||||
#include "arch/arm/regs/int.hh"
|
||||
#include "arch/arm/regs/mat.hh"
|
||||
#include "arch/arm/regs/misc.hh"
|
||||
#include "arch/arm/regs/vec.hh"
|
||||
#include "arch/arm/self_debug.hh"
|
||||
|
||||
572
src/arch/arm/matrix.hh
Normal file
572
src/arch/arm/matrix.hh
Normal file
@@ -0,0 +1,572 @@
|
||||
/*
|
||||
* Copyright (c) 2022 Arm Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file arch/arm/matrix.hh
|
||||
* Matrix Register Specification.
|
||||
*
|
||||
* In this file we add three new classes which are used to provide both
|
||||
* the backing storage for matrix registers (MatStore) and for accessing
|
||||
* them using a set of views onto the backing store (Tile, TileSlice).
|
||||
*
|
||||
* The MatStore provides the backing store for the matrix, handles the
|
||||
* serialisation/unserialisation, and provides interfaces to obtain
|
||||
* views of the matrix. The underlying element for the MatStore is a
|
||||
* byte, and it uses two templated parameters, X and Y, to set the
|
||||
* overall size of the matrix. The common use case will be that X and Y
|
||||
* are the same size, yielding a square matrix, but this is not a
|
||||
* requirement - it is possible to create non-square matricies too if
|
||||
* such a thing is desired.
|
||||
*
|
||||
* The Tile provides a view on top of the MatStore which is intended to
|
||||
* preserve the original aspect ratio of the underlying MatStore as the
|
||||
* element size scales. It does so by row-wise interleaving one or more
|
||||
* sub-matrices on top of the MatStore, where the number of sub-matrices
|
||||
* is governed by the element size (in bytes) itself. As an example, if
|
||||
* the elements are half-words, i.e. 2 bytes wide, then there are two
|
||||
* interleaved matrices with even rows belonging to sub-matrix 0 and odd
|
||||
* rows belonging to sub-matrix 1. However, each of these sub-matricies
|
||||
* maintains the original aspect ratio of the MatStore - the element
|
||||
* size has doubled (bytes => half words), hence each row contains half
|
||||
* the original number of elements, and each sub-matrix contains half of
|
||||
* the number of rows themselves.
|
||||
*
|
||||
* The TileSlice class provides a view of either a row or a column of a
|
||||
* matrix, and can be generated from either the MatStore directly, or
|
||||
* from the Tile. In the former case this allows a matrix to be viewed
|
||||
* as a set of rows or columns, and in the latter this same approach is
|
||||
* applied to the Tile. In both cases this is achieved by adjusting the
|
||||
* striding through the backing store accordingly.
|
||||
*
|
||||
* The intended usage of the views is as follows:
|
||||
*
|
||||
* // declare an 8x8 matrix of bytes
|
||||
* using Mat8x8 = MatStore<8, 8>;
|
||||
*
|
||||
* // Create a matrix and make sure that it is zeroed
|
||||
* Mat8x8 mat;
|
||||
* mat.zero();
|
||||
*
|
||||
* // Interleave four tiles of int32_t onto the 8x8 matrix, and get
|
||||
* // tile 0. (Each of these tiles will be a 2x2 matrix)
|
||||
* auto mat0 = mat.asTile<int32_t>(0);
|
||||
*
|
||||
* // Set both elements of row 0 to 10
|
||||
* for (auto i = 0; i < 2; ++i) {
|
||||
* mat0[0][i] = 10;
|
||||
* }
|
||||
*
|
||||
* // Sum both elements of row 1
|
||||
* int32_t sum = 0;
|
||||
* auto row = mat0.asHSlice(1);
|
||||
* for (auto i = 0; i < 2; ++i) {
|
||||
* sum += row[i];
|
||||
* }
|
||||
*
|
||||
* // print column 1 of the whole MatStore when viewed as uint16_t
|
||||
* col = mat.asVSlice<uint16_t>(1);
|
||||
* for (auto i = 0; i < 4; ++i) {
|
||||
* std::cout << col[i] << std::endl;
|
||||
* }
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_ARM_MATRIX_HH__
|
||||
#define __ARCH_ARM_MATRIX_HH__
|
||||
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <type_traits>
|
||||
|
||||
#include "base/cprintf.hh"
|
||||
#include "base/logging.hh"
|
||||
#include "base/types.hh"
|
||||
#include "sim/serialize_handlers.hh"
|
||||
|
||||
namespace gem5
|
||||
{
|
||||
|
||||
constexpr unsigned MaxMatRegRowLenInBytes = 256;
|
||||
constexpr unsigned MaxMatRegRows = 256;
|
||||
|
||||
// Forward declarations
|
||||
template <size_t X, size_t Y>
|
||||
class MatStore;
|
||||
template <typename ElemType, typename Container>
|
||||
class Tile;
|
||||
|
||||
template <size_t X, size_t Y>
|
||||
struct ParseParam<MatStore<X, Y>>;
|
||||
|
||||
/**
|
||||
* @brief Provides a view of a horizontal slice of either a
|
||||
* MatStore or a Tile.
|
||||
*
|
||||
* Based on whether this view it is being used from the MatStore
|
||||
* directly or from the Tile different parameters are
|
||||
* used. Behind the scenes the parameters are used to stride through the
|
||||
* (linear) backing store in order to return or maniplate the desired
|
||||
* elements of the row/column.
|
||||
*
|
||||
* @tparam ElemType The type of element to use for the view.
|
||||
* @tparam Container The type of container being used as the backing store.
|
||||
* @tparam FromTile Set true if operating on an interleaved tile.
|
||||
*/
|
||||
template <typename ElemType, typename Container, bool FromTile>
|
||||
class HorizontalSlice
|
||||
{
|
||||
template <size_t, size_t> friend class MatStore;
|
||||
template <typename, typename> friend class Tile;
|
||||
|
||||
private:
|
||||
Container * container;
|
||||
size_t index;
|
||||
size_t xElems;
|
||||
size_t yElems;
|
||||
size_t startElts;
|
||||
size_t strideElts;
|
||||
|
||||
private:
|
||||
HorizontalSlice(Container& cnt, size_t _startBytes, size_t _strideBytes,
|
||||
size_t idx)
|
||||
: container(&cnt), index(idx),
|
||||
xElems(container->xSize() / sizeof(ElemType)),
|
||||
yElems(container->ySize() / (FromTile ? sizeof(ElemType): 1)),
|
||||
startElts(_startBytes / sizeof(ElemType)),
|
||||
strideElts(_strideBytes / sizeof(ElemType))
|
||||
{
|
||||
gem5_assert(xElems > 0, "The number of xElems cannot be 0");
|
||||
gem5_assert(yElems > 0, "The number of yElems cannot be 0");
|
||||
|
||||
// Make sure that we have a whole multiple of an element size
|
||||
assert (_startBytes % sizeof(ElemType) == 0);
|
||||
assert (_strideBytes % sizeof(ElemType) == 0);
|
||||
|
||||
if constexpr (!FromTile) {
|
||||
// If we are not operating on a tile, the stride must be the
|
||||
// same as the row length, X.
|
||||
assert(_strideBytes == container->xSize());
|
||||
} else {
|
||||
// If we are operating on a tile, then the stride must be
|
||||
// sizeof(ElemSize) greater than X.
|
||||
assert(_strideBytes / container->xSize() == sizeof(ElemType));
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
ElemType&
|
||||
operator[](size_t elem_idx)
|
||||
{
|
||||
assert(elem_idx < xElems);
|
||||
size_t linear_index = startElts + index * strideElts + elem_idx;
|
||||
return container->template rawPtr<ElemType>()[linear_index];
|
||||
};
|
||||
|
||||
void
|
||||
zero()
|
||||
{
|
||||
for (int i = 0; i < xElems; ++i) {
|
||||
(*this)[i] = (ElemType)0;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Provides a view of a vertical slice of either a
|
||||
* MatStore or a Tile.
|
||||
*
|
||||
* Based on whether this view it is being used from the MatStore
|
||||
* directly or from the Tile different parameters are used. Behind the
|
||||
* scenes the parameters are used to stride through the (linear) backing
|
||||
* store in order to return or maniplate the desired elements of the
|
||||
* row/column.
|
||||
*
|
||||
* @tparam ElemType The type of element to use for the view.
|
||||
* @tparam Container The type of container being used as the backing store.
|
||||
* @tparam FromTile Set true if operating on an interleaved tile.
|
||||
*/
|
||||
template <typename ElemType, typename Container, bool FromTile>
|
||||
class VerticalSlice
|
||||
{
|
||||
template <size_t, size_t> friend class MatStore;
|
||||
template <typename, typename> friend class Tile;
|
||||
|
||||
private:
|
||||
Container * container;
|
||||
size_t index;
|
||||
size_t xElems;
|
||||
size_t yElems;
|
||||
size_t startElts;
|
||||
size_t strideElts;
|
||||
|
||||
private:
|
||||
VerticalSlice(Container& cnt, size_t _startBytes, size_t _strideBytes, size_t idx)
|
||||
: container(&cnt), index(idx),
|
||||
xElems(container->xSize() / sizeof(ElemType)),
|
||||
yElems(container->ySize() / (FromTile ? sizeof(ElemType): 1)),
|
||||
startElts(_startBytes / sizeof(ElemType)),
|
||||
strideElts(_strideBytes / sizeof(ElemType))
|
||||
{
|
||||
gem5_assert(xElems > 0, "The number of xElems cannot be 0");
|
||||
gem5_assert(yElems > 0, "The number of yElems cannot be 0");
|
||||
|
||||
// Make sure that we have a whole multiple of an element size
|
||||
assert (_startBytes % sizeof(ElemType) == 0);
|
||||
assert (_strideBytes % sizeof(ElemType) == 0);
|
||||
|
||||
if constexpr (!FromTile) {
|
||||
// If we are not operating on a tile, the stride must be the
|
||||
// same as the row length, X.
|
||||
assert(_strideBytes == container->xSize());
|
||||
} else {
|
||||
// If we are operating on a tile, then the stride must be
|
||||
// sizeof(ElemSize) greater than X.
|
||||
assert(_strideBytes / container->xSize() == sizeof(ElemType));
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
ElemType&
|
||||
operator[](size_t elem_idx)
|
||||
{
|
||||
assert(elem_idx < yElems);
|
||||
size_t linear_index = startElts + elem_idx * strideElts + index;
|
||||
return container->template rawPtr<ElemType>()[linear_index];
|
||||
};
|
||||
|
||||
void
|
||||
zero()
|
||||
{
|
||||
for (int i = 0; i < yElems; ++i) {
|
||||
(*this)[i] = (ElemType)0;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Provides a view of a matrix that is row-interleaved onto a
|
||||
* MatStore.
|
||||
*
|
||||
* This class largely acts as a shim between the MatStore and the
|
||||
* TileSlice view. The size of the ElemType and the index passed to the
|
||||
* constructor are used to calculate the stride and start which are
|
||||
* passed to the TileSlice view to control how it strides through the
|
||||
* backing store.
|
||||
*
|
||||
* @tparam ElemType The type of element to use for the view.
|
||||
* @tparam Container The type of container being used as the backing store.
|
||||
*/
|
||||
template <typename ElemType, typename Container>
|
||||
class Tile
|
||||
{
|
||||
template <size_t, size_t> friend class MatStore;
|
||||
|
||||
// We "calculate" the number of possible tiles based on the element size
|
||||
static constexpr size_t NUM_TILES = sizeof(ElemType);
|
||||
|
||||
private:
|
||||
Container * container;
|
||||
size_t index;
|
||||
size_t startBytes;
|
||||
size_t strideBytes;
|
||||
|
||||
private:
|
||||
Tile(Container& cnt, size_t idx)
|
||||
: container(&cnt), index(idx)
|
||||
{
|
||||
assert(index < NUM_TILES);
|
||||
startBytes = container->xSize() * index;
|
||||
strideBytes = NUM_TILES * container->xSize();
|
||||
};
|
||||
|
||||
public:
|
||||
auto
|
||||
operator[](size_t idx)
|
||||
{
|
||||
assert(idx < (container->ySize() / NUM_TILES));
|
||||
return asHSlice(idx);
|
||||
};
|
||||
|
||||
Container*
|
||||
getContainer()
|
||||
{
|
||||
return container;
|
||||
};
|
||||
|
||||
auto
|
||||
asHSlice(size_t row_idx)
|
||||
{
|
||||
assert(row_idx < container->ySize() / NUM_TILES);
|
||||
return HorizontalSlice<ElemType, Container, true>(*container,
|
||||
startBytes,
|
||||
strideBytes,
|
||||
row_idx);
|
||||
};
|
||||
|
||||
auto
|
||||
asVSlice(size_t col_idx)
|
||||
{
|
||||
assert(col_idx < container->xSize());
|
||||
return VerticalSlice<ElemType, Container, true>(*container, startBytes,
|
||||
strideBytes, col_idx);
|
||||
};
|
||||
|
||||
void
|
||||
zero()
|
||||
{
|
||||
for (int i = 0; i < container->ySize() / NUM_TILES; ++i) {
|
||||
// We zero the tile by rows. We need to do it this way due
|
||||
// to the interleaving.
|
||||
auto row = this->asHSlice(i);
|
||||
row.zero();
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
// Base container class for a matrix. Allows for non-square matricies.
|
||||
/**
|
||||
* @brief Backing store for matrices.
|
||||
*
|
||||
* This class provides the backing store for matricies, and is largely a
|
||||
* wrapper around an std::array of bytes. This class provides some basic
|
||||
* interfaces for assignment (copy the backing store) and comparison,
|
||||
* and provides the interface for generating views onto the backing
|
||||
* store. It is these views that are intended to be used by the end-user
|
||||
* of the matrix in most cases.
|
||||
*
|
||||
* This class is also responsible for handling the
|
||||
* serialisation/unserialisation of matrix registers (see ShowParam and
|
||||
* ParseParam).
|
||||
*
|
||||
* @tparam X X size in bytes (number of columns).
|
||||
* @tparam Y Y size in bytes (number of rows).
|
||||
*/
|
||||
template <size_t X, size_t Y>
|
||||
class MatStore
|
||||
{
|
||||
static_assert(X > 0, "X size cannot be 0");
|
||||
static_assert(Y > 0, "Y size cannot be 0");
|
||||
|
||||
static constexpr size_t LINEAR_SIZE = X * Y;
|
||||
|
||||
template <typename, typename, bool> friend class HorizontalSlice;
|
||||
template <typename, typename, bool> friend class VerticalSlice;
|
||||
|
||||
public:
|
||||
static constexpr inline size_t xSize() { return X; };
|
||||
static constexpr inline size_t ySize() { return Y; };
|
||||
static constexpr inline size_t linearSize() { return LINEAR_SIZE; };
|
||||
|
||||
using Container = std::array<uint8_t, LINEAR_SIZE>;
|
||||
using MyClass = MatStore<X, Y>;
|
||||
private:
|
||||
// We need to be able to handle 128-bit types; align accordingly
|
||||
alignas(16) Container container;
|
||||
|
||||
public:
|
||||
/** Constructor */
|
||||
MatStore() {};
|
||||
|
||||
MatStore(const MatStore&) = default;
|
||||
|
||||
void
|
||||
zero()
|
||||
{
|
||||
memset(container.data(), 0 , LINEAR_SIZE);
|
||||
}
|
||||
|
||||
/** Assignment operators. */
|
||||
/** @{ */
|
||||
/** From MatStore */
|
||||
MyClass&
|
||||
operator=(const MyClass& that)
|
||||
{
|
||||
if (&that == this)
|
||||
return *this;
|
||||
memcpy(container.data(), that.container.data(), LINEAR_SIZE);
|
||||
return *this;
|
||||
}
|
||||
/** @} */
|
||||
|
||||
/** Equality operator.
|
||||
* Required to compare thread contexts.
|
||||
*/
|
||||
template<size_t X2, size_t Y2>
|
||||
inline bool
|
||||
operator==(const MatStore<X2, Y2>& that) const
|
||||
{
|
||||
return X == X2 && Y == Y2 &&
|
||||
!memcmp(container.data(), that.container.data(), LINEAR_SIZE);
|
||||
}
|
||||
|
||||
/** Inequality operator.
|
||||
* Required to compare thread contexts.
|
||||
*/
|
||||
template<size_t X2, size_t Y2>
|
||||
bool
|
||||
operator!=(const MatStore<X2, Y2>& that) const
|
||||
{
|
||||
return !operator==(that);
|
||||
}
|
||||
|
||||
private:
|
||||
/** Get pointer to the raw data. */
|
||||
template <typename ElemType>
|
||||
const ElemType* rawPtr() const
|
||||
{
|
||||
return reinterpret_cast<const ElemType*>(container.data());
|
||||
}
|
||||
|
||||
template <typename ElemType>
|
||||
ElemType* rawPtr() { return reinterpret_cast<ElemType*>(container.data()); }
|
||||
|
||||
public:
|
||||
template <typename ElemType>
|
||||
auto
|
||||
asTile(size_t index)
|
||||
{
|
||||
return Tile<ElemType, MyClass>(*this, index);
|
||||
}
|
||||
|
||||
template <typename ElemType>
|
||||
auto
|
||||
asHSlice(size_t row_idx)
|
||||
{
|
||||
return HorizontalSlice<ElemType, MyClass, false>(*this, 0, X, row_idx);
|
||||
}
|
||||
|
||||
template <typename ElemType>
|
||||
auto
|
||||
asVSlice(size_t col_idx)
|
||||
{
|
||||
return VerticalSlice<ElemType, MyClass, false>(*this, 0, X, col_idx);
|
||||
}
|
||||
|
||||
friend std::ostream&
|
||||
operator<<(std::ostream& os, const MatStore<X, Y>& v)
|
||||
{
|
||||
// When printing for human consumption, break into 4 byte chunks.
|
||||
ccprintf(os, "[");
|
||||
size_t count = 0;
|
||||
for (auto& b: v.container) {
|
||||
if (count && (count % 4) == 0)
|
||||
os << "_";
|
||||
ccprintf(os, "%02x", b);
|
||||
count++;
|
||||
}
|
||||
ccprintf(os, "]");
|
||||
return os;
|
||||
}
|
||||
|
||||
/** @} */
|
||||
/**
|
||||
* Used for serialization/unserialisation.
|
||||
*/
|
||||
friend ParseParam<MatStore<X, Y>>;
|
||||
friend ShowParam<MatStore<X, Y>>;
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* Calls required for serialization/deserialization
|
||||
*/
|
||||
/** @{ */
|
||||
template <size_t X, size_t Y>
|
||||
struct ParseParam<MatStore<X, Y>>
|
||||
{
|
||||
static bool
|
||||
parse(const std::string &str, MatStore<X, Y> &value)
|
||||
{
|
||||
fatal_if(str.size() > 2 * X * Y,
|
||||
"Matrix register value overflow at unserialize");
|
||||
fatal_if(str.size() < 2 * X * Y,
|
||||
"Matrix register value underflow at unserialize");
|
||||
|
||||
for (int i = 0; i < X * Y; i++) {
|
||||
uint8_t b = 0;
|
||||
if (2 * i < str.size())
|
||||
b = stoul(str.substr(i * 2, 2), nullptr, 16);
|
||||
value.template rawPtr<uint8_t>()[i] = b;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template <size_t X, size_t Y>
|
||||
struct ShowParam<MatStore<X, Y>>
|
||||
{
|
||||
static void
|
||||
show(std::ostream &os, const MatStore<X, Y> &value)
|
||||
{
|
||||
for (auto& b: value.container)
|
||||
ccprintf(os, "%02x", b);
|
||||
}
|
||||
};
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Dummy type aliases and constants for architectures that do not
|
||||
* implement matrix registers.
|
||||
*/
|
||||
/** @{ */
|
||||
struct DummyMatRegContainer
|
||||
{
|
||||
RegVal filler = 0;
|
||||
bool operator == (const DummyMatRegContainer &d) const { return true; }
|
||||
bool operator != (const DummyMatRegContainer &d) const { return true; }
|
||||
};
|
||||
template <>
|
||||
struct ParseParam<DummyMatRegContainer>
|
||||
{
|
||||
static bool
|
||||
parse(const std::string &s, DummyMatRegContainer &value)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(DummyMatRegContainer) == sizeof(RegVal));
|
||||
static inline std::ostream &
|
||||
operator<<(std::ostream &os, const DummyMatRegContainer &d)
|
||||
{
|
||||
return os;
|
||||
}
|
||||
/** @} */
|
||||
|
||||
} // namespace gem5
|
||||
|
||||
#endif // __ARCH_ARM_MATRIX_HH__
|
||||
453
src/arch/arm/matrix.test.cc
Normal file
453
src/arch/arm/matrix.test.cc
Normal file
@@ -0,0 +1,453 @@
|
||||
/*
|
||||
* Copyright (c) 2022 Arm Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "arch/arm/matrix.hh"
|
||||
|
||||
using namespace gem5;
|
||||
|
||||
TEST(Matrix, Size)
|
||||
{
|
||||
{
|
||||
// Minimum size
|
||||
MatStore<1, 1> mat;
|
||||
ASSERT_EQ(1, mat.linearSize());
|
||||
}
|
||||
|
||||
{
|
||||
// Medium size
|
||||
constexpr size_t x_size = MaxMatRegRowLenInBytes / 2;
|
||||
constexpr size_t y_size = MaxMatRegRows / 2;
|
||||
MatStore<x_size, y_size> mat;
|
||||
ASSERT_EQ(x_size * y_size, mat.linearSize());
|
||||
}
|
||||
|
||||
{
|
||||
// Maximum size
|
||||
MatStore<MaxMatRegRowLenInBytes, MaxMatRegRows> mat;
|
||||
ASSERT_EQ(MaxMatRegRowLenInBytes * MaxMatRegRows, mat.linearSize());
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Matrix, Zero)
|
||||
{
|
||||
constexpr size_t size = 16;
|
||||
MatStore<size, size> mat;
|
||||
auto tile = mat.asTile<uint8_t>(0);
|
||||
|
||||
// Initializing with non-zero value
|
||||
for (auto i = 0; i < size; i++) {
|
||||
for (auto j = 0; j < size; j++) {
|
||||
tile[i][j] = 0xAA;
|
||||
}
|
||||
}
|
||||
|
||||
// zeroing the matrix
|
||||
mat.zero();
|
||||
|
||||
// checking if every matrix element is set to zero
|
||||
for (auto i = 0; i < size; i++) {
|
||||
for (auto j = 0; j < size; j++) {
|
||||
ASSERT_EQ(tile[i][j], 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Matrix, ZeroTiles)
|
||||
{
|
||||
constexpr size_t size = 16;
|
||||
MatStore<size, size> mat;
|
||||
auto byte_tile = mat.asTile<uint8_t>(0);
|
||||
|
||||
// Initializing the whole tile with non-zero value
|
||||
for (auto i = 0; i < size; i++) {
|
||||
for (auto j = 0; j < size; j++) {
|
||||
byte_tile[i][j] = 0xAA;
|
||||
}
|
||||
}
|
||||
|
||||
// zeroing the half-word tile 0 of matrix
|
||||
auto half_word_tile = mat.asTile<uint16_t>(0);
|
||||
half_word_tile.zero();
|
||||
|
||||
// Check that every element of half-word tile 0 is zero
|
||||
for (auto i = 0; i < size / 2; i++) {
|
||||
for (auto j = 0; j < size / 2; j++) {
|
||||
ASSERT_EQ(half_word_tile[i][j], 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Check that every element of half-word tile 1 is 0xAAAA (note the
|
||||
// double width of the element)
|
||||
half_word_tile = mat.asTile<uint16_t>(1);
|
||||
for (auto i = 0; i < size / 2; i++) {
|
||||
for (auto j = 0; j < size / 2; j++) {
|
||||
ASSERT_EQ(half_word_tile[i][j], 0xAAAA);
|
||||
}
|
||||
}
|
||||
|
||||
// Check if every matrix element on an even row is set to zero
|
||||
for (auto i = 0; i < size; i += 2) {
|
||||
for (auto j = 0; j < size; j++) {
|
||||
ASSERT_EQ(byte_tile[i][j], 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Check if every matrix element on an odd row is set to 0xAA
|
||||
for (auto i = 1; i < size; i += 2) {
|
||||
for (auto j = 0; j < size; j++) {
|
||||
ASSERT_EQ(byte_tile[i][j], 0xAA);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Matrix, ZeroTileHSlice)
|
||||
{
|
||||
constexpr size_t size = 16;
|
||||
MatStore<size, size> mat;
|
||||
auto byte_tile = mat.asTile<uint8_t>(0);
|
||||
|
||||
// Initializing the whole tile with non-zero value
|
||||
for (auto i = 0; i < size; i++) {
|
||||
for (auto j = 0; j < size; j++) {
|
||||
byte_tile[i][j] = 0xAA;
|
||||
}
|
||||
}
|
||||
|
||||
// zeroing the 0th row of half-word tile 0
|
||||
auto half_word_tile = mat.asTile<uint16_t>(0);
|
||||
auto row = half_word_tile.asHSlice(0);
|
||||
row.zero();
|
||||
|
||||
// Check that every element of the row is zero
|
||||
for (auto i = 0; i < size / 2; i++) {
|
||||
ASSERT_EQ(row[i], 0);
|
||||
}
|
||||
|
||||
// Check that every element of row 1 is 0xAAAA
|
||||
row = half_word_tile.asHSlice(1);
|
||||
for (auto i = 0; i < size / 2; i++) {
|
||||
ASSERT_EQ(row[i], 0xAAAA);
|
||||
}
|
||||
|
||||
// Check that row 0 of the byte tile is zero, and that all remaining
|
||||
// rows are unaffected
|
||||
for (auto i = 0; i < size; i++) {
|
||||
for (auto j = 0; j < size; j++) {
|
||||
if (i == 0) {
|
||||
ASSERT_EQ(byte_tile[i][j], 0);
|
||||
} else {
|
||||
ASSERT_EQ(byte_tile[i][j], 0xAA);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Matrix, ZeroTileVSlice)
|
||||
{
|
||||
constexpr size_t size = 16;
|
||||
MatStore<size, size> mat;
|
||||
auto byte_tile = mat.asTile<uint8_t>(0);
|
||||
|
||||
// Initializing the whole tile with non-zero value
|
||||
for (auto i = 0; i < size; i++) {
|
||||
for (auto j = 0; j < size; j++) {
|
||||
byte_tile[i][j] = 0xAA;
|
||||
}
|
||||
}
|
||||
|
||||
// zeroing the 0th column of half-word tile 0
|
||||
auto half_word_tile = mat.asTile<uint16_t>(0);
|
||||
auto col = half_word_tile.asVSlice(0);
|
||||
col.zero();
|
||||
|
||||
// Check that every element of the column is zero
|
||||
for (auto i = 0; i < size / 2; i++) {
|
||||
ASSERT_EQ(col[i], 0);
|
||||
}
|
||||
|
||||
// Check that every element of column 1 is 0xAAAA
|
||||
col = half_word_tile.asVSlice(1);
|
||||
for (auto i = 0; i < size / 2; i++) {
|
||||
ASSERT_EQ(col[i], 0xAAAA);
|
||||
}
|
||||
|
||||
// Check that elements 0 & 1 of the byte tile are zero for even rows,
|
||||
// and that all remaining elements are unaffected
|
||||
for (auto i = 0; i < size; i++) {
|
||||
for (auto j = 0; j < size; j++) {
|
||||
if (i % 2 == 0 && (j == 0 || j == 1)) {
|
||||
ASSERT_EQ(byte_tile[i][j], 0);
|
||||
} else {
|
||||
ASSERT_EQ(byte_tile[i][j], 0xAA);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Matrix, ZeroHSlice)
|
||||
{
|
||||
constexpr size_t size = 16;
|
||||
MatStore<size, size> mat;
|
||||
auto byte_tile = mat.asTile<uint8_t>(0);
|
||||
|
||||
// Initializing the whole tile with non-zero value
|
||||
for (auto i = 0; i < size; i++) {
|
||||
for (auto j = 0; j < size; j++) {
|
||||
byte_tile[i][j] = 0xAA;
|
||||
}
|
||||
}
|
||||
|
||||
// Now we get a row directly from the matrix (as words, because it
|
||||
// should make no difference), zero it
|
||||
auto row = mat.asHSlice<uint32_t>(4);
|
||||
row.zero();
|
||||
|
||||
// Check that every element of the row is zero
|
||||
for (auto i = 0; i < size / 4; i++) {
|
||||
ASSERT_EQ(row[i], 0);
|
||||
}
|
||||
|
||||
// Check that row 4 of the byte tile is zero, and that all remaining
|
||||
// rows are unaffected
|
||||
for (auto i = 0; i < size; i++) {
|
||||
for (auto j = 0; j < size; j++) {
|
||||
if (i == 4) {
|
||||
ASSERT_EQ(byte_tile[i][j], 0);
|
||||
} else {
|
||||
ASSERT_EQ(byte_tile[i][j], 0xAA);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Matrix, ZeroVSlice)
|
||||
{
|
||||
constexpr size_t size = 16;
|
||||
MatStore<size, size> mat;
|
||||
auto byte_tile = mat.asTile<uint8_t>(0);
|
||||
|
||||
// Initializing the whole tile with non-zero value
|
||||
for (auto i = 0; i < size; i++) {
|
||||
for (auto j = 0; j < size; j++) {
|
||||
byte_tile[i][j] = 0xAA;
|
||||
}
|
||||
}
|
||||
|
||||
// Now we get a column directly from the matrix, zero it
|
||||
auto col = mat.asVSlice<uint8_t>(4);
|
||||
col.zero();
|
||||
|
||||
// Check that every element of the column is zero
|
||||
for (auto i = 0; i < size; i++) {
|
||||
ASSERT_EQ(col[i], 0);
|
||||
}
|
||||
|
||||
// Check that col 4 of the byte tile is zero, and that all remaining
|
||||
// rows are unaffected
|
||||
for (auto i = 0; i < size; i++) {
|
||||
for (auto j = 0; j < size; j++) {
|
||||
if (j == 4) {
|
||||
ASSERT_EQ(byte_tile[i][j], 0);
|
||||
} else {
|
||||
ASSERT_EQ(byte_tile[i][j], 0xAA);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now we repeat with a wider element type too. Reinitializing the
|
||||
// whole tile with non-zero value
|
||||
for (auto i = 0; i < size; i++) {
|
||||
for (auto j = 0; j < size; j++) {
|
||||
byte_tile[i][j] = 0xAA;
|
||||
}
|
||||
}
|
||||
|
||||
// Now we get a word-wide column directly from the matrix, zero it
|
||||
auto wide_col = mat.asVSlice<uint32_t>(1);
|
||||
wide_col.zero();
|
||||
|
||||
// Check that every element of the column is zero
|
||||
for (auto i = 0; i < size; i++) {
|
||||
ASSERT_EQ(wide_col[i], 0);
|
||||
}
|
||||
|
||||
// Check that cols 4-7 of the byte tile are zero, and that all
|
||||
// remaining rows are unaffected
|
||||
for (auto i = 0; i < size; i++) {
|
||||
for (auto j = 0; j < size; j++) {
|
||||
if (j >= 4 && j <= 7) {
|
||||
ASSERT_EQ(byte_tile[i][j], 0);
|
||||
} else {
|
||||
ASSERT_EQ(byte_tile[i][j], 0xAA);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class TwoDifferentMatRegs : public testing::Test
|
||||
{
|
||||
protected:
|
||||
static constexpr size_t size = 4;
|
||||
|
||||
MatStore<size, size> mat1;
|
||||
MatStore<size, size> mat2;
|
||||
|
||||
void
|
||||
SetUp() override
|
||||
{
|
||||
auto tile1 = mat1.asTile<uint8_t>(0);
|
||||
auto tile2 = mat2.asTile<uint8_t>(0);
|
||||
|
||||
// Initializing with non-zero value for matrix 1
|
||||
for (auto i = 0; i < size; i++) {
|
||||
for (auto j = 0; j < size; j++) {
|
||||
tile1[i][j] = 0xAA;
|
||||
}
|
||||
}
|
||||
|
||||
// Initializing with zero value for matrix 2
|
||||
for (auto i = 0; i < size; i++) {
|
||||
for (auto j = 0; j < size; j++) {
|
||||
tile2[i][j] = 0x0;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Testing operator=
|
||||
TEST_F(TwoDifferentMatRegs, Assignment)
|
||||
{
|
||||
// Copying the matrix
|
||||
mat2 = mat1;
|
||||
|
||||
auto tile2 = mat2.asTile<uint8_t>(0);
|
||||
|
||||
// Checking if matrix 2 elements are 0xAA
|
||||
for (auto i = 0; i < size; i++) {
|
||||
for (auto j = 0; j < size; j++) {
|
||||
ASSERT_EQ(tile2[i][j], 0xAA);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Testing operator==
|
||||
TEST_F(TwoDifferentMatRegs, Equality)
|
||||
{
|
||||
// Equality check
|
||||
ASSERT_TRUE(mat1 == mat1);
|
||||
ASSERT_TRUE(mat2 == mat2);
|
||||
ASSERT_FALSE(mat1 == mat2);
|
||||
}
|
||||
|
||||
// Testing operator!=
|
||||
TEST_F(TwoDifferentMatRegs, Inequality)
|
||||
{
|
||||
// Inequality check
|
||||
ASSERT_FALSE(mat1 != mat1);
|
||||
ASSERT_FALSE(mat2 != mat2);
|
||||
ASSERT_TRUE(mat1 != mat2);
|
||||
}
|
||||
|
||||
// Testing operator<<
|
||||
TEST_F(TwoDifferentMatRegs, Printing)
|
||||
{
|
||||
{
|
||||
std::ostringstream stream;
|
||||
stream << mat1;
|
||||
ASSERT_EQ(stream.str(), "[aaaaaaaa_aaaaaaaa_aaaaaaaa_aaaaaaaa]");
|
||||
}
|
||||
|
||||
{
|
||||
std::ostringstream stream;
|
||||
stream << mat2;
|
||||
ASSERT_EQ(stream.str(), "[00000000_00000000_00000000_00000000]");
|
||||
}
|
||||
}
|
||||
|
||||
// Testing ParseParam
|
||||
TEST_F(TwoDifferentMatRegs, ParseParam)
|
||||
{
|
||||
ParseParam<decltype(mat1)> parser;
|
||||
|
||||
parser.parse("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", mat1);
|
||||
parser.parse("cccccccccccccccccccccccccccccccc", mat2);
|
||||
|
||||
for (auto i = 0; i < size; i++) {
|
||||
for (auto j = 0; j < size; j++) {
|
||||
ASSERT_EQ(mat1.asTile<uint8_t>(0)[i][j], 0xbb);
|
||||
ASSERT_EQ(mat2.asTile<uint8_t>(0)[i][j], 0xcc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Testing ParseParam Underflow
|
||||
TEST_F(TwoDifferentMatRegs, ParseParamUnderflow)
|
||||
{
|
||||
ParseParam<decltype(mat1)> parser;
|
||||
|
||||
// We should trigger a fatal() here.
|
||||
EXPECT_ANY_THROW(parser.parse("b", mat1));
|
||||
}
|
||||
|
||||
// Testing ParseParam Overflow
|
||||
TEST_F(TwoDifferentMatRegs, ParseParamOverflow)
|
||||
{
|
||||
ParseParam<decltype(mat1)> parser;
|
||||
|
||||
// We should trigger a fatal() here.
|
||||
EXPECT_ANY_THROW(parser.parse("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", mat1));
|
||||
}
|
||||
|
||||
// Testing ShowParam
|
||||
TEST_F(TwoDifferentMatRegs, ShowParam)
|
||||
{
|
||||
ShowParam<decltype(mat1)> parser;
|
||||
|
||||
{
|
||||
std::stringstream ss;
|
||||
parser.show(ss, mat1);
|
||||
ASSERT_EQ(ss.str(), "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
|
||||
}
|
||||
|
||||
{
|
||||
std::stringstream ss;
|
||||
parser.show(ss, mat2);
|
||||
ASSERT_EQ(ss.str(), "00000000000000000000000000000000");
|
||||
}
|
||||
}
|
||||
136
src/arch/arm/regs/mat.hh
Normal file
136
src/arch/arm/regs/mat.hh
Normal file
@@ -0,0 +1,136 @@
|
||||
/*
|
||||
* Copyright (c) 2022 Arm Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_ARM_REGS_MAT_HH__
|
||||
#define __ARCH_ARM_REGS_MAT_HH__
|
||||
|
||||
#include "arch/arm/types.hh"
|
||||
#include "arch/arm/matrix.hh"
|
||||
#include "cpu/reg_class.hh"
|
||||
#include "debug/MatRegs.hh"
|
||||
|
||||
namespace gem5
|
||||
{
|
||||
|
||||
namespace ArmISA
|
||||
{
|
||||
|
||||
/*
|
||||
* We do the same as is done for vector registers when creating the
|
||||
* matricies. One of the things to note is that this allocates the
|
||||
* largest architecturally possible matrix - this is a bit inefficient
|
||||
* from a memory point of view, but at this point we do not know which
|
||||
* vector length will be chosen (and this can potentially vary during
|
||||
* runtime).
|
||||
*/
|
||||
using MatRegContainer = gem5::MatStore<MaxSmeVecLenInBytes,
|
||||
MaxSmeVecLenInBytes>;
|
||||
|
||||
template<typename ElemType>
|
||||
using MatTile = gem5::Tile<ElemType,
|
||||
MatRegContainer>;
|
||||
|
||||
template<typename ElemType>
|
||||
using MatTileRow = gem5::HorizontalSlice<ElemType,
|
||||
MatRegContainer,
|
||||
true>;
|
||||
|
||||
template<typename ElemType>
|
||||
using MatTileCol = gem5::VerticalSlice<ElemType,
|
||||
MatRegContainer,
|
||||
true>;
|
||||
|
||||
template<typename ElemType>
|
||||
using MatRow = gem5::HorizontalSlice<ElemType,
|
||||
MatRegContainer,
|
||||
false>;
|
||||
|
||||
template<typename ElemType>
|
||||
using MatCol = gem5::VerticalSlice<ElemType,
|
||||
MatRegContainer,
|
||||
false>;
|
||||
|
||||
// SME ZA tile, i.e. matrix
|
||||
const int NumMatrixRegs = 1;
|
||||
|
||||
static inline TypedRegClassOps<ArmISA::MatRegContainer> matRegClassOps;
|
||||
|
||||
inline constexpr RegClass matRegClass =
|
||||
RegClass(MatRegClass, MatRegClassName, NumMatrixRegs, debug::MatRegs).
|
||||
ops(matRegClassOps).
|
||||
regType<MatRegContainer>();
|
||||
|
||||
/*
|
||||
* Helpers for providing access to the different views of a matrix
|
||||
* register. Intended to be called from the instruction implementations
|
||||
* themselves.
|
||||
*/
|
||||
template<typename ElemType>
|
||||
MatTile<ElemType>
|
||||
getTile(MatRegContainer ®, uint8_t tile_idx)
|
||||
{
|
||||
return reg.asTile<ElemType>(tile_idx);
|
||||
}
|
||||
template<typename ElemType>
|
||||
MatTileRow<ElemType>
|
||||
getTileHSlice(MatRegContainer ®, uint8_t tile_idx, uint8_t row_idx)
|
||||
{
|
||||
return reg.asTile<ElemType>(tile_idx).asHSlice(row_idx);
|
||||
}
|
||||
template<typename ElemType>
|
||||
MatTileCol<ElemType>
|
||||
getTileVSlice(MatRegContainer ®, uint8_t tile_idx, uint8_t col_idx)
|
||||
{
|
||||
return reg.asTile<ElemType>(tile_idx).asVSlice(col_idx);
|
||||
}
|
||||
template<typename ElemType>
|
||||
MatRow<ElemType>
|
||||
getHSlice(MatRegContainer ®, uint8_t row_idx)
|
||||
{
|
||||
return reg.asHSlice<ElemType>(row_idx);
|
||||
}
|
||||
template<typename ElemType>
|
||||
MatCol<ElemType>
|
||||
getVSlice(MatRegContainer ®, uint8_t col_idx)
|
||||
{
|
||||
return reg.asVSlice<ElemType>(col_idx);
|
||||
}
|
||||
|
||||
} // namespace ArmISA
|
||||
} // namespace gem5
|
||||
|
||||
#endif
|
||||
@@ -472,6 +472,18 @@ namespace ArmISA
|
||||
constexpr unsigned VecRegSizeBytes = MaxSveVecLenInBytes;
|
||||
constexpr unsigned VecPredRegSizeBits = MaxSveVecLenInBytes;
|
||||
|
||||
constexpr unsigned MaxSmeVecLenInBits = 2048;
|
||||
static_assert(MaxSmeVecLenInBits >= 128 &&
|
||||
MaxSmeVecLenInBits <= 2048 &&
|
||||
// Only powers of two are supported. We don't need to
|
||||
// check for the zero case here as we already know it
|
||||
// is over 128.
|
||||
(MaxSmeVecLenInBits & (MaxSmeVecLenInBits - 1)) == 0,
|
||||
"Unsupported max. SME vector length");
|
||||
constexpr unsigned MaxSmeVecLenInBytes = MaxSmeVecLenInBits >> 3;
|
||||
constexpr unsigned MaxSmeVecLenInWords = MaxSmeVecLenInBits >> 5;
|
||||
constexpr unsigned MaxSmeVecLenInDWords = MaxSmeVecLenInBits >> 6;
|
||||
|
||||
} // namespace ArmISA
|
||||
} // namespace gem5
|
||||
|
||||
|
||||
Reference in New Issue
Block a user