Fix memory layout
This commit is contained in:
@@ -6,38 +6,42 @@ extern crate alloc;
|
|||||||
use aarch64_cpu::asm::barrier;
|
use aarch64_cpu::asm::barrier;
|
||||||
use alloc::boxed::Box;
|
use alloc::boxed::Box;
|
||||||
use core::fmt::Write;
|
use core::fmt::Write;
|
||||||
use half::f16;
|
|
||||||
use nalgebra::{SMatrix, SVector};
|
use nalgebra::{SMatrix, SVector};
|
||||||
|
use num_traits::{One, Zero};
|
||||||
use pim_isa::BankMode;
|
use pim_isa::BankMode;
|
||||||
use pim_os::{
|
use pim_os::{
|
||||||
pim::{
|
pim::{
|
||||||
self, interleaved_array,
|
self, interleaved_array,
|
||||||
kernel::samsung_matrix_vector_mul,
|
kernel::samsung_matrix_vector_mul,
|
||||||
vector::{self, F16x1},
|
vector::{F16x1, F16x16},
|
||||||
},
|
},
|
||||||
uart::Uart0,
|
uart::Uart0,
|
||||||
};
|
};
|
||||||
|
|
||||||
const ROWS: usize = 32;
|
const ROWS: usize = 16;
|
||||||
const COLUMNS: usize = 128;
|
const COLUMNS: usize = 128;
|
||||||
|
const X16_COLUMNS: usize = COLUMNS / 16;
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub extern "C" fn main() {
|
pub extern "C" fn main() {
|
||||||
pim::state::set_kernel(&samsung_matrix_vector_mul::KERNEL);
|
pim::state::set_kernel(&samsung_matrix_vector_mul::KERNEL);
|
||||||
|
|
||||||
let mut matrix = Box::new(pim::continuous_array::Matrix::<ROWS, COLUMNS>(
|
let matrix = Box::new(pim::continuous_array::Matrix::<ROWS, X16_COLUMNS>(
|
||||||
SMatrix::zeros(),
|
SMatrix::from_fn(|r, c| {
|
||||||
));
|
if c > 0 {
|
||||||
matrix.0.fill_lower_triangle(F16x1(f16::ONE), 0);
|
return F16x16::zero();
|
||||||
let input_vector = pim::continuous_array::Matrix::<COLUMNS, 1>(SVector::from_fn(|_, _| {
|
}
|
||||||
F16x1(f16::from_f32(1 as _))
|
|
||||||
}));
|
let mut entry = F16x16::zero();
|
||||||
let mut output_partial_sum_vector = Box::new(pim::continuous_array::Matrix::<
|
entry.0.iter_mut().take(r).for_each(|val| *val = F16x1::one());
|
||||||
ROWS,
|
|
||||||
{ vector::ELEMENT_COUNT },
|
entry
|
||||||
>(SMatrix::zeros()));
|
})));
|
||||||
|
|
||||||
let interleaved_input_vector = Box::new(interleaved_array::Vector::from(&input_vector));
|
let input_vector = SVector::<_, X16_COLUMNS>::from_element(F16x16::one());
|
||||||
|
let interleaved_input_vector = Box::new(interleaved_array::Vector::from(input_vector));
|
||||||
|
|
||||||
|
let mut output_partial_sum_vector = Box::new(SVector::<F16x16, ROWS>::zeros());
|
||||||
|
|
||||||
let dummy = Box::new(0);
|
let dummy = Box::new(0);
|
||||||
|
|
||||||
@@ -61,11 +65,10 @@ pub extern "C" fn main() {
|
|||||||
writeln!(Uart0, "{output_partial_sum_vector}").unwrap();
|
writeln!(Uart0, "{output_partial_sum_vector}").unwrap();
|
||||||
|
|
||||||
let output_vector = SVector::<F16x1, ROWS>::from_fn(|r, _| {
|
let output_vector = SVector::<F16x1, ROWS>::from_fn(|r, _| {
|
||||||
output_partial_sum_vector
|
output_partial_sum_vector[r]
|
||||||
.0
|
.0
|
||||||
.row(r)
|
|
||||||
.iter()
|
.iter()
|
||||||
.fold(F16x1::default(), |acc, val| acc + *val)
|
.fold(F16x1::zero(), |acc, val| acc + *val)
|
||||||
});
|
});
|
||||||
|
|
||||||
writeln!(Uart0, "{output_vector}").unwrap();
|
writeln!(Uart0, "{output_vector}").unwrap();
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
#![feature(generic_const_exprs)]
|
|
||||||
#![no_std]
|
#![no_std]
|
||||||
|
|
||||||
use core::sync::atomic::{compiler_fence, Ordering};
|
use core::sync::atomic::{compiler_fence, Ordering};
|
||||||
|
|||||||
@@ -1,12 +1,10 @@
|
|||||||
use super::vector::F16x1;
|
use super::vector::F16x16;
|
||||||
use core::fmt::Display;
|
use core::fmt::Display;
|
||||||
use nalgebra::SMatrix;
|
use nalgebra::SMatrix;
|
||||||
|
|
||||||
#[repr(C, align(65536))]
|
#[repr(C, align(65536))]
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Matrix<const R: usize, const C: usize>(pub SMatrix<F16x1, R, C>);
|
pub struct Matrix<const R: usize, const C: usize>(pub SMatrix<F16x16, R, C>);
|
||||||
|
|
||||||
pub type Vector<const R: usize> = Matrix<R, 1>;
|
|
||||||
|
|
||||||
impl<const R: usize, const C: usize> Display for Matrix<R, C> {
|
impl<const R: usize, const C: usize> Display for Matrix<R, C> {
|
||||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||||
|
|||||||
@@ -1,37 +1,27 @@
|
|||||||
use super::{array::NUMBER_OF_BANKS, continuous_array, vector::F16x16, vector::ELEMENT_COUNT};
|
use super::{array::NUMBER_OF_BANKS, vector::F16x16};
|
||||||
|
use nalgebra::SVector;
|
||||||
|
|
||||||
#[repr(C, align(65536))]
|
#[repr(C, align(512))]
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Vector<const R: usize>(pub [[F16x16; NUMBER_OF_BANKS]; R / ELEMENT_COUNT])
|
pub struct Vector<const R: usize>(pub [[F16x16; NUMBER_OF_BANKS]; R]);
|
||||||
where
|
|
||||||
[(); R / ELEMENT_COUNT]:;
|
|
||||||
|
|
||||||
impl<const R: usize> Default for Vector<R>
|
impl<const R: usize> Default for Vector<R> {
|
||||||
where
|
|
||||||
[(); R / ELEMENT_COUNT]:,
|
|
||||||
{
|
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self([[F16x16::default(); NUMBER_OF_BANKS]; R / ELEMENT_COUNT])
|
Self([[F16x16::default(); NUMBER_OF_BANKS]; R])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<const R: usize> From<&continuous_array::Vector<R>> for Vector<R>
|
impl<const R: usize> From<SVector<F16x16, R>> for Vector<R> {
|
||||||
where
|
fn from(input_vector: SVector<F16x16, R>) -> Self {
|
||||||
[(); R / ELEMENT_COUNT]:,
|
let mut interleaved_vector = Self::default();
|
||||||
{
|
|
||||||
fn from(continuous_vector: &continuous_array::Vector<R>) -> Self {
|
for block_index in 0..R {
|
||||||
let mut vector = Self::default();
|
let element = input_vector[block_index];
|
||||||
let blocks: usize = R / ELEMENT_COUNT;
|
|
||||||
for block_index in 0..blocks {
|
|
||||||
let element =
|
|
||||||
unsafe { *(continuous_vector.0.as_ptr() as *const F16x16).add(block_index) };
|
|
||||||
for k in 0..NUMBER_OF_BANKS {
|
for k in 0..NUMBER_OF_BANKS {
|
||||||
let interleaved_block_index = block_index * NUMBER_OF_BANKS + k;
|
interleaved_vector.0[block_index][k] = element;
|
||||||
unsafe {
|
|
||||||
*(vector.0.as_mut_ptr() as *mut F16x16).add(interleaved_block_index) = element;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
vector
|
|
||||||
|
interleaved_vector
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,7 @@
|
|||||||
use crate::pim::{continuous_array::Matrix, interleaved_array, operation::PimOperand, vector};
|
use crate::pim::{
|
||||||
|
continuous_array::Matrix, interleaved_array, operation::PimOperand, vector::F16x16,
|
||||||
|
};
|
||||||
|
use nalgebra::SVector;
|
||||||
use pim_isa::{File, Instruction, Kernel};
|
use pim_isa::{File, Instruction, Kernel};
|
||||||
|
|
||||||
pub const KERNEL: Kernel = Kernel([
|
pub const KERNEL: Kernel = Kernel([
|
||||||
@@ -45,17 +48,6 @@ pub const KERNEL: Kernel = Kernel([
|
|||||||
offset: -1,
|
offset: -1,
|
||||||
count: 7,
|
count: 7,
|
||||||
},
|
},
|
||||||
Instruction::MAC {
|
|
||||||
src0: File::Bank,
|
|
||||||
src1: File::GrfA { index: 0 },
|
|
||||||
src2: File::GrfB { index: 0 },
|
|
||||||
dst: File::GrfB { index: 0 },
|
|
||||||
aam: true,
|
|
||||||
},
|
|
||||||
Instruction::JUMP {
|
|
||||||
offset: -1,
|
|
||||||
count: 7,
|
|
||||||
},
|
|
||||||
Instruction::FILL {
|
Instruction::FILL {
|
||||||
src: File::GrfB { index: 0 },
|
src: File::GrfB { index: 0 },
|
||||||
dst: File::Bank,
|
dst: File::Bank,
|
||||||
@@ -79,42 +71,22 @@ pub const KERNEL: Kernel = Kernel([
|
|||||||
Instruction::NOP,
|
Instruction::NOP,
|
||||||
Instruction::NOP,
|
Instruction::NOP,
|
||||||
Instruction::NOP,
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
]);
|
]);
|
||||||
|
|
||||||
pub fn execute<const R: usize, const C: usize>(
|
pub fn execute<const R: usize, const C: usize>(
|
||||||
matrix: &Matrix<R, C>,
|
matrix: &Matrix<R, C>,
|
||||||
input_vector: &interleaved_array::Vector<C>,
|
input_vector: &interleaved_array::Vector<C>,
|
||||||
output_partial_sum_vector: &mut Matrix<R, { vector::ELEMENT_COUNT }>,
|
output_partial_sum_vector: &mut SVector<F16x16, R>,
|
||||||
dummy: &impl PimOperand,
|
dummy: &impl PimOperand,
|
||||||
) where
|
) {
|
||||||
[(); C / vector::ELEMENT_COUNT]:,
|
|
||||||
{
|
|
||||||
for block in input_vector.0.as_slice().iter() {
|
for block in input_vector.0.as_slice().iter() {
|
||||||
block.execute_read();
|
block.execute_read();
|
||||||
}
|
}
|
||||||
|
|
||||||
for matrix_column in matrix
|
for column_block in matrix.0.fixed_rows::<1>(0).iter() {
|
||||||
.0
|
column_block.execute_read();
|
||||||
.fixed_rows::<1>(0)
|
|
||||||
.fixed_columns_with_step::<{ C / vector::ELEMENT_COUNT }>(0, vector::ELEMENT_COUNT - 1)
|
|
||||||
.iter()
|
|
||||||
{
|
|
||||||
use core::fmt::Write;
|
|
||||||
writeln!(
|
|
||||||
crate::uart::Uart0,
|
|
||||||
"{:?}",
|
|
||||||
core::ptr::addr_of!(*matrix_column)
|
|
||||||
);
|
|
||||||
matrix_column.execute_read();
|
|
||||||
}
|
|
||||||
|
|
||||||
for matrix_column in matrix
|
|
||||||
.0
|
|
||||||
.fixed_rows::<1>(0)
|
|
||||||
.fixed_columns_with_step::<{ C / vector::ELEMENT_COUNT }>(4, vector::ELEMENT_COUNT - 1)
|
|
||||||
.iter()
|
|
||||||
{
|
|
||||||
matrix_column.execute_read();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
output_partial_sum_vector.execute_write();
|
output_partial_sum_vector.execute_write();
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
use core::fmt::{Debug, Display};
|
||||||
|
|
||||||
use half::f16;
|
use half::f16;
|
||||||
|
|
||||||
pub const ELEMENT_COUNT: usize = 16;
|
pub const ELEMENT_COUNT: usize = 16;
|
||||||
@@ -8,13 +10,13 @@ pub struct F16x1(pub f16);
|
|||||||
|
|
||||||
impl core::fmt::Debug for F16x1 {
|
impl core::fmt::Debug for F16x1 {
|
||||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||||
self.0.fmt(f)
|
Debug::fmt(&self.0, f)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl core::fmt::Display for F16x1 {
|
impl core::fmt::Display for F16x1 {
|
||||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||||
self.0.fmt(f)
|
Display::fmt(&self.0, f)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -66,6 +68,12 @@ impl core::ops::MulAssign<F16x1> for F16x1 {
|
|||||||
#[derive(Default, Debug, Clone, Copy, PartialEq)]
|
#[derive(Default, Debug, Clone, Copy, PartialEq)]
|
||||||
pub struct F16x16(pub [F16x1; ELEMENT_COUNT]);
|
pub struct F16x16(pub [F16x1; ELEMENT_COUNT]);
|
||||||
|
|
||||||
|
impl Display for F16x16 {
|
||||||
|
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||||
|
write!(f, "{:?}", self.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl num_traits::identities::Zero for F16x16 {
|
impl num_traits::identities::Zero for F16x16 {
|
||||||
fn zero() -> Self {
|
fn zero() -> Self {
|
||||||
Self([F16x1::zero(); ELEMENT_COUNT])
|
Self([F16x1::zero(); ELEMENT_COUNT])
|
||||||
|
|||||||
Reference in New Issue
Block a user