Enable shared PCUs for Samsung layout

This commit is contained in:
2024-01-20 19:11:35 +01:00
parent 400774df6f
commit 71c813b578
2 changed files with 44 additions and 9 deletions

View File

@@ -10,21 +10,32 @@ use half::f16;
use nalgebra::{SMatrix, SVector}; use nalgebra::{SMatrix, SVector};
use pim_isa::BankMode; use pim_isa::BankMode;
use pim_os::{ use pim_os::{
pim::{self, interleaved_array, kernel::samsung_matrix_vector_mul, vector::F16x1}, pim::{
self, interleaved_array,
kernel::samsung_matrix_vector_mul,
vector::{self, F16x1},
},
uart::Uart0, uart::Uart0,
}; };
const ROWS: usize = 32;
const COLUMNS: usize = 128;
#[no_mangle] #[no_mangle]
pub extern "C" fn main() { pub extern "C" fn main() {
pim::state::set_kernel(&samsung_matrix_vector_mul::KERNEL); pim::state::set_kernel(&samsung_matrix_vector_mul::KERNEL);
let mut matrix = Box::new(pim::continuous_array::Matrix::<32, 128>(SMatrix::zeros())); let mut matrix = Box::new(pim::continuous_array::Matrix::<ROWS, COLUMNS>(
SMatrix::zeros(),
));
matrix.0.fill_lower_triangle(F16x1(f16::ONE), 0); matrix.0.fill_lower_triangle(F16x1(f16::ONE), 0);
let input_vector = pim::continuous_array::Matrix::<128, 1>(SVector::from_fn(|_, _| { let input_vector = pim::continuous_array::Matrix::<COLUMNS, 1>(SVector::from_fn(|_, _| {
F16x1(f16::from_f32(1 as _)) F16x1(f16::from_f32(1 as _))
})); }));
let mut output_partial_sum_vector = let mut output_partial_sum_vector = Box::new(pim::continuous_array::Matrix::<
Box::new(pim::continuous_array::Matrix::<32, 16>(SMatrix::zeros())); ROWS,
{ vector::ELEMENT_COUNT },
>(SMatrix::zeros()));
let interleaved_input_vector = Box::new(interleaved_array::Vector::from(&input_vector)); let interleaved_input_vector = Box::new(interleaved_array::Vector::from(&input_vector));
@@ -49,7 +60,7 @@ pub extern "C" fn main() {
writeln!(Uart0, "{output_partial_sum_vector}").unwrap(); writeln!(Uart0, "{output_partial_sum_vector}").unwrap();
let output_vector = SVector::<F16x1, 32>::from_fn(|r, _| { let output_vector = SVector::<F16x1, ROWS>::from_fn(|r, _| {
output_partial_sum_vector output_partial_sum_vector
.0 .0
.row(r) .row(r)

View File

@@ -45,6 +45,17 @@ pub const KERNEL: Kernel = Kernel([
offset: -1, offset: -1,
count: 7, count: 7,
}, },
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 0 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: true,
},
Instruction::JUMP {
offset: -1,
count: 7,
},
Instruction::FILL { Instruction::FILL {
src: File::GrfB { index: 0 }, src: File::GrfB { index: 0 },
dst: File::Bank, dst: File::Bank,
@@ -68,8 +79,6 @@ pub const KERNEL: Kernel = Kernel([
Instruction::NOP, Instruction::NOP,
Instruction::NOP, Instruction::NOP,
Instruction::NOP, Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]); ]);
pub fn execute<const R: usize, const C: usize>( pub fn execute<const R: usize, const C: usize>(
@@ -87,7 +96,22 @@ pub fn execute<const R: usize, const C: usize>(
for matrix_column in matrix for matrix_column in matrix
.0 .0
.fixed_rows::<1>(0) .fixed_rows::<1>(0)
.fixed_columns_with_step::<{ C / vector::ELEMENT_COUNT }>(0, vector::ELEMENT_COUNT) .fixed_columns_with_step::<{ C / vector::ELEMENT_COUNT }>(0, vector::ELEMENT_COUNT - 1)
.iter()
{
use core::fmt::Write;
writeln!(
crate::uart::Uart0,
"{:?}",
core::ptr::addr_of!(*matrix_column)
);
matrix_column.execute_read();
}
for matrix_column in matrix
.0
.fixed_rows::<1>(0)
.fixed_columns_with_step::<{ C / vector::ELEMENT_COUNT }>(4, vector::ELEMENT_COUNT - 1)
.iter() .iter()
{ {
matrix_column.execute_read(); matrix_column.execute_read();