Enable shared PCUs for samsung layout

This commit is contained in:
2024-01-20 19:11:35 +01:00
parent 400774df6f
commit 71c813b578
2 changed files with 44 additions and 9 deletions

View File

@@ -10,21 +10,32 @@ use half::f16;
use nalgebra::{SMatrix, SVector};
use pim_isa::BankMode;
use pim_os::{
pim::{self, interleaved_array, kernel::samsung_matrix_vector_mul, vector::F16x1},
pim::{
self, interleaved_array,
kernel::samsung_matrix_vector_mul,
vector::{self, F16x1},
},
uart::Uart0,
};
/// Row count of the weight matrix built in `main`; also used there as the row
/// count of the partial-sum output matrix and the length of the final output vector.
const ROWS: usize = 32;
/// Column count of the weight matrix built in `main`; also used there as the
/// length of the input vector.
const COLUMNS: usize = 128;
#[no_mangle]
pub extern "C" fn main() {
pim::state::set_kernel(&samsung_matrix_vector_mul::KERNEL);
let mut matrix = Box::new(pim::continuous_array::Matrix::<32, 128>(SMatrix::zeros()));
let mut matrix = Box::new(pim::continuous_array::Matrix::<ROWS, COLUMNS>(
SMatrix::zeros(),
));
matrix.0.fill_lower_triangle(F16x1(f16::ONE), 0);
let input_vector = pim::continuous_array::Matrix::<128, 1>(SVector::from_fn(|_, _| {
let input_vector = pim::continuous_array::Matrix::<COLUMNS, 1>(SVector::from_fn(|_, _| {
F16x1(f16::from_f32(1 as _))
}));
let mut output_partial_sum_vector =
Box::new(pim::continuous_array::Matrix::<32, 16>(SMatrix::zeros()));
let mut output_partial_sum_vector = Box::new(pim::continuous_array::Matrix::<
ROWS,
{ vector::ELEMENT_COUNT },
>(SMatrix::zeros()));
let interleaved_input_vector = Box::new(interleaved_array::Vector::from(&input_vector));
@@ -49,7 +60,7 @@ pub extern "C" fn main() {
writeln!(Uart0, "{output_partial_sum_vector}").unwrap();
let output_vector = SVector::<F16x1, 32>::from_fn(|r, _| {
let output_vector = SVector::<F16x1, ROWS>::from_fn(|r, _| {
output_partial_sum_vector
.0
.row(r)

View File

@@ -45,6 +45,17 @@ pub const KERNEL: Kernel = Kernel([
offset: -1,
count: 7,
},
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 0 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: true,
},
Instruction::JUMP {
offset: -1,
count: 7,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
@@ -68,8 +79,6 @@ pub const KERNEL: Kernel = Kernel([
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub fn execute<const R: usize, const C: usize>(
@@ -87,7 +96,22 @@ pub fn execute<const R: usize, const C: usize>(
for matrix_column in matrix
.0
.fixed_rows::<1>(0)
.fixed_columns_with_step::<{ C / vector::ELEMENT_COUNT }>(0, vector::ELEMENT_COUNT)
.fixed_columns_with_step::<{ C / vector::ELEMENT_COUNT }>(0, vector::ELEMENT_COUNT - 1)
.iter()
{
use core::fmt::Write;
writeln!(
crate::uart::Uart0,
"{:?}",
core::ptr::addr_of!(*matrix_column)
);
matrix_column.execute_read();
}
for matrix_column in matrix
.0
.fixed_rows::<1>(0)
.fixed_columns_with_step::<{ C / vector::ELEMENT_COUNT }>(4, vector::ELEMENT_COUNT - 1)
.iter()
{
matrix_column.execute_read();