From 71c813b578f6ef3e568b77e1faf378aa25c5527e Mon Sep 17 00:00:00 2001 From: Derek Christ Date: Sat, 20 Jan 2024 19:11:35 +0100 Subject: [PATCH] Enable shared PCUs for samsung layout --- .../src/bin/samsung_matrix_vector_multiply.rs | 23 ++++++++++---- .../pim/kernel/samsung_matrix_vector_mul.rs | 30 +++++++++++++++++-- 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/pim-os/src/bin/samsung_matrix_vector_multiply.rs b/pim-os/src/bin/samsung_matrix_vector_multiply.rs index 1983ac1..707c838 100644 --- a/pim-os/src/bin/samsung_matrix_vector_multiply.rs +++ b/pim-os/src/bin/samsung_matrix_vector_multiply.rs @@ -10,21 +10,32 @@ use half::f16; use nalgebra::{SMatrix, SVector}; use pim_isa::BankMode; use pim_os::{ - pim::{self, interleaved_array, kernel::samsung_matrix_vector_mul, vector::F16x1}, + pim::{ + self, interleaved_array, + kernel::samsung_matrix_vector_mul, + vector::{self, F16x1}, + }, uart::Uart0, }; +const ROWS: usize = 32; +const COLUMNS: usize = 128; + #[no_mangle] pub extern "C" fn main() { pim::state::set_kernel(&samsung_matrix_vector_mul::KERNEL); - let mut matrix = Box::new(pim::continuous_array::Matrix::<32, 128>(SMatrix::zeros())); + let mut matrix = Box::new(pim::continuous_array::Matrix::( + SMatrix::zeros(), + )); matrix.0.fill_lower_triangle(F16x1(f16::ONE), 0); - let input_vector = pim::continuous_array::Matrix::<128, 1>(SVector::from_fn(|_, _| { + let input_vector = pim::continuous_array::Matrix::(SVector::from_fn(|_, _| { F16x1(f16::from_f32(1 as _)) })); - let mut output_partial_sum_vector = - Box::new(pim::continuous_array::Matrix::<32, 16>(SMatrix::zeros())); + let mut output_partial_sum_vector = Box::new(pim::continuous_array::Matrix::< + ROWS, + { vector::ELEMENT_COUNT }, + >(SMatrix::zeros())); let interleaved_input_vector = Box::new(interleaved_array::Vector::from(&input_vector)); @@ -49,7 +60,7 @@ pub extern "C" fn main() { writeln!(Uart0, "{output_partial_sum_vector}").unwrap(); - let output_vector = SVector::::from_fn(|r, _| { + let output_vector = SVector::::from_fn(|r, _| { output_partial_sum_vector .0 .row(r) diff --git a/pim-os/src/pim/kernel/samsung_matrix_vector_mul.rs b/pim-os/src/pim/kernel/samsung_matrix_vector_mul.rs index 56eaf7f..ee6df2d 100644 --- a/pim-os/src/pim/kernel/samsung_matrix_vector_mul.rs +++ b/pim-os/src/pim/kernel/samsung_matrix_vector_mul.rs @@ -45,6 +45,17 @@ pub const KERNEL: Kernel = Kernel([ offset: -1, count: 7, }, + Instruction::MAC { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + src2: File::GrfB { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: true, + }, + Instruction::JUMP { + offset: -1, + count: 7, + }, Instruction::FILL { src: File::GrfB { index: 0 }, dst: File::Bank, @@ -68,8 +79,6 @@ pub const KERNEL: Kernel = Kernel([ Instruction::NOP, Instruction::NOP, Instruction::NOP, - Instruction::NOP, - Instruction::NOP, ]); pub fn execute( @@ -87,7 +96,22 @@ pub fn execute( for matrix_column in matrix .0 .fixed_rows::<1>(0) - .fixed_columns_with_step::<{ C / vector::ELEMENT_COUNT }>(0, vector::ELEMENT_COUNT) + .fixed_columns_with_step::<{ C / vector::ELEMENT_COUNT }>(0, vector::ELEMENT_COUNT - 1) + .iter() + { + use core::fmt::Write; + writeln!( + crate::uart::Uart0, + "{:?}", + core::ptr::addr_of!(*matrix_column) + ); + matrix_column.execute_read(); + } + + for matrix_column in matrix + .0 + .fixed_rows::<1>(0) + .fixed_columns_with_step::<{ C / vector::ELEMENT_COUNT }>(4, vector::ELEMENT_COUNT - 1) .iter() { matrix_column.execute_read();