Enable shared PCUs for Samsung layout

2024-01-20 19:11:35 +01:00
parent 400774df6f
commit 71c813b578
2 changed files with 44 additions and 9 deletions
--- a/pim-os/src/bin/samsung_matrix_vector_multiply.rs
+++ b/pim-os/src/bin/samsung_matrix_vector_multiply.rs
@@ -10,21 +10,32 @@ use half::f16;
 use nalgebra::{SMatrix, SVector};
 use pim_isa::BankMode;
 use pim_os::{
-    pim::{self, interleaved_array, kernel::samsung_matrix_vector_mul, vector::F16x1},
+    pim::{
+        self, interleaved_array,
+        kernel::samsung_matrix_vector_mul,
+        vector::{self, F16x1},
+    },
    uart::Uart0,
 };

+const ROWS: usize = 32;
+const COLUMNS: usize = 128;
+
 #[no_mangle]
 pub extern "C" fn main() {
    pim::state::set_kernel(&samsung_matrix_vector_mul::KERNEL);

-    let mut matrix = Box::new(pim::continuous_array::Matrix::<32, 128>(SMatrix::zeros()));
+    let mut matrix = Box::new(pim::continuous_array::Matrix::<ROWS, COLUMNS>(
+        SMatrix::zeros(),
+    ));
    matrix.0.fill_lower_triangle(F16x1(f16::ONE), 0);
-    let input_vector = pim::continuous_array::Matrix::<128, 1>(SVector::from_fn(|_, _| {
+    let input_vector = pim::continuous_array::Matrix::<COLUMNS, 1>(SVector::from_fn(|_, _| {
        F16x1(f16::from_f32(1 as _))
    }));
-    let mut output_partial_sum_vector =
-        Box::new(pim::continuous_array::Matrix::<32, 16>(SMatrix::zeros()));
+    let mut output_partial_sum_vector = Box::new(pim::continuous_array::Matrix::<
+        ROWS,
+        { vector::ELEMENT_COUNT },
+    >(SMatrix::zeros()));

    let interleaved_input_vector = Box::new(interleaved_array::Vector::from(&input_vector));

@@ -49,7 +60,7 @@ pub extern "C" fn main() {

    writeln!(Uart0, "{output_partial_sum_vector}").unwrap();

-    let output_vector = SVector::<F16x1, 32>::from_fn(|r, _| {
+    let output_vector = SVector::<F16x1, ROWS>::from_fn(|r, _| {
        output_partial_sum_vector
            .0
            .row(r)
--- a/pim-os/src/pim/kernel/samsung_matrix_vector_mul.rs
+++ b/pim-os/src/pim/kernel/samsung_matrix_vector_mul.rs
@@ -45,6 +45,17 @@ pub const KERNEL: Kernel = Kernel([
        offset: -1,
        count: 7,
    },
+    Instruction::MAC {
+        src0: File::Bank,
+        src1: File::GrfA { index: 0 },
+        src2: File::GrfB { index: 0 },
+        dst: File::GrfB { index: 0 },
+        aam: true,
+    },
+    Instruction::JUMP {
+        offset: -1,
+        count: 7,
+    },
    Instruction::FILL {
        src: File::GrfB { index: 0 },
        dst: File::Bank,
@@ -68,8 +79,6 @@ pub const KERNEL: Kernel = Kernel([
    Instruction::NOP,
    Instruction::NOP,
    Instruction::NOP,
-    Instruction::NOP,
-    Instruction::NOP,
 ]);

 pub fn execute<const R: usize, const C: usize>(
@@ -87,7 +96,22 @@ pub fn execute<const R: usize, const C: usize>(
    for matrix_column in matrix
        .0
        .fixed_rows::<1>(0)
-        .fixed_columns_with_step::<{ C / vector::ELEMENT_COUNT }>(0, vector::ELEMENT_COUNT)
+        .fixed_columns_with_step::<{ C / vector::ELEMENT_COUNT }>(0, vector::ELEMENT_COUNT - 1)
+        .iter()
+    {
+        use core::fmt::Write;
+        writeln!(
+            crate::uart::Uart0,
+            "{:?}",
+            core::ptr::addr_of!(*matrix_column)
+        );
+        matrix_column.execute_read();
+    }
+
+    for matrix_column in matrix
+        .0
+        .fixed_rows::<1>(0)
+        .fixed_columns_with_step::<{ C / vector::ELEMENT_COUNT }>(4, vector::ELEMENT_COUNT - 1)
        .iter()
    {
        matrix_column.execute_read();