diff --git a/pim-os/src/pim/kernel/samsung_matrix_vector_mul.rs b/pim-os/src/pim/kernel/samsung_matrix_vector_mul.rs
index 6f23c5b..42c6e16 100644
--- a/pim-os/src/pim/kernel/samsung_matrix_vector_mul.rs
+++ b/pim-os/src/pim/kernel/samsung_matrix_vector_mul.rs
@@ -1,6 +1,7 @@
 use crate::pim::{
     continuous_array::Matrix, interleaved_array, operation::PimOperand, vector::F16x16,
 };
+use aarch64_cpu::asm::barrier; // Needed for the `barrier::dsb(barrier::SY)` call this patch adds to `execute`.
 use nalgebra::SVector;
 use pim_isa::{File, Instruction, Kernel};
 
@@ -88,12 +89,14 @@ pub fn execute(
         block.execute_read();
     }
 
-    for row_block in matrix.0.iter() {
-        for column_block in row_block.fixed_rows::<1>(0).iter() {
-            column_block.execute_read();
+    for sub_matrix in matrix.0.iter() { // Loop variable renamed from `row_block`; iteration is unchanged.
+        for column_block in sub_matrix.fixed_rows::<1>(0).iter() { // Still visits only the first row (`fixed_rows::<1>(0)`) of each element.
+            column_block.execute_read_async(); // Volatile read with no barrier of its own (previously `execute_read`).
         }
     }
 
+    barrier::dsb(barrier::SY); // One barrier after all reads above; NOTE(review): presumably replaces a per-read `dsb` in `execute_read` — confirm.
+
     for chunk in output_partial_sum_vector
         .fixed_rows_with_step_mut::(0, 16)
         .iter_mut()
diff --git a/pim-os/src/pim/operation.rs b/pim-os/src/pim/operation.rs
index 146a480..a0c59b4 100644
--- a/pim-os/src/pim/operation.rs
+++ b/pim-os/src/pim/operation.rs
@@ -9,6 +9,10 @@ pub trait PimOperand {
         barrier::dsb(barrier::SY);
     }
 
+    fn execute_read_async(&self) { // Volatile-reads `self.ptr()` and discards the value; a plain `fn` (not `async fn`) that issues no `dsb`.
+        unsafe { core::ptr::read_volatile(self.ptr()) }; // SAFETY: NOTE(review) relies on `self.ptr()` being valid for reads — not visible in this hunk, confirm.
+    } // Callers needing completion must issue their own barrier, as `execute` does in this patch.
+
     fn execute_write(&mut self) {
         unsafe { core::ptr::write_volatile(self.ptr_mut(), Default::default()) };
         barrier::dsb(barrier::SY);