Introduce async execution API without barriers

This commit is contained in:
2024-02-21 23:43:11 +01:00
parent f04ee8e603
commit ec3ad2e497
2 changed files with 10 additions and 3 deletions

View File

@@ -1,6 +1,7 @@
use crate::pim::{
continuous_array::Matrix, interleaved_array, operation::PimOperand, vector::F16x16,
};
use aarch64_cpu::asm::barrier;
use nalgebra::SVector;
use pim_isa::{File, Instruction, Kernel};
@@ -88,12 +89,14 @@ pub fn execute<const X16R: usize, const R: usize, const X16C: usize>(
block.execute_read();
}
for row_block in matrix.0.iter() {
for column_block in row_block.fixed_rows::<1>(0).iter() {
column_block.execute_read();
for sub_matrix in matrix.0.iter() {
for column_block in sub_matrix.fixed_rows::<1>(0).iter() {
column_block.execute_read_async();
}
}
barrier::dsb(barrier::SY);
for chunk in output_partial_sum_vector
.fixed_rows_with_step_mut::<X16R>(0, 16)
.iter_mut()

View File

@@ -9,6 +9,10 @@ pub trait PimOperand {
barrier::dsb(barrier::SY);
}
/// Issues a volatile read of the location returned by `self.ptr()` and
/// throws the loaded value away.
///
/// No `barrier::dsb` follows the read here, in contrast to the neighbouring
/// methods of this trait that issue `barrier::dsb(barrier::SY)`.
/// NOTE(review): callers presumably issue a single barrier after a batch of
/// these reads — confirm at the call sites.
fn execute_read_async(&self) {
    let source = self.ptr();
    // SAFETY: assumes `ptr()` yields a valid, properly aligned pointer for
    // the pointee type — TODO confirm against the trait's implementors.
    let _ = unsafe { core::ptr::read_volatile(source) };
}
fn execute_write(&mut self) {
unsafe { core::ptr::write_volatile(self.ptr_mut(), Default::default()) };
barrier::dsb(barrier::SY);