Add rest of slides
This commit is contained in:
31
kernel.rs
Normal file
31
kernel.rs
Normal file
@@ -0,0 +1,31 @@
|
||||
pub fn execute<const X16R: usize, const X16C: usize, const R: usize>(
|
||||
matrix: &Matrix<X16R, X16C>,
|
||||
input_vector: &Vector<X16C>,
|
||||
output_partial_sum_vector: &mut SVector<F16x16, R>,
|
||||
dummy: &impl PimOperand,
|
||||
) {
|
||||
// Load input vector into GRF-A registers
|
||||
for chunk in input_vector.0.iter() {
|
||||
chunk.execute_read();
|
||||
}
|
||||
|
||||
// Execute the MAC instructions without memory barriers
|
||||
for sub_matrix in matrix.0.iter() {
|
||||
for column_block in sub_matrix.fixed_rows::<1>(0).iter() {
|
||||
column_block.execute_read_async();
|
||||
}
|
||||
}
|
||||
|
||||
// Verify all memory accesses have finished
|
||||
barrier::dsb(barrier::SY);
|
||||
|
||||
// Copy the partial sums into the bank
|
||||
for chunk in output_partial_sum_vector
|
||||
.fixed_rows_with_step_mut::<X16R>(0, 16)
|
||||
.iter_mut() {
|
||||
chunk.execute_write();
|
||||
}
|
||||
|
||||
// Execute the EXIT instruction
|
||||
dummy.execute_read();
|
||||
}
|
||||
Reference in New Issue
Block a user