Further changes
This commit is contained in:
@@ -1,42 +1,13 @@
|
||||
---
|
||||
layout: figure
|
||||
figureUrl: /layout.svg
|
||||
figureCaption: Mapping of the weight matrix onto the memory banks
|
||||
---
|
||||
|
||||
## Appendix
|
||||
### GEMV Kernel
|
||||
### Memory Layout
|
||||
<hr/>
|
||||
|
||||
<Transform :scale="0.7">
|
||||
|
||||
```rust {all}{lines:true}
|
||||
pub fn execute<const X16R: usize, const X16C: usize, const R: usize>(
|
||||
matrix: &Matrix<X16R, X16C>,
|
||||
input_vector: &Vector<X16C>,
|
||||
output_partial_sum_vector: &mut SVector<F16x16, R>,
|
||||
dummy: &impl PimOperand,
|
||||
) {
|
||||
// Load input vector into GRF-A registers
|
||||
for chunk in input_vector.0.iter() {
|
||||
chunk.execute_read();
|
||||
}
|
||||
|
||||
// Execute the MAC instructions without memory barriers
|
||||
for sub_matrix in matrix.0.iter() {
|
||||
for column_block in sub_matrix.fixed_rows::<1>(0).iter() {
|
||||
column_block.execute_read_async();
|
||||
}
|
||||
}
|
||||
|
||||
// Verify all memory accesses have finished
|
||||
barrier::dsb(barrier::SY);
|
||||
|
||||
// Copy the partial sums into the bank
|
||||
for chunk in output_partial_sum_vector
|
||||
.fixed_rows_with_step_mut::<X16R>(0, 16)
|
||||
.iter_mut()
|
||||
{
|
||||
chunk.execute_write();
|
||||
}
|
||||
|
||||
// Execute the EXIT instruction
|
||||
dummy.execute_read();
|
||||
}
|
||||
```
|
||||
|
||||
</Transform>
|
||||
<!--
|
||||
- Data layout in program and address mapping must match
|
||||
-->
|
||||
|
||||
Reference in New Issue
Block a user