Files
master-thesis-presentation/slides/appendix.md
2024-04-03 23:14:06 +02:00

1.0 KiB

Appendix

GEMV Kernel


/// Drives one GEMV kernel invocation.
///
/// The steps run in a fixed order: the input vector is read chunk by chunk,
/// row 0 of every sub-matrix is read asynchronously, a full-system data
/// synchronization barrier is issued, the strided partial-sum slots are
/// written, and `dummy` is read last.
///
/// # Parameters
/// - `matrix`: its sub-matrices (`matrix.0`) are visited in iteration order;
///   only the single-row view starting at row 0 of each is touched.
/// - `input_vector`: every chunk in `input_vector.0` is read once, up front.
/// - `output_partial_sum_vector`: `X16R` entries are written, taken from row 0
///   onwards with a step argument of 16.
/// - `dummy`: operand whose read issues the EXIT instruction.
pub fn execute<const X16R: usize, const X16C: usize, const R: usize>(
    matrix: &Matrix<X16R, X16C>,
    input_vector: &Vector<X16C>,
    output_partial_sum_vector: &mut SVector<F16x16, R>,
    dummy: &impl PimOperand,
) {
    // Stage 1: bring the input vector into the GRF-A registers.
    input_vector.0.iter().for_each(|operand| {
        operand.execute_read();
    });

    // Stage 2: issue the MAC instructions. These reads are asynchronous, i.e.
    // no memory barrier is placed between them.
    matrix.0.iter().for_each(|block| {
        let first_row = block.fixed_rows::<1>(0);
        first_row.iter().for_each(|tile| {
            tile.execute_read_async();
        });
    });

    // Stage 3: wait until every outstanding memory access has completed
    // before touching the results.
    barrier::dsb(barrier::SY);

    // Stage 4: copy the partial sums into the bank.
    let mut partial_sums = output_partial_sum_vector.fixed_rows_with_step_mut::<X16R>(0, 16);
    partial_sums.iter_mut().for_each(|slot| {
        slot.execute_write();
    });

    // Stage 5: the final read executes the EXIT instruction.
    dummy.execute_read();
}