Files
pim-rs/pim-os/src/pim/kernel/samsung_matrix_vector_mul.rs

106 lines
2.5 KiB
Rust

use crate::pim::{
continuous_array::Matrix, interleaved_array, operation::PimOperand, vector::F16x16,
};
use nalgebra::SVector;
use pim_isa::{File, Instruction, Kernel};
pub const KERNEL: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 4 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 5 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 6 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 7 },
},
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 0 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: true,
},
Instruction::JUMP {
offset: -1,
count: 15,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub fn execute<const X16R: usize, const R: usize, const X16C: usize>(
matrix: &Matrix<X16R, X16C>,
input_vector: &interleaved_array::Vector<X16C>,
output_partial_sum_vector: &mut SVector<F16x16, R>,
dummy: &impl PimOperand,
) {
for block in input_vector.0.as_slice().iter() {
block.execute_read();
}
for row_block in matrix.0.iter() {
for column_block in row_block.fixed_rows::<1>(0).iter() {
column_block.execute_read();
}
}
for chunk in output_partial_sum_vector
.fixed_rows_with_step_mut::<X16R>(0, 16)
.iter_mut()
{
chunk.execute_write();
}
dummy.execute_read();
}