106 lines
2.5 KiB
Rust
106 lines
2.5 KiB
Rust
use crate::pim::{
|
|
continuous_array::Matrix, interleaved_array, operation::PimOperand, vector::F16x16,
|
|
};
|
|
use nalgebra::SVector;
|
|
use pim_isa::{File, Instruction, Kernel};
|
|
|
|
pub const KERNEL: Kernel = Kernel([
|
|
Instruction::MOV {
|
|
src: File::Bank,
|
|
dst: File::GrfA { index: 0 },
|
|
},
|
|
Instruction::MOV {
|
|
src: File::Bank,
|
|
dst: File::GrfA { index: 1 },
|
|
},
|
|
Instruction::MOV {
|
|
src: File::Bank,
|
|
dst: File::GrfA { index: 2 },
|
|
},
|
|
Instruction::MOV {
|
|
src: File::Bank,
|
|
dst: File::GrfA { index: 3 },
|
|
},
|
|
Instruction::MOV {
|
|
src: File::Bank,
|
|
dst: File::GrfA { index: 4 },
|
|
},
|
|
Instruction::MOV {
|
|
src: File::Bank,
|
|
dst: File::GrfA { index: 5 },
|
|
},
|
|
Instruction::MOV {
|
|
src: File::Bank,
|
|
dst: File::GrfA { index: 6 },
|
|
},
|
|
Instruction::MOV {
|
|
src: File::Bank,
|
|
dst: File::GrfA { index: 7 },
|
|
},
|
|
Instruction::MAC {
|
|
src0: File::Bank,
|
|
src1: File::GrfA { index: 0 },
|
|
src2: File::GrfB { index: 0 },
|
|
dst: File::GrfB { index: 0 },
|
|
aam: true,
|
|
},
|
|
Instruction::JUMP {
|
|
offset: -1,
|
|
count: 15,
|
|
},
|
|
Instruction::FILL {
|
|
src: File::GrfB { index: 0 },
|
|
dst: File::Bank,
|
|
},
|
|
Instruction::FILL {
|
|
src: File::GrfB { index: 1 },
|
|
dst: File::Bank,
|
|
},
|
|
Instruction::EXIT,
|
|
Instruction::NOP,
|
|
Instruction::NOP,
|
|
Instruction::NOP,
|
|
Instruction::NOP,
|
|
Instruction::NOP,
|
|
Instruction::NOP,
|
|
Instruction::NOP,
|
|
Instruction::NOP,
|
|
Instruction::NOP,
|
|
Instruction::NOP,
|
|
Instruction::NOP,
|
|
Instruction::NOP,
|
|
Instruction::NOP,
|
|
Instruction::NOP,
|
|
Instruction::NOP,
|
|
Instruction::NOP,
|
|
Instruction::NOP,
|
|
Instruction::NOP,
|
|
Instruction::NOP,
|
|
]);
|
|
|
|
pub fn execute<const X16R: usize, const R: usize, const X16C: usize>(
|
|
matrix: &Matrix<X16R, X16C>,
|
|
input_vector: &interleaved_array::Vector<X16C>,
|
|
output_partial_sum_vector: &mut SVector<F16x16, R>,
|
|
dummy: &impl PimOperand,
|
|
) {
|
|
for block in input_vector.0.as_slice().iter() {
|
|
block.execute_read();
|
|
}
|
|
|
|
for row_block in matrix.0.iter() {
|
|
for column_block in row_block.fixed_rows::<1>(0).iter() {
|
|
column_block.execute_read();
|
|
}
|
|
}
|
|
|
|
for chunk in output_partial_sum_vector
|
|
.fixed_rows_with_step_mut::<X16R>(0, 16)
|
|
.iter_mut()
|
|
{
|
|
chunk.execute_write();
|
|
}
|
|
|
|
dummy.execute_read();
|
|
}
|