Introduce X1, X2, X3, X4 complexity levels
This commit is contained in:
@@ -8,6 +8,10 @@ forced-target = "aarch64-unknown-none"
|
||||
|
||||
[features]
|
||||
cacheless = []
|
||||
X1 = []
|
||||
X2 = []
|
||||
X3 = []
|
||||
X4 = []
|
||||
|
||||
[dependencies]
|
||||
aarch64-cpu = "9.4.0"
|
||||
|
||||
@@ -18,14 +18,35 @@ use pim_os::{
|
||||
uart::Uart0,
|
||||
};
|
||||
|
||||
#[cfg(feature = "X1")]
|
||||
const ROWS: usize = 16;
|
||||
|
||||
#[cfg(feature = "X2")]
|
||||
const ROWS: usize = 32;
|
||||
|
||||
#[cfg(feature = "X3")]
|
||||
const ROWS: usize = 64;
|
||||
|
||||
#[cfg(feature = "X4")]
|
||||
const ROWS: usize = 128;
|
||||
|
||||
const COLUMNS: usize = 128;
|
||||
const X16_ROWS: usize = ROWS / 16;
|
||||
const X16_COLUMNS: usize = COLUMNS / 16;
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn main() {
|
||||
pim::state::set_kernel(&gemv::KERNEL);
|
||||
#[cfg(feature = "X1")]
|
||||
pim::state::set_kernel(&gemv::KERNEL_X1);
|
||||
|
||||
#[cfg(feature = "X2")]
|
||||
pim::state::set_kernel(&gemv::KERNEL_X2);
|
||||
|
||||
#[cfg(feature = "X3")]
|
||||
pim::state::set_kernel(&gemv::KERNEL_X3);
|
||||
|
||||
#[cfg(feature = "X4")]
|
||||
pim::state::set_kernel(&gemv::KERNEL_X4);
|
||||
|
||||
let mut matrix = SMatrix::<_, ROWS, COLUMNS>::zeros();
|
||||
matrix.fill_lower_triangle(F16x1::one(), 0);
|
||||
|
||||
@@ -5,7 +5,78 @@ use aarch64_cpu::asm::barrier;
|
||||
use nalgebra::SVector;
|
||||
use pim_isa::{File, Instruction, Kernel};
|
||||
|
||||
pub const KERNEL: Kernel = Kernel([
|
||||
pub const KERNEL_X1: Kernel = Kernel([
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 0 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 1 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 2 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 3 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 4 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 5 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 6 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 7 },
|
||||
},
|
||||
Instruction::MAC {
|
||||
src0: File::Bank,
|
||||
src1: File::GrfA { index: 0 },
|
||||
src2: File::GrfB { index: 0 },
|
||||
dst: File::GrfB { index: 0 },
|
||||
aam: true,
|
||||
},
|
||||
Instruction::JUMP {
|
||||
offset: -1,
|
||||
count: 15,
|
||||
},
|
||||
Instruction::FILL {
|
||||
src: File::GrfB { index: 0 },
|
||||
dst: File::Bank,
|
||||
},
|
||||
Instruction::EXIT,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
]);
|
||||
|
||||
pub const KERNEL_X2: Kernel = Kernel([
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 0 },
|
||||
@@ -79,8 +150,180 @@ pub const KERNEL: Kernel = Kernel([
|
||||
Instruction::NOP,
|
||||
]);
|
||||
|
||||
pub fn execute<const R: usize, const X16C: usize>(
|
||||
matrix: &Matrix<2, X16C>,
|
||||
pub const KERNEL_X3: Kernel = Kernel([
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 0 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 1 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 2 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 3 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 4 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 5 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 6 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 7 },
|
||||
},
|
||||
Instruction::MAC {
|
||||
src0: File::Bank,
|
||||
src1: File::GrfA { index: 0 },
|
||||
src2: File::GrfB { index: 0 },
|
||||
dst: File::GrfB { index: 0 },
|
||||
aam: true,
|
||||
},
|
||||
Instruction::JUMP {
|
||||
offset: -1,
|
||||
count: 31,
|
||||
},
|
||||
Instruction::FILL {
|
||||
src: File::GrfB { index: 0 },
|
||||
dst: File::Bank,
|
||||
},
|
||||
Instruction::FILL {
|
||||
src: File::GrfB { index: 1 },
|
||||
dst: File::Bank,
|
||||
},
|
||||
Instruction::FILL {
|
||||
src: File::GrfB { index: 2 },
|
||||
dst: File::Bank,
|
||||
},
|
||||
Instruction::FILL {
|
||||
src: File::GrfB { index: 3 },
|
||||
dst: File::Bank,
|
||||
},
|
||||
Instruction::EXIT,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
]);
|
||||
|
||||
pub const KERNEL_X4: Kernel = Kernel([
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 0 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 1 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 2 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 3 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 4 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 5 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 6 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 7 },
|
||||
},
|
||||
Instruction::MAC {
|
||||
src0: File::Bank,
|
||||
src1: File::GrfA { index: 0 },
|
||||
src2: File::GrfB { index: 0 },
|
||||
dst: File::GrfB { index: 0 },
|
||||
aam: true,
|
||||
},
|
||||
Instruction::JUMP {
|
||||
offset: -1,
|
||||
count: 63,
|
||||
},
|
||||
Instruction::FILL {
|
||||
src: File::GrfB { index: 0 },
|
||||
dst: File::Bank,
|
||||
},
|
||||
Instruction::FILL {
|
||||
src: File::GrfB { index: 1 },
|
||||
dst: File::Bank,
|
||||
},
|
||||
Instruction::FILL {
|
||||
src: File::GrfB { index: 2 },
|
||||
dst: File::Bank,
|
||||
},
|
||||
Instruction::FILL {
|
||||
src: File::GrfB { index: 3 },
|
||||
dst: File::Bank,
|
||||
},
|
||||
Instruction::FILL {
|
||||
src: File::GrfB { index: 4 },
|
||||
dst: File::Bank,
|
||||
},
|
||||
Instruction::FILL {
|
||||
src: File::GrfB { index: 5 },
|
||||
dst: File::Bank,
|
||||
},
|
||||
Instruction::FILL {
|
||||
src: File::GrfB { index: 6 },
|
||||
dst: File::Bank,
|
||||
},
|
||||
Instruction::FILL {
|
||||
src: File::GrfB { index: 7 },
|
||||
dst: File::Bank,
|
||||
},
|
||||
Instruction::EXIT,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
]);
|
||||
|
||||
pub fn execute<const X16R: usize, const R: usize, const X16C: usize>(
|
||||
matrix: &Matrix<X16R, X16C>,
|
||||
input_vector: &interleaved_array::Vector<X16C>,
|
||||
output_partial_sum_vector: &mut SVector<F16x16, R>,
|
||||
dummy: &impl PimOperand,
|
||||
@@ -98,7 +341,7 @@ pub fn execute<const R: usize, const X16C: usize>(
|
||||
barrier::dsb(barrier::SY);
|
||||
|
||||
for chunk in output_partial_sum_vector
|
||||
.fixed_rows_with_step_mut::<2>(0, 16)
|
||||
.fixed_rows_with_step_mut::<X16R>(0, 16)
|
||||
.iter_mut()
|
||||
{
|
||||
chunk.execute_write();
|
||||
|
||||
Reference in New Issue
Block a user