Introduce X1, X2, X3, X4 complexity levels

This commit is contained in:
2024-02-22 19:07:16 +01:00
parent 680933e0e6
commit ece54d5833
3 changed files with 273 additions and 5 deletions

View File

@@ -8,6 +8,10 @@ forced-target = "aarch64-unknown-none"
[features] [features]
cacheless = [] cacheless = []
X1 = []
X2 = []
X3 = []
X4 = []
[dependencies] [dependencies]
aarch64-cpu = "9.4.0" aarch64-cpu = "9.4.0"

View File

@@ -18,14 +18,35 @@ use pim_os::{
uart::Uart0, uart::Uart0,
}; };
#[cfg(feature = "X1")]
const ROWS: usize = 16;
#[cfg(feature = "X2")]
const ROWS: usize = 32; const ROWS: usize = 32;
#[cfg(feature = "X3")]
const ROWS: usize = 64;
#[cfg(feature = "X4")]
const ROWS: usize = 128;
const COLUMNS: usize = 128; const COLUMNS: usize = 128;
const X16_ROWS: usize = ROWS / 16; const X16_ROWS: usize = ROWS / 16;
const X16_COLUMNS: usize = COLUMNS / 16; const X16_COLUMNS: usize = COLUMNS / 16;
#[no_mangle] #[no_mangle]
pub extern "C" fn main() { pub extern "C" fn main() {
pim::state::set_kernel(&gemv::KERNEL); #[cfg(feature = "X1")]
pim::state::set_kernel(&gemv::KERNEL_X1);
#[cfg(feature = "X2")]
pim::state::set_kernel(&gemv::KERNEL_X2);
#[cfg(feature = "X3")]
pim::state::set_kernel(&gemv::KERNEL_X3);
#[cfg(feature = "X4")]
pim::state::set_kernel(&gemv::KERNEL_X4);
let mut matrix = SMatrix::<_, ROWS, COLUMNS>::zeros(); let mut matrix = SMatrix::<_, ROWS, COLUMNS>::zeros();
matrix.fill_lower_triangle(F16x1::one(), 0); matrix.fill_lower_triangle(F16x1::one(), 0);

View File

@@ -5,7 +5,78 @@ use aarch64_cpu::asm::barrier;
use nalgebra::SVector; use nalgebra::SVector;
use pim_isa::{File, Instruction, Kernel}; use pim_isa::{File, Instruction, Kernel};
pub const KERNEL: Kernel = Kernel([ pub const KERNEL_X1: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 4 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 5 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 6 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 7 },
},
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 0 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: true,
},
Instruction::JUMP {
offset: -1,
count: 15,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub const KERNEL_X2: Kernel = Kernel([
Instruction::MOV { Instruction::MOV {
src: File::Bank, src: File::Bank,
dst: File::GrfA { index: 0 }, dst: File::GrfA { index: 0 },
@@ -79,8 +150,180 @@ pub const KERNEL: Kernel = Kernel([
Instruction::NOP, Instruction::NOP,
]); ]);
pub fn execute<const R: usize, const X16C: usize>( pub const KERNEL_X3: Kernel = Kernel([
matrix: &Matrix<2, X16C>, Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 4 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 5 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 6 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 7 },
},
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 0 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: true,
},
Instruction::JUMP {
offset: -1,
count: 31,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 2 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 3 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
/// PIM kernel program that `main` installs via `pim::state::set_kernel` when
/// the `X4` feature is enabled (the configuration in which `ROWS` is 128).
///
/// The table holds exactly 32 entries, in this order:
/// - 8 `MOV`s from `File::Bank` into `GrfA` indices 0 through 7,
/// - 1 `MAC` with `src0 = Bank`, `src1 = GrfA { index: 0 }`, and
///   `GrfB { index: 0 }` used as both `src2` and `dst`, with `aam: true`,
/// - 1 `JUMP { offset: -1, count: 63 }`,
/// - 8 `FILL`s from `GrfB` indices 0 through 7 into `File::Bank`,
/// - `EXIT`, followed by 13 `NOP`s.
///
/// NOTE(review): the instruction semantics live in `pim_isa` and are not
/// visible here; the inline notes below that describe behavior are
/// assumptions to confirm against that crate.
pub const KERNEL_X4: Kernel = Kernel([
// Bank -> GrfA[0..=7]: eight MOVs, one per GrfA index.
// Presumably these load the 8 x16 column chunks of the input vector
// (COLUMNS / 16 == 8) — TODO confirm against `execute`.
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 4 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 5 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 6 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 7 },
},
// Single MAC; only register index 0 is named even though the FILLs below
// read GrfB[0..=7].
// NOTE(review): `aam: true` presumably makes the hardware derive the actual
// GrfA/GrfB indices from the accessed address — confirm in `pim_isa`.
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 0 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: true,
},
// NOTE(review): presumably jumps back one instruction (to the MAC) 63
// times, i.e. 64 MAC executions in total, which would match 8 row blocks x
// 8 column blocks for ROWS = 128, COLUMNS = 128 — confirm JUMP semantics.
Instruction::JUMP {
offset: -1,
count: 63,
},
// GrfB[0..=7] -> Bank: eight FILLs, one per GrfB index
// (ROWS / 16 == 8 when ROWS is 128).
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 2 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 3 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 4 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 5 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 6 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 7 },
dst: File::Bank,
},
Instruction::EXIT,
// 13 trailing NOPs bring the table to 32 entries, the same length as the
// other KERNEL_X* tables visible in this change.
// Presumably `Kernel` wraps a fixed-size instruction array — TODO confirm.
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub fn execute<const X16R: usize, const R: usize, const X16C: usize>(
matrix: &Matrix<X16R, X16C>,
input_vector: &interleaved_array::Vector<X16C>, input_vector: &interleaved_array::Vector<X16C>,
output_partial_sum_vector: &mut SVector<F16x16, R>, output_partial_sum_vector: &mut SVector<F16x16, R>,
dummy: &impl PimOperand, dummy: &impl PimOperand,
@@ -98,7 +341,7 @@ pub fn execute<const R: usize, const X16C: usize>(
barrier::dsb(barrier::SY); barrier::dsb(barrier::SY);
for chunk in output_partial_sum_vector for chunk in output_partial_sum_vector
.fixed_rows_with_step_mut::<2>(0, 16) .fixed_rows_with_step_mut::<X16R>(0, 16)
.iter_mut() .iter_mut()
{ {
chunk.execute_write(); chunk.execute_write();