From ece54d5833b48df609337dfc5d8c39d4b6ec660b Mon Sep 17 00:00:00 2001 From: Derek Christ Date: Thu, 22 Feb 2024 19:07:16 +0100 Subject: [PATCH] Introduce X1, X2, X3, X4 complexity levels --- pim-os/Cargo.toml | 4 + pim-os/src/bin/gemv.rs | 23 +++- pim-os/src/kernel/gemv.rs | 251 +++++++++++++++++++++++++++++++++++++- 3 files changed, 273 insertions(+), 5 deletions(-) diff --git a/pim-os/Cargo.toml b/pim-os/Cargo.toml index 1849a98..57631d6 100644 --- a/pim-os/Cargo.toml +++ b/pim-os/Cargo.toml @@ -8,6 +8,10 @@ forced-target = "aarch64-unknown-none" [features] cacheless = [] +X1 = [] +X2 = [] +X3 = [] +X4 = [] [dependencies] aarch64-cpu = "9.4.0" diff --git a/pim-os/src/bin/gemv.rs b/pim-os/src/bin/gemv.rs index 766b4a0..9a51d9b 100644 --- a/pim-os/src/bin/gemv.rs +++ b/pim-os/src/bin/gemv.rs @@ -18,14 +18,35 @@ use pim_os::{ uart::Uart0, }; +#[cfg(feature = "X1")] +const ROWS: usize = 16; + +#[cfg(feature = "X2")] const ROWS: usize = 32; + +#[cfg(feature = "X3")] +const ROWS: usize = 64; + +#[cfg(feature = "X4")] +const ROWS: usize = 128; + const COLUMNS: usize = 128; const X16_ROWS: usize = ROWS / 16; const X16_COLUMNS: usize = COLUMNS / 16; #[no_mangle] pub extern "C" fn main() { - pim::state::set_kernel(&gemv::KERNEL); + #[cfg(feature = "X1")] + pim::state::set_kernel(&gemv::KERNEL_X1); + + #[cfg(feature = "X2")] + pim::state::set_kernel(&gemv::KERNEL_X2); + + #[cfg(feature = "X3")] + pim::state::set_kernel(&gemv::KERNEL_X3); + + #[cfg(feature = "X4")] + pim::state::set_kernel(&gemv::KERNEL_X4); let mut matrix = SMatrix::<_, ROWS, COLUMNS>::zeros(); matrix.fill_lower_triangle(F16x1::one(), 0); diff --git a/pim-os/src/kernel/gemv.rs b/pim-os/src/kernel/gemv.rs index 20dda61..e5bef30 100644 --- a/pim-os/src/kernel/gemv.rs +++ b/pim-os/src/kernel/gemv.rs @@ -5,7 +5,78 @@ use aarch64_cpu::asm::barrier; use nalgebra::SVector; use pim_isa::{File, Instruction, Kernel}; -pub const KERNEL: Kernel = Kernel([ +pub const KERNEL_X1: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 4 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 5 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 6 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 7 }, + }, + Instruction::MAC { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + src2: File::GrfB { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: true, + }, + Instruction::JUMP { + offset: -1, + count: 15, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub const KERNEL_X2: Kernel = Kernel([ Instruction::MOV { src: File::Bank, dst: File::GrfA { index: 0 }, @@ -79,8 +150,180 @@ pub const KERNEL: Kernel = Kernel([ Instruction::NOP, ]); -pub fn execute( - matrix: &Matrix<2, X16C>, +pub const KERNEL_X3: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 4 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 5 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 6 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 7 }, + }, + Instruction::MAC { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + src2: File::GrfB { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: true, + }, + Instruction::JUMP { + offset: -1, + count: 31, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 1 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 2 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 3 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub const KERNEL_X4: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 4 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 5 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 6 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 7 }, + }, + Instruction::MAC { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + src2: File::GrfB { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: true, + }, + Instruction::JUMP { + offset: -1, + count: 63, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 1 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 2 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 3 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 4 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 5 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 6 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 7 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub fn execute( + matrix: &Matrix, input_vector: &interleaved_array::Vector, output_partial_sum_vector: &mut SVector, dummy: &impl PimOperand, @@ -98,7 +341,7 @@ pub fn execute( barrier::dsb(barrier::SY); for chunk in output_partial_sum_vector - .fixed_rows_with_step_mut::<2>(0, 16) + .fixed_rows_with_step_mut::(0, 16) .iter_mut() { chunk.execute_write();