diff --git a/pim-os/src/bin/classic/matrix_matrix_multiply.rs b/pim-os/src/bin/classic/matrix_matrix_multiply.rs deleted file mode 100644 index 22aabfd..0000000 --- a/pim-os/src/bin/classic/matrix_matrix_multiply.rs +++ /dev/null @@ -1,23 +0,0 @@ -#![no_std] -#![no_main] - -extern crate alloc; - -use core::fmt::Write; -use nalgebra::SMatrix; -use pim_os::{pim::vector::F16x1, uart::Uart0}; - -#[no_mangle] -pub extern "C" fn main() { - let matrices0 = [SMatrix::::zeros(); 512]; - let matrices1 = [SMatrix::::zeros(); 512]; - - for _ in 0..100 { - for i in 0..512 { - let matrix2 = matrices0[i] * matrices1[i]; - core::hint::black_box(matrix2); - } - } - - writeln!(Uart0, "Done").unwrap(); -} diff --git a/pim-os/src/bin/classic_gemv.rs b/pim-os/src/bin/classic_gemv.rs new file mode 100644 index 0000000..23f7b90 --- /dev/null +++ b/pim-os/src/bin/classic_gemv.rs @@ -0,0 +1,24 @@ +#![no_std] +#![no_main] + +extern crate alloc; + +use core::fmt::Write; +use nalgebra::{SMatrix, SVector}; +use pim_os::{pim::vector::F16x1, uart::Uart0}; + +const ROWS: usize = 32; +const COLUMNS: usize = 128; + +#[no_mangle] +pub extern "C" fn main() { + let matrix = SMatrix::::zeros(); + let input_vector = SVector::::zeros(); + + for _ in 0..10 { + let output_vector = matrix * input_vector; + core::hint::black_box(output_vector); + } + + writeln!(Uart0, "Done").unwrap(); +} diff --git a/pim-os/src/bin/classic_haxpy.rs b/pim-os/src/bin/classic_haxpy.rs new file mode 100644 index 0000000..d3dc4cc --- /dev/null +++ b/pim-os/src/bin/classic_haxpy.rs @@ -0,0 +1,26 @@ +#![no_std] +#![no_main] + +extern crate alloc; + +use core::fmt::Write; +use nalgebra::SVector; +use num_traits::identities::Zero; +use pim_os::{pim::vector::F16x1, uart::Uart0}; + +const ROWS: usize = 2048; + +#[no_mangle] +pub extern "C" fn main() { + let a = SVector::::zeros(); + let b = SVector::::zeros(); + let s = F16x1::zero(); + + for _ in 0..10 { + let a_s = a * s; + let c = a_s.component_mul(&b); + core::hint::black_box(c); + } + + writeln!(Uart0, "Done").unwrap(); +} diff --git a/pim-os/src/bin/classic_vadd.rs b/pim-os/src/bin/classic_vadd.rs new file mode 100644 index 0000000..b78100b --- /dev/null +++ b/pim-os/src/bin/classic_vadd.rs @@ -0,0 +1,23 @@ +#![no_std] +#![no_main] + +extern crate alloc; + +use core::fmt::Write; +use nalgebra::SVector; +use pim_os::{pim::vector::F16x1, uart::Uart0}; + +const ROWS: usize = 2048; + +#[no_mangle] +pub extern "C" fn main() { + let a = SVector::::zeros(); + let b = SVector::::zeros(); + + for _ in 0..10 { + let c = a + b; + core::hint::black_box(c); + } + + writeln!(Uart0, "Done").unwrap(); +} diff --git a/pim-os/src/bin/classic_vmul.rs b/pim-os/src/bin/classic_vmul.rs new file mode 100644 index 0000000..6e0fbc4 --- /dev/null +++ b/pim-os/src/bin/classic_vmul.rs @@ -0,0 +1,23 @@ +#![no_std] +#![no_main] + +extern crate alloc; + +use core::fmt::Write; +use nalgebra::SVector; +use pim_os::{pim::vector::F16x1, uart::Uart0}; + +const ROWS: usize = 2048; + +#[no_mangle] +pub extern "C" fn main() { + let a = SVector::::zeros(); + let b = SVector::::zeros(); + + for _ in 0..10 { + let c = a.component_mul(&b); + core::hint::black_box(c); + } + + writeln!(Uart0, "Done").unwrap(); +} diff --git a/pim-os/src/bin/haxpy.rs b/pim-os/src/bin/haxpy.rs new file mode 100644 index 0000000..8c1e77e --- /dev/null +++ b/pim-os/src/bin/haxpy.rs @@ -0,0 +1,65 @@ +#![no_std] +#![no_main] + +extern crate alloc; + +use aarch64_cpu::asm::barrier; +use alloc::boxed::Box; +use core::fmt::Write; +use half::f16; +use nalgebra::SVector; +use pim_isa::BankMode; +use pim_os::{ + kernel::haxpy, + pim::{ + self, interleaved_array, + vector::{F16x1, F16x16}, + }, + uart::Uart0, +}; + +const ROWS: usize = 1024; + +#[no_mangle] +pub extern "C" fn main() { + pim::state::set_kernel(&haxpy::KERNEL); + + let a = Box::new(pim::continuous_array::Vector( + SVector::::from_fn(|i, _| F16x1(f16::from_f32(i as _))), + )); + let b = Box::new(pim::continuous_array::Vector( + SVector::::from_fn(|i, _| F16x1(f16::from_f32((ROWS - i) as _))), + )); + + let scalar_vector = SVector::::from_element(F16x16([F16x1(f16::NEG_ONE); 16])); + let interleaved_scalar_vector = Box::new(interleaved_array::Vector::from(scalar_vector)); + + // writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap(); + + let mut c = Box::new(pim::continuous_array::Vector( + SVector::::zeros(), + )); + + let dummy = Box::new(0); + + // Verify everything is correctly initialized before PIM operation + barrier::dsb(barrier::SY); + + // Execute kernel + { + pim::state::set_bank_mode(BankMode::PimAllBank); + + haxpy::execute( + &a.0, + &b.0, + &interleaved_scalar_vector, + &mut c.0, + dummy.as_ref(), + ); + + pim::state::set_bank_mode(BankMode::SingleBank); + } + + // writeln!(Uart0, "{}", c.0).unwrap(); + writeln!(Uart0, "Done").unwrap(); +} diff --git a/pim-os/src/bin/vmul.rs b/pim-os/src/bin/vmul.rs index 949b8d7..ef59a81 100644 --- a/pim-os/src/bin/vmul.rs +++ b/pim-os/src/bin/vmul.rs @@ -10,7 +10,7 @@ use half::f16; use nalgebra::SVector; use pim_isa::BankMode; use pim_os::{ - kernel::vadd, + kernel::vmul, pim::{self, vector::F16x1}, uart::Uart0, }; @@ -19,7 +19,7 @@ const ROWS: usize = 2048; #[no_mangle] pub extern "C" fn main() { - pim::state::set_kernel(&vadd::KERNEL); + pim::state::set_kernel(&vmul::KERNEL); let a = Box::new(pim::continuous_array::Vector( SVector::::from_fn(|_, _| F16x1(f16::from_f32(2 as _))), @@ -43,7 +43,7 @@ pub extern "C" fn main() { { pim::state::set_bank_mode(BankMode::PimAllBank); - vadd::execute(&a.0, &b.0, &mut c.0, dummy.as_ref()); + vmul::execute(&a.0, &b.0, &mut c.0, dummy.as_ref()); pim::state::set_bank_mode(BankMode::SingleBank); } diff --git a/pim-os/src/kernel.rs b/pim-os/src/kernel.rs index 3247fce..fa664e3 100644 --- a/pim-os/src/kernel.rs +++ b/pim-os/src/kernel.rs @@ -1,3 +1,5 @@ -pub mod legacy; pub mod gemv; +pub mod haxpy; +pub mod legacy; pub mod vadd; +pub mod vmul; diff --git a/pim-os/src/kernel/gemv.rs b/pim-os/src/kernel/gemv.rs index 42c6e16..20dda61 100644 --- a/pim-os/src/kernel/gemv.rs +++ b/pim-os/src/kernel/gemv.rs @@ -79,13 +79,13 @@ pub const KERNEL: Kernel = Kernel([ Instruction::NOP, ]); -pub fn execute( - matrix: &Matrix, +pub fn execute( + matrix: &Matrix<2, X16C>, input_vector: &interleaved_array::Vector, output_partial_sum_vector: &mut SVector, dummy: &impl PimOperand, ) { - for block in input_vector.0.as_slice().iter() { + for block in input_vector.0.iter() { block.execute_read(); } @@ -98,7 +98,7 @@ pub fn execute( barrier::dsb(barrier::SY); for chunk in output_partial_sum_vector - .fixed_rows_with_step_mut::(0, 16) + .fixed_rows_with_step_mut::<2>(0, 16) .iter_mut() { chunk.execute_write(); diff --git a/pim-os/src/kernel/haxpy.rs b/pim-os/src/kernel/haxpy.rs new file mode 100644 index 0000000..7587af0 --- /dev/null +++ b/pim-os/src/kernel/haxpy.rs @@ -0,0 +1,132 @@ +use crate::pim::{interleaved_array, operation::PimOperand, vector::F16x1}; +use nalgebra::SVector; +use pim_isa::{File, Instruction, Kernel}; + +pub const KERNEL: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::SrfM { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::MUL { + src0: File::SrfA { index: 0 }, + src1: File::GrfA { index: 0 }, + dst: File::GrfA { index: 0 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: false, + }, + Instruction::MUL { + src0: File::SrfA { index: 1 }, + src1: File::GrfA { index: 1 }, + dst: File::GrfA { index: 1 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 1 }, + dst: File::GrfB { index: 1 }, + aam: false, + }, + Instruction::MUL { + src0: File::SrfA { index: 2 }, + src1: File::GrfA { index: 2 }, + dst: File::GrfA { index: 2 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 2 }, + dst: File::GrfB { index: 2 }, + aam: false, + }, + Instruction::MUL { + src0: File::SrfA { index: 3 }, + src1: File::GrfA { index: 3 }, + dst: File::GrfA { index: 3 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 3 }, + dst: File::GrfB { index: 3 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 1 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 2 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 3 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub fn execute( + a: &SVector, + b: &SVector, + interleaved_scalar: &interleaved_array::Vector<1>, + c: &mut SVector, + dummy: &impl PimOperand, +) { + interleaved_scalar.execute_read(); + + a.fixed_rows_with_step::<4>(0, 16 * 16) + .iter() + .for_each(|entry| entry.execute_read()); + + b.fixed_rows_with_step::<4>(0, 16 * 16) + .iter() + .for_each(|entry| { + dummy.execute_read(); + entry.execute_read(); + }); + + c.fixed_rows_with_step_mut::<4>(0, 16 * 16) + .iter_mut() + .for_each(|entry| entry.execute_write()); + + dummy.execute_read(); +} diff --git a/pim-os/src/kernel/legacy.rs b/pim-os/src/kernel/legacy.rs index 2bde5ff..667d935 100644 --- a/pim-os/src/kernel/legacy.rs +++ b/pim-os/src/kernel/legacy.rs @@ -1,3 +1,3 @@ pub mod matrix_matrix_mul; pub mod matrix_scalar_mul; -pub mod matrix_vector_mul; \ No newline at end of file +pub mod matrix_vector_mul; diff --git a/pim-os/src/kernel/legacy/matrix_scalar_mul.rs b/pim-os/src/kernel/legacy/matrix_scalar_mul.rs index 1f2f3ea..f93c433 100644 --- a/pim-os/src/kernel/legacy/matrix_scalar_mul.rs +++ b/pim-os/src/kernel/legacy/matrix_scalar_mul.rs @@ -122,4 +122,4 @@ pub fn execute( dummy_array.execute_instruction_read_single_bank(0); } -} \ No newline at end of file +} diff --git a/pim-os/src/pim/legacy.rs b/pim-os/src/pim/legacy.rs index 615880a..f5d68fc 100644 --- a/pim-os/src/pim/legacy.rs +++ b/pim-os/src/pim/legacy.rs @@ -1 +1 @@ -pub mod array; \ No newline at end of file +pub mod array;