diff --git a/pim-os/src/bin/classic_gemv.rs b/pim-os/src/bin/classic_gemv.rs index f0ca28a..3286085 100644 --- a/pim-os/src/bin/classic_gemv.rs +++ b/pim-os/src/bin/classic_gemv.rs @@ -3,21 +3,21 @@ extern crate alloc; -use core::fmt::Write; +use core::{arch::asm, fmt::Write}; use nalgebra::{SMatrix, SVector}; use pim_os::{pim::vector::F16x1, uart::Uart0}; #[cfg(feature = "X1")] -const ROWS: usize = 16; +const ROWS: usize = 128; #[cfg(feature = "X2")] -const ROWS: usize = 32; +const ROWS: usize = 256; #[cfg(feature = "X3")] -const ROWS: usize = 64; +const ROWS: usize = 512; #[cfg(feature = "X4")] -const ROWS: usize = 128; +const ROWS: usize = 1024; const COLUMNS: usize = 128; @@ -26,9 +26,19 @@ pub extern "C" fn main() { let matrix = SMatrix::::zeros(); let input_vector = SVector::::zeros(); + // Flush cache + for element in matrix.iter() { + unsafe { asm!("dc civac, {val}", val = in(reg) element) } + } + for element in input_vector.iter() { + unsafe { asm!("dc civac, {val}", val = in(reg) element) } + } + pim_os::m5op::exit(0); - let output_vector = matrix * input_vector; - core::hint::black_box(output_vector); + for _ in 0..10 { + let output_vector = matrix * input_vector; + core::hint::black_box(output_vector); + } pim_os::m5op::exit(0); writeln!(Uart0, "Done").unwrap(); diff --git a/pim-os/src/bin/classic_gemv_layers.rs b/pim-os/src/bin/classic_gemv_layers.rs index a9d1fac..0811b5e 100644 --- a/pim-os/src/bin/classic_gemv_layers.rs +++ b/pim-os/src/bin/classic_gemv_layers.rs @@ -6,18 +6,33 @@ extern crate alloc; use core::fmt::Write; use nalgebra::{SMatrix, SVector}; use pim_os::{pim::vector::F16x1, uart::Uart0}; +use half::f16; +#[cfg(feature = "X1")] const ROWS: usize = 128; + +#[cfg(feature = "X2")] +const ROWS: usize = 256; + +#[cfg(feature = "X3")] +const ROWS: usize = 512; + +#[cfg(feature = "X4")] +const ROWS: usize = 1024; + const COLUMNS: usize = 128; #[no_mangle] pub extern "C" fn main() { let matrix = SMatrix::::zeros(); - let mut input_vector = SVector::::zeros(); + let input_vector = SVector::::zeros(); pim_os::m5op::exit(0); - input_vector = matrix * input_vector; - core::hint::black_box(input_vector); + for _ in 0..5 { + let mut output_vector = matrix * input_vector; + output_vector = output_vector.map(|element| if element.0 < f16::ZERO { F16x1(f16::ZERO) } else { element }); + core::hint::black_box(output_vector); + } pim_os::m5op::exit(0); writeln!(Uart0, "Done").unwrap(); diff --git a/pim-os/src/bin/classic_haxpy.rs b/pim-os/src/bin/classic_haxpy.rs index 8fc2b5e..9594543 100644 --- a/pim-os/src/bin/classic_haxpy.rs +++ b/pim-os/src/bin/classic_haxpy.rs @@ -3,7 +3,7 @@ extern crate alloc; -use core::fmt::Write; +use core::{arch::asm, fmt::Write}; use nalgebra::SVector; use num_traits::identities::Zero; use pim_os::{pim::vector::F16x1, uart::Uart0}; @@ -26,10 +26,20 @@ pub extern "C" fn main() { let b = SVector::::zeros(); let s = F16x1::zero(); + // Flush cache + for element in a.iter() { + unsafe { asm!("dc civac, {val}", val = in(reg) element) } + } + for element in b.iter() { + unsafe { asm!("dc civac, {val}", val = in(reg) element) } + } + pim_os::m5op::exit(0); - let a_s = a * s; - let c = a_s.component_mul(&b); - core::hint::black_box(c); + for _ in 0..10 { + let a_s = a * s; + let c = a_s.component_mul(&b); + core::hint::black_box(c); + } pim_os::m5op::exit(0); writeln!(Uart0, "Done").unwrap(); diff --git a/pim-os/src/bin/classic_vadd.rs b/pim-os/src/bin/classic_vadd.rs index 5cad6a1..d76d2dc 100644 --- a/pim-os/src/bin/classic_vadd.rs +++ b/pim-os/src/bin/classic_vadd.rs @@ -3,7 +3,7 @@ extern crate alloc; -use core::fmt::Write; +use core::{arch::asm, fmt::Write}; use nalgebra::SVector; use pim_os::{pim::vector::F16x1, uart::Uart0}; @@ -24,9 +24,18 @@ pub extern "C" fn main() { let a = SVector::::zeros(); let b = SVector::::zeros(); + for element in a.iter() { + unsafe { asm!("dc civac, {val}", val = in(reg) element) } + } + for element in b.iter() { + unsafe { asm!("dc civac, {val}", val = in(reg) element) } + } + pim_os::m5op::exit(0); - let c = a + b; - core::hint::black_box(c); + for _ in 0..10 { + let c = a + b; + core::hint::black_box(c); + } pim_os::m5op::exit(0); writeln!(Uart0, "Done").unwrap(); diff --git a/pim-os/src/bin/classic_vmul.rs b/pim-os/src/bin/classic_vmul.rs index 46b0a6b..fcff7c9 100644 --- a/pim-os/src/bin/classic_vmul.rs +++ b/pim-os/src/bin/classic_vmul.rs @@ -3,7 +3,7 @@ extern crate alloc; -use core::fmt::Write; +use core::{arch::asm, fmt::Write}; use nalgebra::SVector; use pim_os::{pim::vector::F16x1, uart::Uart0}; @@ -24,9 +24,18 @@ pub extern "C" fn main() { let a = SVector::::zeros(); let b = SVector::::zeros(); + for element in a.iter() { + unsafe { asm!("dc civac, {val}", val = in(reg) element) } + } + for element in b.iter() { + unsafe { asm!("dc civac, {val}", val = in(reg) element) } + } + pim_os::m5op::exit(0); - let c = a.component_mul(&b); - core::hint::black_box(c); + for _ in 0..10 { + let c = a.component_mul(&b); + core::hint::black_box(c); + } pim_os::m5op::exit(0); writeln!(Uart0, "Done").unwrap(); diff --git a/pim-os/src/bin/gemv.rs b/pim-os/src/bin/gemv.rs index 3617853..faf7ab6 100644 --- a/pim-os/src/bin/gemv.rs +++ b/pim-os/src/bin/gemv.rs @@ -18,34 +18,35 @@ use pim_os::{ uart::Uart0, }; -#[cfg(feature = "X1")] -const ROWS: usize = 16; - -#[cfg(feature = "X2")] -const ROWS: usize = 32; - -#[cfg(feature = "X3")] -const ROWS: usize = 64; - -#[cfg(feature = "X4")] const ROWS: usize = 128; - const COLUMNS: usize = 128; const X16_ROWS: usize = ROWS / 16; const X16_COLUMNS: usize = COLUMNS / 16; +#[cfg(feature = "X1")] +const REPETITIONS: usize = 1; + +#[cfg(feature = "X2")] +const REPETITIONS: usize = 2; + +#[cfg(feature = "X3")] +const REPETITIONS: usize = 4; + +#[cfg(feature = "X4")] +const REPETITIONS: usize = 8; + #[no_mangle] pub extern "C" fn main() { - #[cfg(feature = "X1")] - pim::state::set_kernel(&gemv::KERNEL_X1); + // #[cfg(feature = "X1")] + // pim::state::set_kernel(&gemv::KERNEL_X1); - #[cfg(feature = "X2")] - pim::state::set_kernel(&gemv::KERNEL_X2); + // #[cfg(feature = "X2")] + // pim::state::set_kernel(&gemv::KERNEL_X2); - #[cfg(feature = "X3")] - pim::state::set_kernel(&gemv::KERNEL_X3); + // #[cfg(feature = "X3")] + // pim::state::set_kernel(&gemv::KERNEL_X3); - #[cfg(feature = "X4")] + // #[cfg(feature = "X4")] pim::state::set_kernel(&gemv::KERNEL_X4); let mut matrix = SMatrix::<_, ROWS, COLUMNS>::zeros(); @@ -63,32 +64,35 @@ pub extern "C" fn main() { // Verify everything is correctly initialized before PIM operation barrier::dsb(barrier::SY); - // Execute kernel - { - pim::state::set_bank_mode(BankMode::PimAllBank); + pim_os::m5op::exit(0); + for _ in 0..10 { + // Execute kernel + { + pim::state::set_bank_mode(BankMode::PimAllBank); - pim_os::m5op::exit(0); - gemv::execute( - pim_matrix.as_ref(), - interleaved_input_vector.as_ref(), - output_partial_sum_vector.as_mut(), - dummy.as_ref(), - ); - pim_os::m5op::exit(0); + for _ in 0..REPETITIONS { + gemv::execute( + pim_matrix.as_ref(), + interleaved_input_vector.as_ref(), + output_partial_sum_vector.as_mut(), + dummy.as_ref(), + ); + } - pim::state::set_bank_mode(BankMode::SingleBank); + pim::state::set_bank_mode(BankMode::SingleBank); + } + + // writeln!(Uart0, "{output_partial_sum_vector}").unwrap(); + + let output_vector = SVector::::from_fn(|r, _| { + output_partial_sum_vector[r] + .0 + .iter() + .fold(F16x1::zero(), |acc, val| acc + *val) + }); + core::hint::black_box(output_vector); } - - // writeln!(Uart0, "{output_partial_sum_vector}").unwrap(); - - let output_vector = SVector::::from_fn(|r, _| { - output_partial_sum_vector[r] - .0 - .iter() - .fold(F16x1::zero(), |acc, val| acc + *val) - }); - - core::hint::black_box(output_vector); + pim_os::m5op::exit(0); // writeln!(Uart0, "{output_vector}").unwrap(); writeln!(Uart0, "Done").unwrap(); diff --git a/pim-os/src/bin/gemv_layers.rs b/pim-os/src/bin/gemv_layers.rs index dd2cf61..e0db3e2 100644 --- a/pim-os/src/bin/gemv_layers.rs +++ b/pim-os/src/bin/gemv_layers.rs @@ -23,6 +23,18 @@ const COLUMNS: usize = 128; const X16_ROWS: usize = ROWS / 16; const X16_COLUMNS: usize = COLUMNS / 16; +#[cfg(feature = "X1")] +const REPETITIONS: usize = 1; + +#[cfg(feature = "X2")] +const REPETITIONS: usize = 2; + +#[cfg(feature = "X3")] +const REPETITIONS: usize = 4; + +#[cfg(feature = "X4")] +const REPETITIONS: usize = 8; + #[no_mangle] pub extern "C" fn main() { pim::state::set_kernel(&gemv::KERNEL_X4); @@ -38,33 +50,42 @@ pub extern "C" fn main() { let dummy = Box::new(0); - let interleaved_input_vector = Box::new(interleaved_array::Vector::from(input_vector.clone())); - // Verify everything is correctly initialized before PIM operation barrier::dsb(barrier::SY); - // Execute kernel - { - pim::state::set_bank_mode(BankMode::PimAllBank); + pim_os::m5op::exit(0); + for _ in 0..5 { + let interleaved_input_vector = + Box::new(interleaved_array::Vector::from(input_vector.clone())); - gemv::execute( - pim_matrix.as_ref(), - interleaved_input_vector.as_ref(), - output_partial_sum_vector.as_mut(), - dummy.as_ref(), - ); + // Execute kernel + { + pim::state::set_bank_mode(BankMode::PimAllBank); - pim::state::set_bank_mode(BankMode::SingleBank); + for _ in 0..REPETITIONS { + gemv::execute( + pim_matrix.as_ref(), + interleaved_input_vector.as_ref(), + output_partial_sum_vector.as_mut(), + dummy.as_ref(), + ); + } + + pim::state::set_bank_mode(BankMode::SingleBank); + } + + // writeln!(Uart0, "{output_partial_sum_vector}").unwrap(); + + let output_vector = SVector::::from_fn(|r, _| { + output_partial_sum_vector[r] + .0 + .iter() + .fold(F16x1::zero(), |acc, val| acc + *val) + }); + + core::hint::black_box(output_vector); } - - let output_vector = SVector::::from_fn(|r, _| { - output_partial_sum_vector[r] - .0 - .iter() - .fold(F16x1::zero(), |acc, val| acc + *val) - }); - - core::hint::black_box(output_vector); + pim_os::m5op::exit(0); writeln!(Uart0, "Done").unwrap(); } diff --git a/pim-os/src/bin/haxpy.rs b/pim-os/src/bin/haxpy.rs index 6e7b9a6..c51df62 100644 --- a/pim-os/src/bin/haxpy.rs +++ b/pim-os/src/bin/haxpy.rs @@ -74,13 +74,15 @@ pub extern "C" fn main() { pim::state::set_bank_mode(BankMode::PimAllBank); pim_os::m5op::exit(0); - haxpy::execute::( - &a.0, - &b.0, - &interleaved_scalar_vector, - &mut c.0, - dummy.as_ref(), - ); + for _ in 0..10 { + haxpy::execute::( + &a.0, + &b.0, + &interleaved_scalar_vector, + &mut c.0, + dummy.as_ref(), + ); + } pim_os::m5op::exit(0); pim::state::set_bank_mode(BankMode::SingleBank); diff --git a/pim-os/src/bin/vadd.rs b/pim-os/src/bin/vadd.rs index a38ecd6..5892fc9 100644 --- a/pim-os/src/bin/vadd.rs +++ b/pim-os/src/bin/vadd.rs @@ -68,7 +68,9 @@ pub extern "C" fn main() { pim::state::set_bank_mode(BankMode::PimAllBank); pim_os::m5op::exit(0); - vadd::execute::(&a.0, &b.0, &mut c.0, dummy.as_ref()); + for _ in 0..10 { + vadd::execute::(&a.0, &b.0, &mut c.0, dummy.as_ref()); + } pim_os::m5op::exit(0); pim::state::set_bank_mode(BankMode::SingleBank); diff --git a/pim-os/src/bin/vmul.rs b/pim-os/src/bin/vmul.rs index b667b94..c9ab53b 100644 --- a/pim-os/src/bin/vmul.rs +++ b/pim-os/src/bin/vmul.rs @@ -68,7 +68,9 @@ pub extern "C" fn main() { pim::state::set_bank_mode(BankMode::PimAllBank); pim_os::m5op::exit(0); - vmul::execute::(&a.0, &b.0, &mut c.0, dummy.as_ref()); + for _ in 0..10 { + vmul::execute::(&a.0, &b.0, &mut c.0, dummy.as_ref()); + } pim_os::m5op::exit(0); pim::state::set_bank_mode(BankMode::SingleBank);