Fine tune break-even point

This commit is contained in:
2024-02-29 14:47:57 +01:00
parent 4661ab7051
commit d3bb75b84b
4 changed files with 29 additions and 22 deletions

View File

@@ -16,7 +16,7 @@ SECTIONS
.bss : { *(.bss) } > dram .bss : { *(.bss) } > dram
. = ALIGN(8); . = ALIGN(8);
. = . + 0x100000; # 1 MiB Stack . = . + 0x10000000; # 256 MiB Stack
LD_STACK_PTR = .; LD_STACK_PTR = .;
.pim_config : { KEEP(*(.pim_config)) } > dram_pim_config .pim_config : { KEEP(*(.pim_config)) } > dram_pim_config

View File

@@ -4,34 +4,41 @@
extern crate alloc; extern crate alloc;
use core::fmt::Write; use core::fmt::Write;
use half::f16;
use nalgebra::{SMatrix, SVector}; use nalgebra::{SMatrix, SVector};
use pim_os::{pim::vector::F16x1, uart::Uart0}; use pim_os::{pim::vector::F16x1, uart::Uart0};
use half::f16;
#[cfg(feature = "X1")] #[cfg(feature = "X1")]
const ROWS: usize = 128; const DIMENSIONS: usize = 128;
#[cfg(feature = "X2")] #[cfg(feature = "X2")]
const ROWS: usize = 256; const DIMENSIONS: usize = 256;
#[cfg(feature = "X3")] #[cfg(feature = "X3")]
const ROWS: usize = 512; const DIMENSIONS: usize = 512;
#[cfg(feature = "X4")] #[cfg(feature = "X4")]
const ROWS: usize = 1024; const DIMENSIONS: usize = 1024;
const COLUMNS: usize = 128; const ROWS: usize = DIMENSIONS;
const COLUMNS: usize = DIMENSIONS;
#[no_mangle] #[no_mangle]
pub extern "C" fn main() { pub extern "C" fn main() {
let matrix = SMatrix::<F16x1, ROWS, COLUMNS>::zeros(); let matrix = SMatrix::<F16x1, ROWS, COLUMNS>::zeros();
let input_vector = SVector::<F16x1, COLUMNS>::zeros(); let mut input_vector = SVector::<F16x1, COLUMNS>::zeros();
pim_os::m5op::exit(0); pim_os::m5op::exit(0);
for _ in 0..5 { for _ in 0..1 {
let mut output_vector = matrix * input_vector; input_vector = matrix * input_vector;
output_vector = output_vector.map(|element| if element.0 < f16::ZERO { F16x1(f16::ZERO) } else { element }); input_vector = input_vector.map(|element| {
core::hint::black_box(output_vector); if element.0 < f16::ZERO {
F16x1(f16::ZERO)
} else {
element
}
});
core::hint::black_box(input_vector);
} }
pim_os::m5op::exit(0); pim_os::m5op::exit(0);

View File

@@ -18,11 +18,6 @@ use pim_os::{
uart::Uart0, uart::Uart0,
}; };
const ROWS: usize = 128;
const COLUMNS: usize = 128;
const X16_ROWS: usize = ROWS / 16;
const X16_COLUMNS: usize = COLUMNS / 16;
#[cfg(feature = "X1")] #[cfg(feature = "X1")]
const REPETITIONS: usize = 1; const REPETITIONS: usize = 1;
@@ -35,6 +30,11 @@ const REPETITIONS: usize = 4;
#[cfg(feature = "X4")] #[cfg(feature = "X4")]
const REPETITIONS: usize = 8; const REPETITIONS: usize = 8;
const ROWS: usize = 128;
const COLUMNS: usize = 128; // Has to be 128 (X16_COLUMNS must equal the 8 x16 blocks gemv::execute expects). Assume DIMENSIONS and fill the rest with zeros.
const X16_ROWS: usize = ROWS / 16;
const X16_COLUMNS: usize = COLUMNS / 16;
#[no_mangle] #[no_mangle]
pub extern "C" fn main() { pub extern "C" fn main() {
pim::state::set_kernel(&gemv::KERNEL_X4); pim::state::set_kernel(&gemv::KERNEL_X4);
@@ -54,7 +54,7 @@ pub extern "C" fn main() {
barrier::dsb(barrier::SY); barrier::dsb(barrier::SY);
pim_os::m5op::exit(0); pim_os::m5op::exit(0);
for _ in 0..5 { for _ in 0..1 {
let interleaved_input_vector = let interleaved_input_vector =
Box::new(interleaved_array::Vector::from(input_vector.clone())); Box::new(interleaved_array::Vector::from(input_vector.clone()));
@@ -62,7 +62,7 @@ pub extern "C" fn main() {
{ {
pim::state::set_bank_mode(BankMode::PimAllBank); pim::state::set_bank_mode(BankMode::PimAllBank);
for _ in 0..REPETITIONS { for _ in 0..(REPETITIONS * REPETITIONS) {
gemv::execute( gemv::execute(
pim_matrix.as_ref(), pim_matrix.as_ref(),
interleaved_input_vector.as_ref(), interleaved_input_vector.as_ref(),

View File

@@ -322,9 +322,9 @@ pub const KERNEL_X4: Kernel = Kernel([
Instruction::NOP, Instruction::NOP,
]); ]);
pub fn execute<const X16R: usize, const R: usize, const X16C: usize>( pub fn execute<const X16R: usize, const R: usize>(
matrix: &Matrix<X16R, X16C>, matrix: &Matrix<X16R, 8>,
input_vector: &interleaved_array::Vector<X16C>, input_vector: &interleaved_array::Vector<8>,
output_partial_sum_vector: &mut SVector<F16x16, R>, output_partial_sum_vector: &mut SVector<F16x16, R>,
dummy: &impl PimOperand, dummy: &impl PimOperand,
) { ) {