Fine tune break-even point
This commit is contained in:
@@ -16,7 +16,7 @@ SECTIONS
|
|||||||
.bss : { *(.bss) } > dram
|
.bss : { *(.bss) } > dram
|
||||||
|
|
||||||
. = ALIGN(8);
|
. = ALIGN(8);
|
||||||
. = . + 0x100000; # 1 MiB Stack
|
. = . + 0x10000000; # 256 MiB Stack
|
||||||
LD_STACK_PTR = .;
|
LD_STACK_PTR = .;
|
||||||
|
|
||||||
.pim_config : { KEEP(*(.pim_config)) } > dram_pim_config
|
.pim_config : { KEEP(*(.pim_config)) } > dram_pim_config
|
||||||
|
|||||||
@@ -4,34 +4,41 @@
|
|||||||
extern crate alloc;
|
extern crate alloc;
|
||||||
|
|
||||||
use core::fmt::Write;
|
use core::fmt::Write;
|
||||||
|
use half::f16;
|
||||||
use nalgebra::{SMatrix, SVector};
|
use nalgebra::{SMatrix, SVector};
|
||||||
use pim_os::{pim::vector::F16x1, uart::Uart0};
|
use pim_os::{pim::vector::F16x1, uart::Uart0};
|
||||||
use half::f16;
|
|
||||||
|
|
||||||
#[cfg(feature = "X1")]
|
#[cfg(feature = "X1")]
|
||||||
const ROWS: usize = 128;
|
const DIMENSIONS: usize = 128;
|
||||||
|
|
||||||
#[cfg(feature = "X2")]
|
#[cfg(feature = "X2")]
|
||||||
const ROWS: usize = 256;
|
const DIMENSIONS: usize = 256;
|
||||||
|
|
||||||
#[cfg(feature = "X3")]
|
#[cfg(feature = "X3")]
|
||||||
const ROWS: usize = 512;
|
const DIMENSIONS: usize = 512;
|
||||||
|
|
||||||
#[cfg(feature = "X4")]
|
#[cfg(feature = "X4")]
|
||||||
const ROWS: usize = 1024;
|
const DIMENSIONS: usize = 1024;
|
||||||
|
|
||||||
const COLUMNS: usize = 128;
|
const ROWS: usize = DIMENSIONS;
|
||||||
|
const COLUMNS: usize = DIMENSIONS;
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub extern "C" fn main() {
|
pub extern "C" fn main() {
|
||||||
let matrix = SMatrix::<F16x1, ROWS, COLUMNS>::zeros();
|
let matrix = SMatrix::<F16x1, ROWS, COLUMNS>::zeros();
|
||||||
let input_vector = SVector::<F16x1, COLUMNS>::zeros();
|
let mut input_vector = SVector::<F16x1, COLUMNS>::zeros();
|
||||||
|
|
||||||
pim_os::m5op::exit(0);
|
pim_os::m5op::exit(0);
|
||||||
for _ in 0..5 {
|
for _ in 0..1 {
|
||||||
let mut output_vector = matrix * input_vector;
|
input_vector = matrix * input_vector;
|
||||||
output_vector = output_vector.map(|element| if element.0 < f16::ZERO { F16x1(f16::ZERO) } else { element });
|
input_vector = input_vector.map(|element| {
|
||||||
core::hint::black_box(output_vector);
|
if element.0 < f16::ZERO {
|
||||||
|
F16x1(f16::ZERO)
|
||||||
|
} else {
|
||||||
|
element
|
||||||
|
}
|
||||||
|
});
|
||||||
|
core::hint::black_box(input_vector);
|
||||||
}
|
}
|
||||||
pim_os::m5op::exit(0);
|
pim_os::m5op::exit(0);
|
||||||
|
|
||||||
|
|||||||
@@ -18,11 +18,6 @@ use pim_os::{
|
|||||||
uart::Uart0,
|
uart::Uart0,
|
||||||
};
|
};
|
||||||
|
|
||||||
const ROWS: usize = 128;
|
|
||||||
const COLUMNS: usize = 128;
|
|
||||||
const X16_ROWS: usize = ROWS / 16;
|
|
||||||
const X16_COLUMNS: usize = COLUMNS / 16;
|
|
||||||
|
|
||||||
#[cfg(feature = "X1")]
|
#[cfg(feature = "X1")]
|
||||||
const REPETITIONS: usize = 1;
|
const REPETITIONS: usize = 1;
|
||||||
|
|
||||||
@@ -35,6 +30,11 @@ const REPETITIONS: usize = 4;
|
|||||||
#[cfg(feature = "X4")]
|
#[cfg(feature = "X4")]
|
||||||
const REPETITIONS: usize = 8;
|
const REPETITIONS: usize = 8;
|
||||||
|
|
||||||
|
const ROWS: usize = 128;
|
||||||
|
const COLUMNS: usize = 128; // Has to be 128. Assume DIMENSIONS and fill rest with zeros.
|
||||||
|
const X16_ROWS: usize = ROWS / 16;
|
||||||
|
const X16_COLUMNS: usize = COLUMNS / 16;
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub extern "C" fn main() {
|
pub extern "C" fn main() {
|
||||||
pim::state::set_kernel(&gemv::KERNEL_X4);
|
pim::state::set_kernel(&gemv::KERNEL_X4);
|
||||||
@@ -54,7 +54,7 @@ pub extern "C" fn main() {
|
|||||||
barrier::dsb(barrier::SY);
|
barrier::dsb(barrier::SY);
|
||||||
|
|
||||||
pim_os::m5op::exit(0);
|
pim_os::m5op::exit(0);
|
||||||
for _ in 0..5 {
|
for _ in 0..1 {
|
||||||
let interleaved_input_vector =
|
let interleaved_input_vector =
|
||||||
Box::new(interleaved_array::Vector::from(input_vector.clone()));
|
Box::new(interleaved_array::Vector::from(input_vector.clone()));
|
||||||
|
|
||||||
@@ -62,7 +62,7 @@ pub extern "C" fn main() {
|
|||||||
{
|
{
|
||||||
pim::state::set_bank_mode(BankMode::PimAllBank);
|
pim::state::set_bank_mode(BankMode::PimAllBank);
|
||||||
|
|
||||||
for _ in 0..REPETITIONS {
|
for _ in 0..(REPETITIONS * REPETITIONS) {
|
||||||
gemv::execute(
|
gemv::execute(
|
||||||
pim_matrix.as_ref(),
|
pim_matrix.as_ref(),
|
||||||
interleaved_input_vector.as_ref(),
|
interleaved_input_vector.as_ref(),
|
||||||
|
|||||||
@@ -322,9 +322,9 @@ pub const KERNEL_X4: Kernel = Kernel([
|
|||||||
Instruction::NOP,
|
Instruction::NOP,
|
||||||
]);
|
]);
|
||||||
|
|
||||||
pub fn execute<const X16R: usize, const R: usize, const X16C: usize>(
|
pub fn execute<const X16R: usize, const R: usize>(
|
||||||
matrix: &Matrix<X16R, X16C>,
|
matrix: &Matrix<X16R, 8>,
|
||||||
input_vector: &interleaved_array::Vector<X16C>,
|
input_vector: &interleaved_array::Vector<8>,
|
||||||
output_partial_sum_vector: &mut SVector<F16x16, R>,
|
output_partial_sum_vector: &mut SVector<F16x16, R>,
|
||||||
dummy: &impl PimOperand,
|
dummy: &impl PimOperand,
|
||||||
) {
|
) {
|
||||||
|
|||||||
Reference in New Issue
Block a user