Introduce repetitions in kernels
This commit is contained in:
@@ -3,21 +3,21 @@
|
|||||||
|
|
||||||
extern crate alloc;
|
extern crate alloc;
|
||||||
|
|
||||||
use core::fmt::Write;
|
use core::{arch::asm, fmt::Write};
|
||||||
use nalgebra::{SMatrix, SVector};
|
use nalgebra::{SMatrix, SVector};
|
||||||
use pim_os::{pim::vector::F16x1, uart::Uart0};
|
use pim_os::{pim::vector::F16x1, uart::Uart0};
|
||||||
|
|
||||||
#[cfg(feature = "X1")]
|
#[cfg(feature = "X1")]
|
||||||
const ROWS: usize = 16;
|
const ROWS: usize = 128;
|
||||||
|
|
||||||
#[cfg(feature = "X2")]
|
#[cfg(feature = "X2")]
|
||||||
const ROWS: usize = 32;
|
const ROWS: usize = 256;
|
||||||
|
|
||||||
#[cfg(feature = "X3")]
|
#[cfg(feature = "X3")]
|
||||||
const ROWS: usize = 64;
|
const ROWS: usize = 512;
|
||||||
|
|
||||||
#[cfg(feature = "X4")]
|
#[cfg(feature = "X4")]
|
||||||
const ROWS: usize = 128;
|
const ROWS: usize = 1024;
|
||||||
|
|
||||||
const COLUMNS: usize = 128;
|
const COLUMNS: usize = 128;
|
||||||
|
|
||||||
@@ -26,9 +26,19 @@ pub extern "C" fn main() {
|
|||||||
let matrix = SMatrix::<F16x1, ROWS, COLUMNS>::zeros();
|
let matrix = SMatrix::<F16x1, ROWS, COLUMNS>::zeros();
|
||||||
let input_vector = SVector::<F16x1, COLUMNS>::zeros();
|
let input_vector = SVector::<F16x1, COLUMNS>::zeros();
|
||||||
|
|
||||||
|
// Flush cache
|
||||||
|
for element in matrix.iter() {
|
||||||
|
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
|
||||||
|
}
|
||||||
|
for element in input_vector.iter() {
|
||||||
|
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
|
||||||
|
}
|
||||||
|
|
||||||
pim_os::m5op::exit(0);
|
pim_os::m5op::exit(0);
|
||||||
let output_vector = matrix * input_vector;
|
for _ in 0..10 {
|
||||||
core::hint::black_box(output_vector);
|
let output_vector = matrix * input_vector;
|
||||||
|
core::hint::black_box(output_vector);
|
||||||
|
}
|
||||||
pim_os::m5op::exit(0);
|
pim_os::m5op::exit(0);
|
||||||
|
|
||||||
writeln!(Uart0, "Done").unwrap();
|
writeln!(Uart0, "Done").unwrap();
|
||||||
|
|||||||
@@ -6,18 +6,33 @@ extern crate alloc;
|
|||||||
use core::fmt::Write;
|
use core::fmt::Write;
|
||||||
use nalgebra::{SMatrix, SVector};
|
use nalgebra::{SMatrix, SVector};
|
||||||
use pim_os::{pim::vector::F16x1, uart::Uart0};
|
use pim_os::{pim::vector::F16x1, uart::Uart0};
|
||||||
|
use half::f16;
|
||||||
|
|
||||||
|
#[cfg(feature = "X1")]
|
||||||
const ROWS: usize = 128;
|
const ROWS: usize = 128;
|
||||||
|
|
||||||
|
#[cfg(feature = "X2")]
|
||||||
|
const ROWS: usize = 256;
|
||||||
|
|
||||||
|
#[cfg(feature = "X3")]
|
||||||
|
const ROWS: usize = 512;
|
||||||
|
|
||||||
|
#[cfg(feature = "X4")]
|
||||||
|
const ROWS: usize = 1024;
|
||||||
|
|
||||||
const COLUMNS: usize = 128;
|
const COLUMNS: usize = 128;
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub extern "C" fn main() {
|
pub extern "C" fn main() {
|
||||||
let matrix = SMatrix::<F16x1, ROWS, COLUMNS>::zeros();
|
let matrix = SMatrix::<F16x1, ROWS, COLUMNS>::zeros();
|
||||||
let mut input_vector = SVector::<F16x1, COLUMNS>::zeros();
|
let input_vector = SVector::<F16x1, COLUMNS>::zeros();
|
||||||
|
|
||||||
pim_os::m5op::exit(0);
|
pim_os::m5op::exit(0);
|
||||||
input_vector = matrix * input_vector;
|
for _ in 0..5 {
|
||||||
core::hint::black_box(input_vector);
|
let mut output_vector = matrix * input_vector;
|
||||||
|
output_vector = output_vector.map(|element| if element.0 < f16::ZERO { F16x1(f16::ZERO) } else { element });
|
||||||
|
core::hint::black_box(output_vector);
|
||||||
|
}
|
||||||
pim_os::m5op::exit(0);
|
pim_os::m5op::exit(0);
|
||||||
|
|
||||||
writeln!(Uart0, "Done").unwrap();
|
writeln!(Uart0, "Done").unwrap();
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
extern crate alloc;
|
extern crate alloc;
|
||||||
|
|
||||||
use core::fmt::Write;
|
use core::{arch::asm, fmt::Write};
|
||||||
use nalgebra::SVector;
|
use nalgebra::SVector;
|
||||||
use num_traits::identities::Zero;
|
use num_traits::identities::Zero;
|
||||||
use pim_os::{pim::vector::F16x1, uart::Uart0};
|
use pim_os::{pim::vector::F16x1, uart::Uart0};
|
||||||
@@ -26,10 +26,20 @@ pub extern "C" fn main() {
|
|||||||
let b = SVector::<F16x1, ROWS>::zeros();
|
let b = SVector::<F16x1, ROWS>::zeros();
|
||||||
let s = F16x1::zero();
|
let s = F16x1::zero();
|
||||||
|
|
||||||
|
// Flush cache
|
||||||
|
for element in a.iter() {
|
||||||
|
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
|
||||||
|
}
|
||||||
|
for element in b.iter() {
|
||||||
|
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
|
||||||
|
}
|
||||||
|
|
||||||
pim_os::m5op::exit(0);
|
pim_os::m5op::exit(0);
|
||||||
let a_s = a * s;
|
for _ in 0..10 {
|
||||||
let c = a_s.component_mul(&b);
|
let a_s = a * s;
|
||||||
core::hint::black_box(c);
|
let c = a_s.component_mul(&b);
|
||||||
|
core::hint::black_box(c);
|
||||||
|
}
|
||||||
pim_os::m5op::exit(0);
|
pim_os::m5op::exit(0);
|
||||||
|
|
||||||
writeln!(Uart0, "Done").unwrap();
|
writeln!(Uart0, "Done").unwrap();
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
extern crate alloc;
|
extern crate alloc;
|
||||||
|
|
||||||
use core::fmt::Write;
|
use core::{arch::asm, fmt::Write};
|
||||||
use nalgebra::SVector;
|
use nalgebra::SVector;
|
||||||
use pim_os::{pim::vector::F16x1, uart::Uart0};
|
use pim_os::{pim::vector::F16x1, uart::Uart0};
|
||||||
|
|
||||||
@@ -24,9 +24,18 @@ pub extern "C" fn main() {
|
|||||||
let a = SVector::<F16x1, ROWS>::zeros();
|
let a = SVector::<F16x1, ROWS>::zeros();
|
||||||
let b = SVector::<F16x1, ROWS>::zeros();
|
let b = SVector::<F16x1, ROWS>::zeros();
|
||||||
|
|
||||||
|
for element in a.iter() {
|
||||||
|
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
|
||||||
|
}
|
||||||
|
for element in b.iter() {
|
||||||
|
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
|
||||||
|
}
|
||||||
|
|
||||||
pim_os::m5op::exit(0);
|
pim_os::m5op::exit(0);
|
||||||
let c = a + b;
|
for _ in 0..10 {
|
||||||
core::hint::black_box(c);
|
let c = a + b;
|
||||||
|
core::hint::black_box(c);
|
||||||
|
}
|
||||||
pim_os::m5op::exit(0);
|
pim_os::m5op::exit(0);
|
||||||
|
|
||||||
writeln!(Uart0, "Done").unwrap();
|
writeln!(Uart0, "Done").unwrap();
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
extern crate alloc;
|
extern crate alloc;
|
||||||
|
|
||||||
use core::fmt::Write;
|
use core::{arch::asm, fmt::Write};
|
||||||
use nalgebra::SVector;
|
use nalgebra::SVector;
|
||||||
use pim_os::{pim::vector::F16x1, uart::Uart0};
|
use pim_os::{pim::vector::F16x1, uart::Uart0};
|
||||||
|
|
||||||
@@ -24,9 +24,18 @@ pub extern "C" fn main() {
|
|||||||
let a = SVector::<F16x1, ROWS>::zeros();
|
let a = SVector::<F16x1, ROWS>::zeros();
|
||||||
let b = SVector::<F16x1, ROWS>::zeros();
|
let b = SVector::<F16x1, ROWS>::zeros();
|
||||||
|
|
||||||
|
for element in a.iter() {
|
||||||
|
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
|
||||||
|
}
|
||||||
|
for element in b.iter() {
|
||||||
|
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
|
||||||
|
}
|
||||||
|
|
||||||
pim_os::m5op::exit(0);
|
pim_os::m5op::exit(0);
|
||||||
let c = a.component_mul(&b);
|
for _ in 0..10 {
|
||||||
core::hint::black_box(c);
|
let c = a.component_mul(&b);
|
||||||
|
core::hint::black_box(c);
|
||||||
|
}
|
||||||
pim_os::m5op::exit(0);
|
pim_os::m5op::exit(0);
|
||||||
|
|
||||||
writeln!(Uart0, "Done").unwrap();
|
writeln!(Uart0, "Done").unwrap();
|
||||||
|
|||||||
@@ -18,34 +18,35 @@ use pim_os::{
|
|||||||
uart::Uart0,
|
uart::Uart0,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[cfg(feature = "X1")]
|
|
||||||
const ROWS: usize = 16;
|
|
||||||
|
|
||||||
#[cfg(feature = "X2")]
|
|
||||||
const ROWS: usize = 32;
|
|
||||||
|
|
||||||
#[cfg(feature = "X3")]
|
|
||||||
const ROWS: usize = 64;
|
|
||||||
|
|
||||||
#[cfg(feature = "X4")]
|
|
||||||
const ROWS: usize = 128;
|
const ROWS: usize = 128;
|
||||||
|
|
||||||
const COLUMNS: usize = 128;
|
const COLUMNS: usize = 128;
|
||||||
const X16_ROWS: usize = ROWS / 16;
|
const X16_ROWS: usize = ROWS / 16;
|
||||||
const X16_COLUMNS: usize = COLUMNS / 16;
|
const X16_COLUMNS: usize = COLUMNS / 16;
|
||||||
|
|
||||||
|
#[cfg(feature = "X1")]
|
||||||
|
const REPETITIONS: usize = 1;
|
||||||
|
|
||||||
|
#[cfg(feature = "X2")]
|
||||||
|
const REPETITIONS: usize = 2;
|
||||||
|
|
||||||
|
#[cfg(feature = "X3")]
|
||||||
|
const REPETITIONS: usize = 4;
|
||||||
|
|
||||||
|
#[cfg(feature = "X4")]
|
||||||
|
const REPETITIONS: usize = 8;
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub extern "C" fn main() {
|
pub extern "C" fn main() {
|
||||||
#[cfg(feature = "X1")]
|
// #[cfg(feature = "X1")]
|
||||||
pim::state::set_kernel(&gemv::KERNEL_X1);
|
// pim::state::set_kernel(&gemv::KERNEL_X1);
|
||||||
|
|
||||||
#[cfg(feature = "X2")]
|
// #[cfg(feature = "X2")]
|
||||||
pim::state::set_kernel(&gemv::KERNEL_X2);
|
// pim::state::set_kernel(&gemv::KERNEL_X2);
|
||||||
|
|
||||||
#[cfg(feature = "X3")]
|
// #[cfg(feature = "X3")]
|
||||||
pim::state::set_kernel(&gemv::KERNEL_X3);
|
// pim::state::set_kernel(&gemv::KERNEL_X3);
|
||||||
|
|
||||||
#[cfg(feature = "X4")]
|
// #[cfg(feature = "X4")]
|
||||||
pim::state::set_kernel(&gemv::KERNEL_X4);
|
pim::state::set_kernel(&gemv::KERNEL_X4);
|
||||||
|
|
||||||
let mut matrix = SMatrix::<_, ROWS, COLUMNS>::zeros();
|
let mut matrix = SMatrix::<_, ROWS, COLUMNS>::zeros();
|
||||||
@@ -63,32 +64,35 @@ pub extern "C" fn main() {
|
|||||||
// Verify everything is correctly initialized before PIM operation
|
// Verify everything is correctly initialized before PIM operation
|
||||||
barrier::dsb(barrier::SY);
|
barrier::dsb(barrier::SY);
|
||||||
|
|
||||||
// Execute kernel
|
pim_os::m5op::exit(0);
|
||||||
{
|
for _ in 0..10 {
|
||||||
pim::state::set_bank_mode(BankMode::PimAllBank);
|
// Execute kernel
|
||||||
|
{
|
||||||
|
pim::state::set_bank_mode(BankMode::PimAllBank);
|
||||||
|
|
||||||
pim_os::m5op::exit(0);
|
for _ in 0..REPETITIONS {
|
||||||
gemv::execute(
|
gemv::execute(
|
||||||
pim_matrix.as_ref(),
|
pim_matrix.as_ref(),
|
||||||
interleaved_input_vector.as_ref(),
|
interleaved_input_vector.as_ref(),
|
||||||
output_partial_sum_vector.as_mut(),
|
output_partial_sum_vector.as_mut(),
|
||||||
dummy.as_ref(),
|
dummy.as_ref(),
|
||||||
);
|
);
|
||||||
pim_os::m5op::exit(0);
|
}
|
||||||
|
|
||||||
pim::state::set_bank_mode(BankMode::SingleBank);
|
pim::state::set_bank_mode(BankMode::SingleBank);
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeln!(Uart0, "{output_partial_sum_vector}").unwrap();
|
||||||
|
|
||||||
|
let output_vector = SVector::<F16x1, ROWS>::from_fn(|r, _| {
|
||||||
|
output_partial_sum_vector[r]
|
||||||
|
.0
|
||||||
|
.iter()
|
||||||
|
.fold(F16x1::zero(), |acc, val| acc + *val)
|
||||||
|
});
|
||||||
|
core::hint::black_box(output_vector);
|
||||||
}
|
}
|
||||||
|
pim_os::m5op::exit(0);
|
||||||
// writeln!(Uart0, "{output_partial_sum_vector}").unwrap();
|
|
||||||
|
|
||||||
let output_vector = SVector::<F16x1, ROWS>::from_fn(|r, _| {
|
|
||||||
output_partial_sum_vector[r]
|
|
||||||
.0
|
|
||||||
.iter()
|
|
||||||
.fold(F16x1::zero(), |acc, val| acc + *val)
|
|
||||||
});
|
|
||||||
|
|
||||||
core::hint::black_box(output_vector);
|
|
||||||
|
|
||||||
// writeln!(Uart0, "{output_vector}").unwrap();
|
// writeln!(Uart0, "{output_vector}").unwrap();
|
||||||
writeln!(Uart0, "Done").unwrap();
|
writeln!(Uart0, "Done").unwrap();
|
||||||
|
|||||||
@@ -23,6 +23,18 @@ const COLUMNS: usize = 128;
|
|||||||
const X16_ROWS: usize = ROWS / 16;
|
const X16_ROWS: usize = ROWS / 16;
|
||||||
const X16_COLUMNS: usize = COLUMNS / 16;
|
const X16_COLUMNS: usize = COLUMNS / 16;
|
||||||
|
|
||||||
|
#[cfg(feature = "X1")]
|
||||||
|
const REPETITIONS: usize = 1;
|
||||||
|
|
||||||
|
#[cfg(feature = "X2")]
|
||||||
|
const REPETITIONS: usize = 2;
|
||||||
|
|
||||||
|
#[cfg(feature = "X3")]
|
||||||
|
const REPETITIONS: usize = 4;
|
||||||
|
|
||||||
|
#[cfg(feature = "X4")]
|
||||||
|
const REPETITIONS: usize = 8;
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub extern "C" fn main() {
|
pub extern "C" fn main() {
|
||||||
pim::state::set_kernel(&gemv::KERNEL_X4);
|
pim::state::set_kernel(&gemv::KERNEL_X4);
|
||||||
@@ -38,33 +50,42 @@ pub extern "C" fn main() {
|
|||||||
|
|
||||||
let dummy = Box::new(0);
|
let dummy = Box::new(0);
|
||||||
|
|
||||||
let interleaved_input_vector = Box::new(interleaved_array::Vector::from(input_vector.clone()));
|
|
||||||
|
|
||||||
// Verify everything is correctly initialized before PIM operation
|
// Verify everything is correctly initialized before PIM operation
|
||||||
barrier::dsb(barrier::SY);
|
barrier::dsb(barrier::SY);
|
||||||
|
|
||||||
// Execute kernel
|
pim_os::m5op::exit(0);
|
||||||
{
|
for _ in 0..5 {
|
||||||
pim::state::set_bank_mode(BankMode::PimAllBank);
|
let interleaved_input_vector =
|
||||||
|
Box::new(interleaved_array::Vector::from(input_vector.clone()));
|
||||||
|
|
||||||
gemv::execute(
|
// Execute kernel
|
||||||
pim_matrix.as_ref(),
|
{
|
||||||
interleaved_input_vector.as_ref(),
|
pim::state::set_bank_mode(BankMode::PimAllBank);
|
||||||
output_partial_sum_vector.as_mut(),
|
|
||||||
dummy.as_ref(),
|
|
||||||
);
|
|
||||||
|
|
||||||
pim::state::set_bank_mode(BankMode::SingleBank);
|
for _ in 0..REPETITIONS {
|
||||||
|
gemv::execute(
|
||||||
|
pim_matrix.as_ref(),
|
||||||
|
interleaved_input_vector.as_ref(),
|
||||||
|
output_partial_sum_vector.as_mut(),
|
||||||
|
dummy.as_ref(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
pim::state::set_bank_mode(BankMode::SingleBank);
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeln!(Uart0, "{output_partial_sum_vector}").unwrap();
|
||||||
|
|
||||||
|
let output_vector = SVector::<F16x1, ROWS>::from_fn(|r, _| {
|
||||||
|
output_partial_sum_vector[r]
|
||||||
|
.0
|
||||||
|
.iter()
|
||||||
|
.fold(F16x1::zero(), |acc, val| acc + *val)
|
||||||
|
});
|
||||||
|
|
||||||
|
core::hint::black_box(output_vector);
|
||||||
}
|
}
|
||||||
|
pim_os::m5op::exit(0);
|
||||||
let output_vector = SVector::<F16x1, ROWS>::from_fn(|r, _| {
|
|
||||||
output_partial_sum_vector[r]
|
|
||||||
.0
|
|
||||||
.iter()
|
|
||||||
.fold(F16x1::zero(), |acc, val| acc + *val)
|
|
||||||
});
|
|
||||||
|
|
||||||
core::hint::black_box(output_vector);
|
|
||||||
|
|
||||||
writeln!(Uart0, "Done").unwrap();
|
writeln!(Uart0, "Done").unwrap();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -74,13 +74,15 @@ pub extern "C" fn main() {
|
|||||||
pim::state::set_bank_mode(BankMode::PimAllBank);
|
pim::state::set_bank_mode(BankMode::PimAllBank);
|
||||||
|
|
||||||
pim_os::m5op::exit(0);
|
pim_os::m5op::exit(0);
|
||||||
haxpy::execute::<ROWS, BLOCKS>(
|
for _ in 0..10 {
|
||||||
&a.0,
|
haxpy::execute::<ROWS, BLOCKS>(
|
||||||
&b.0,
|
&a.0,
|
||||||
&interleaved_scalar_vector,
|
&b.0,
|
||||||
&mut c.0,
|
&interleaved_scalar_vector,
|
||||||
dummy.as_ref(),
|
&mut c.0,
|
||||||
);
|
dummy.as_ref(),
|
||||||
|
);
|
||||||
|
}
|
||||||
pim_os::m5op::exit(0);
|
pim_os::m5op::exit(0);
|
||||||
|
|
||||||
pim::state::set_bank_mode(BankMode::SingleBank);
|
pim::state::set_bank_mode(BankMode::SingleBank);
|
||||||
|
|||||||
@@ -68,7 +68,9 @@ pub extern "C" fn main() {
|
|||||||
pim::state::set_bank_mode(BankMode::PimAllBank);
|
pim::state::set_bank_mode(BankMode::PimAllBank);
|
||||||
|
|
||||||
pim_os::m5op::exit(0);
|
pim_os::m5op::exit(0);
|
||||||
vadd::execute::<ROWS, BLOCKS>(&a.0, &b.0, &mut c.0, dummy.as_ref());
|
for _ in 0..10 {
|
||||||
|
vadd::execute::<ROWS, BLOCKS>(&a.0, &b.0, &mut c.0, dummy.as_ref());
|
||||||
|
}
|
||||||
pim_os::m5op::exit(0);
|
pim_os::m5op::exit(0);
|
||||||
|
|
||||||
pim::state::set_bank_mode(BankMode::SingleBank);
|
pim::state::set_bank_mode(BankMode::SingleBank);
|
||||||
|
|||||||
@@ -68,7 +68,9 @@ pub extern "C" fn main() {
|
|||||||
pim::state::set_bank_mode(BankMode::PimAllBank);
|
pim::state::set_bank_mode(BankMode::PimAllBank);
|
||||||
|
|
||||||
pim_os::m5op::exit(0);
|
pim_os::m5op::exit(0);
|
||||||
vmul::execute::<ROWS, BLOCKS>(&a.0, &b.0, &mut c.0, dummy.as_ref());
|
for _ in 0..10 {
|
||||||
|
vmul::execute::<ROWS, BLOCKS>(&a.0, &b.0, &mut c.0, dummy.as_ref());
|
||||||
|
}
|
||||||
pim_os::m5op::exit(0);
|
pim_os::m5op::exit(0);
|
||||||
|
|
||||||
pim::state::set_bank_mode(BankMode::SingleBank);
|
pim::state::set_bank_mode(BankMode::SingleBank);
|
||||||
|
|||||||
Reference in New Issue
Block a user