Functional matrix operation

This commit is contained in:
2023-12-23 15:48:12 +01:00
parent 66a688c2e9
commit e49b667b8a
5 changed files with 196 additions and 154 deletions

View File

@@ -10,11 +10,9 @@ use core::{
sync::atomic::{compiler_fence, Ordering},
};
use half::f16;
use nalgebra::{Const, Matrix, Matrix2, SMatrixView};
use nalgebra::Matrix;
use pim::{
array::{PimMatrixArena, PimRegion, PimStorage},
// array::PimMatrix,
// array::{BankArray, ComputeArray},
kernel::TEST_KERNEL,
matrix::{F16x1, F16x16},
state::PimState,
@@ -32,7 +30,7 @@ pub extern "C" fn entry() -> ! {
let mut uart = Uart0;
let mut pim_state = PimState::new(&TEST_KERNEL);
let mut pim_matrix_arena = RefCell::new(PimMatrixArena([[F16x16::default(); 8]; 8]));
let pim_matrix_arena = RefCell::new(PimMatrixArena([[F16x16::default(); 8]; 8]));
let pim_storage0 = PimStorage {
arena: &pim_matrix_arena,
index: 0,
@@ -46,40 +44,25 @@ pub extern "C" fn entry() -> ! {
index: 2,
};
let mut matrix0 = Matrix::from_data(pim_storage0);
let mut matrix1 = Matrix::from_data(pim_storage1);
let matrix1 = Matrix::from_data(pim_storage1);
matrix0.fill_column(0, F16x1(f16::ZERO));
matrix0.fill_column(1, F16x1(f16::ONE));
matrix0.fill_column(2, F16x1(f16::PI));
matrix0.fill_column(3, F16x1(f16::E));
matrix0.fill_column(4, F16x1(f16::EPSILON));
matrix0.fill_column(5, F16x1(f16::SQRT_2));
// matrix0.fill_lower_triangle(F16x1(f16::ONE), 0);
// matrix1.fill_upper_triangle(F16x1(f16::from_f32(2.0)), 0);
matrix0.fill_column(6, F16x1(f16::LN_2));
matrix0.fill_column(7, F16x1(f16::LN_10));
writeln!(
&mut uart,
"Cache Lines: {}\nRows: {}",
PimMatrixArena::<8, 8>::OCCUPIED_CACHE_LINES,
PimMatrixArena::<8, 8>::OCCUPIED_ROWS
)
.unwrap();
writeln!(&mut uart, "{matrix0} * 2\n=").unwrap();
// let mut compute_array: ComputeArray<3> = ComputeArray([
// BankArray([F16x16([f16::from_f32(0.1); 16]); 32]),
// BankArray([f16::from_f32(0.2); 512]),
// BankArray([f16::from_f32(0.3); 512]),
// ]);
// let dummy_array = BankArray::default();
// writeln!(
// &mut uart,
// "PIM array is at {:x?}",
// core::ptr::addr_of!(compute_array)
// )
// .unwrap();
// writeln!(
// &mut uart,
// "BankArray0: [{:?}, ...]\nBankArray1: [{:?}, ...]\nBankArray2: [{:?}, ...]",
// compute_array.0[0].0[0], compute_array.0[1].0[0], compute_array.0[2].0[0]
// )
// .unwrap();
// writeln!(&mut uart, "MAC: BankArray2 += BankArray0 * BankArray1",).unwrap();
// Invalidate and flush array just in case
pim_matrix_arena.borrow_mut().invalidate_flush();
// dummy_array.invalidate_flush();
@@ -98,21 +81,11 @@ pub extern "C" fn entry() -> ! {
pim_state.set_bank_mode(BankMode::SingleBank);
pim_matrix_arena.borrow_mut().invalidate();
// compute_array.invalidate();
barrier::dsb(barrier::SY);
// writeln!(&mut uart, "{matrix0}+{matrix1}").unwrap();
writeln!(&mut uart, "{matrix0}").unwrap();
// writeln!(
// &mut uart,
// "BankArray2: [{:?}, ...]",
// compute_array.0[2].0[0]
// )
// .unwrap();
// writeln!(&mut uart, "ComputeArray:\n{:#?}", compute_array.0[2]).unwrap();
m5ops::exit();
loop {

View File

@@ -1,11 +1,10 @@
use super::matrix::{F16x1, F16x16};
use aarch64_cpu::asm::barrier;
use core::panic;
use core::{arch::asm, cell::RefCell};
use half::f16;
use nalgebra::{Const, Dyn, RawStorage, RawStorageMut, SMatrix, Storage};
use nalgebra::{Const, Dyn, RawStorage, RawStorageMut};
const TOTAL_BANKS: usize = 32;
const NUMBER_OF_BANKS: usize = 32;
const EVEN_BANK_INDEX: usize = 0;
const ODD_BANK_INDEX: usize = 8;
@@ -14,7 +13,8 @@ const ODD_BANK_INDEX: usize = 8;
/// Arena of [`F16x16`] values laid out as `C` outer entries, each holding `R`
/// elements (note the order: the outer array length is `C`, the inner is `R`).
pub struct PimMatrixArena<const R: usize, const C: usize>(pub [[F16x16; R]; C]);
impl<const R: usize, const C: usize> PimRegion for PimMatrixArena<R, C> {
const NUMBER_OF_BANKS: usize = R * C;
const OCCUPIED_CACHE_LINES: usize = R * C;
const OCCUPIED_ROWS: usize = Self::OCCUPIED_CACHE_LINES / NUMBER_OF_BANKS;
fn bank_ptr(&self, bank_index: usize) -> *const f16 {
unsafe { (self.0.as_ptr() as *const F16x16).offset(bank_index as _) as *const f16 }
@@ -31,24 +31,6 @@ pub struct PimStorage<'a, const R: usize, const C: usize> {
pub index: usize,
}
// impl<'a, const R: usize, const C: usize> PimRegion for PimStorage<'a, R, C> {
// const NUMBER_OF_BANKS: usize = R * C;
// fn bank_ptr(&self, bank_index: usize) -> *const f16 {
// unsafe {
// (self.arena.borrow().0.as_ptr() as *const F16x16).offset((self.index + bank_index) as _)
// as *const f16
// }
// }
// fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16 {
// unsafe {
// (self.arena.borrow_mut().0.as_mut_ptr() as *mut F16x16)
// .offset((self.index + bank_index) as _) as *mut f16
// }
// }
// }
unsafe impl<'a, const R: usize, const C: usize> RawStorage<F16x1, Const<R>, Const<C>>
for PimStorage<'a, R, C>
{
@@ -94,7 +76,8 @@ unsafe impl<'a, const R: usize, const C: usize> RawStorageMut<F16x1, Const<R>, C
}
pub trait PimRegion {
const NUMBER_OF_BANKS: usize;
const OCCUPIED_CACHE_LINES: usize;
const OCCUPIED_ROWS: usize;
fn bank_ptr(&self, bank_index: usize) -> *const f16;
fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16;
@@ -112,26 +95,26 @@ pub trait PimRegion {
}
fn execute_instruction_read_dual_bank(&self) {
if !cfg!(feature = "cacheless") {
self.invalidate_bank(EVEN_BANK_INDEX);
self.invalidate_bank(ODD_BANK_INDEX);
for i in (0..Self::OCCUPIED_ROWS).map(|i| i * NUMBER_OF_BANKS) {
if !cfg!(feature = "cacheless") {
self.invalidate_bank(EVEN_BANK_INDEX + i);
self.invalidate_bank(ODD_BANK_INDEX + i);
barrier::dsb(barrier::SY);
}
// Read from first and second bank
self.read_data_bank(EVEN_BANK_INDEX + i);
self.read_data_bank(ODD_BANK_INDEX + i);
barrier::dsb(barrier::SY);
}
// Read from first and second bank
self.read_data_bank(EVEN_BANK_INDEX);
self.read_data_bank(ODD_BANK_INDEX);
barrier::dsb(barrier::SY);
}
fn read_data_bank(&self, bank_index: usize) {
for i in 0..Self::NUMBER_OF_BANKS / TOTAL_BANKS {
let bank = self.bank_ptr(bank_index * i);
unsafe {
core::ptr::read_volatile(bank);
}
let bank = self.bank_ptr(bank_index);
unsafe {
core::ptr::read_volatile(bank);
}
}
@@ -152,78 +135,74 @@ pub trait PimRegion {
}
fn execute_instruction_write_dual_bank(&mut self) {
// if !cfg!(feature = "cacheless") {
// self.preload_zero();
// barrier::dsb(barrier::SY);
// }
for i in (0..Self::OCCUPIED_ROWS).map(|i| i * NUMBER_OF_BANKS) {
if !cfg!(feature = "cacheless") {
self.preload_zero_bank(EVEN_BANK_INDEX + i);
self.preload_zero_bank(ODD_BANK_INDEX + i);
barrier::dsb(barrier::SY);
}
// Write to first and second bank
self.write_data_bank(EVEN_BANK_INDEX);
self.write_data_bank(ODD_BANK_INDEX);
// Write to first and second bank
self.write_data_bank(EVEN_BANK_INDEX + i);
self.write_data_bank(ODD_BANK_INDEX + i);
if !cfg!(feature = "cacheless") {
self.invalidate_flush_bank(EVEN_BANK_INDEX);
self.invalidate_flush_bank(ODD_BANK_INDEX);
if !cfg!(feature = "cacheless") {
self.invalidate_flush_bank(EVEN_BANK_INDEX + i);
self.invalidate_flush_bank(ODD_BANK_INDEX + i);
}
barrier::dsb(barrier::SY);
}
barrier::dsb(barrier::SY);
}
fn write_data_bank(&mut self, bank_index: usize) {
for i in 0..Self::NUMBER_OF_BANKS / TOTAL_BANKS {
let bank = self.bank_ptr_mut(bank_index * i);
unsafe {
core::ptr::write_volatile(bank, Default::default());
}
let bank = self.bank_ptr_mut(bank_index);
unsafe {
core::ptr::write_volatile(bank, Default::default());
}
}
/// Invalidates the data-cache line of every cache line occupied by the region.
fn invalidate(&self) {
    for line in 0..Self::OCCUPIED_CACHE_LINES {
        self.invalidate_bank(line);
    }
}
fn invalidate_bank(&self, bank_index: usize) {
for i in 0..Self::NUMBER_OF_BANKS / TOTAL_BANKS {
let bank = self.bank_ptr(bank_index * i);
unsafe {
asm!("dc ivac, {val}", val = in(reg) bank);
}
let bank = self.bank_ptr(bank_index);
unsafe {
asm!("dc ivac, {val}", val = in(reg) bank);
}
}
/// Cleans and invalidates the data-cache line of every cache line occupied by
/// the region.
fn invalidate_flush(&self) {
    for line in 0..Self::OCCUPIED_CACHE_LINES {
        self.invalidate_flush_bank(line);
    }
}
fn invalidate_flush_bank(&self, bank_index: usize) {
for i in 0..Self::NUMBER_OF_BANKS / TOTAL_BANKS {
let bank = self.bank_ptr(bank_index * i);
unsafe {
asm!("dc civac, {val}", val = in(reg) bank);
}
let bank = self.bank_ptr(bank_index);
unsafe {
asm!("dc civac, {val}", val = in(reg) bank);
}
}
/// Zero-preloads every cache line occupied by the region.
fn preload_zero(&self) {
    for line in 0..Self::OCCUPIED_CACHE_LINES {
        self.preload_zero_bank(line);
    }
}
fn preload_zero_bank(&self, bank_index: usize) {
for i in 0..Self::NUMBER_OF_BANKS / TOTAL_BANKS {
let bank = self.bank_ptr(bank_index * i);
unsafe {
// Preload first bank
asm!("dc zva, {val}", val = in(reg) bank);
}
let bank = self.bank_ptr(bank_index);
unsafe {
// Preload first bank
asm!("dc zva, {val}", val = in(reg) bank);
}
}
}
/// One [`F16x16`] entry per bank (`NUMBER_OF_BANKS` entries), aligned to
/// 1024 bytes.
#[repr(C, align(1024))]
pub struct DummyArray(pub [F16x16; NUMBER_OF_BANKS]);
impl PimRegion for DummyArray {
const NUMBER_OF_BANKS: usize = TOTAL_BANKS;
const OCCUPIED_CACHE_LINES: usize = NUMBER_OF_BANKS;
const OCCUPIED_ROWS: usize = 1;
fn bank_ptr(&self, bank_index: usize) -> *const f16 {
&self.0[bank_index] as *const F16x16 as *const f16
@@ -233,25 +212,3 @@ impl PimRegion for DummyArray {
&mut self.0[bank_index] as *mut F16x16 as *mut f16
}
}
/// Fixed-size array of `N` [`PimRegion`] values, aligned to 64 KiB.
#[derive(Clone, Debug)]
#[repr(C, align(65536))]
pub struct ComputeArray<T: PimRegion, const N: usize>(pub [T; N]);
impl<T: PimRegion, const N: usize> ComputeArray<T, N> {
    /// Calls [`PimRegion::invalidate_flush`] on each contained region, in order.
    pub fn invalidate_flush(&self) {
        for region in &self.0 {
            region.invalidate_flush();
        }
    }

    /// Calls [`PimRegion::invalidate`] on each contained region, in order.
    pub fn invalidate(&self) {
        for region in &self.0 {
            region.invalidate();
        }
    }
}
impl<T: PimRegion + Default, const N: usize> Default for ComputeArray<T, N> {
    /// Builds an array whose `N` entries are each `T::default()`.
    fn default() -> Self {
        let regions: [T; N] = core::array::from_fn(|_slot| T::default());
        Self(regions)
    }
}

View File

@@ -9,6 +9,14 @@ pub const TEST_KERNEL: Kernel = Kernel([
src: File::Bank,
dst: File::GrfB { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfB { index: 1 },
},
Instruction::ADD {
src0: File::GrfA { index: 0 },
src1: File::GrfA { index: 0 },
@@ -21,6 +29,18 @@ pub const TEST_KERNEL: Kernel = Kernel([
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::ADD {
src0: File::GrfA { index: 1 },
src1: File::GrfA { index: 1 },
dst: File::GrfA { index: 1 },
aam: false,
},
Instruction::ADD {
src0: File::GrfB { index: 1 },
src1: File::GrfB { index: 1 },
dst: File::GrfB { index: 1 },
aam: false,
},
// Instruction::MOV {
// src: File::Bank,
// dst: File::GrfA { index: 1 },
@@ -51,6 +71,14 @@ pub const TEST_KERNEL: Kernel = Kernel([
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 1 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
@@ -71,10 +99,4 @@ pub const TEST_KERNEL: Kernel = Kernel([
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);