Further implementation of matrix support
This commit is contained in:
@@ -12,7 +12,7 @@ use core::{
|
||||
use half::f16;
|
||||
use nalgebra::{Const, Matrix, Matrix2, SMatrixView};
|
||||
use pim::{
|
||||
array::{PimMatrixArena, PimStorage},
|
||||
array::{PimMatrixArena, PimRegion, PimStorage},
|
||||
// array::PimMatrix,
|
||||
// array::{BankArray, ComputeArray},
|
||||
kernel::TEST_KERNEL,
|
||||
@@ -32,25 +32,30 @@ pub extern "C" fn entry() -> ! {
|
||||
let mut uart = Uart0;
|
||||
let mut pim_state = PimState::new(&TEST_KERNEL);
|
||||
|
||||
let mut arena = RefCell::new(PimMatrixArena([[F16x16::default(); 8]; 8]));
|
||||
let mut pim_matrix_arena = RefCell::new(PimMatrixArena([[F16x16::default(); 8]; 8]));
|
||||
let pim_storage0 = PimStorage {
|
||||
arena: &arena,
|
||||
arena: &pim_matrix_arena,
|
||||
index: 0,
|
||||
};
|
||||
let pim_storage1 = PimStorage {
|
||||
arena: &arena,
|
||||
arena: &pim_matrix_arena,
|
||||
index: 1,
|
||||
};
|
||||
let pim_storage2 = PimStorage {
|
||||
arena: &arena,
|
||||
arena: &pim_matrix_arena,
|
||||
index: 2,
|
||||
};
|
||||
let mut matrix0 = Matrix::from_data(pim_storage0);
|
||||
let mut matrix1 = Matrix::from_data(pim_storage1);
|
||||
matrix0.fill_lower_triangle(F16x1(f16::ONE), 0);
|
||||
matrix1.fill_upper_triangle(F16x1(f16::from_f32(2.0)), 0);
|
||||
writeln!(&mut uart, "{}", matrix0).unwrap();
|
||||
writeln!(&mut uart, "{}", matrix1).unwrap();
|
||||
matrix0.fill_column(0, F16x1(f16::ZERO));
|
||||
matrix0.fill_column(1, F16x1(f16::ONE));
|
||||
matrix0.fill_column(2, F16x1(f16::PI));
|
||||
matrix0.fill_column(3, F16x1(f16::E));
|
||||
matrix0.fill_column(4, F16x1(f16::EPSILON));
|
||||
matrix0.fill_column(5, F16x1(f16::SQRT_2));
|
||||
// matrix0.fill_lower_triangle(F16x1(f16::ONE), 0);
|
||||
// matrix1.fill_upper_triangle(F16x1(f16::from_f32(2.0)), 0);
|
||||
writeln!(&mut uart, "{matrix0} * 2\n=").unwrap();
|
||||
|
||||
// let mut compute_array: ComputeArray<3> = ComputeArray([
|
||||
// BankArray([F16x16([f16::from_f32(0.1); 16]); 32]),
|
||||
@@ -75,21 +80,29 @@ pub extern "C" fn entry() -> ! {
|
||||
|
||||
// writeln!(&mut uart, "MAC: BankArray2 += BankArray0 * BankArray1",).unwrap();
|
||||
|
||||
// // Invalidate and flush array just in case
|
||||
// compute_array.invalidate_flush();
|
||||
// Invalidate and flush array just in case
|
||||
pim_matrix_arena.borrow_mut().invalidate_flush();
|
||||
// dummy_array.invalidate_flush();
|
||||
// barrier::dsb(barrier::SY);
|
||||
barrier::dsb(barrier::SY);
|
||||
|
||||
// pim_state.set_bank_mode(BankMode::PimAllBank);
|
||||
// compute_array.0[1].execute_instruction_read_dual_bank();
|
||||
// compute_array.0[2].execute_instruction_read_dual_bank();
|
||||
// compute_array.0[0].execute_instruction_read_dual_bank();
|
||||
// compute_array.0[2].execute_instruction_write_dual_bank();
|
||||
// dummy_array.execute_instruction_read_single_bank();
|
||||
// pim_state.set_bank_mode(BankMode::SingleBank);
|
||||
pim_state.set_bank_mode(BankMode::PimAllBank);
|
||||
pim_matrix_arena
|
||||
.borrow_mut()
|
||||
.execute_instruction_read_dual_bank();
|
||||
pim_matrix_arena
|
||||
.borrow_mut()
|
||||
.execute_instruction_read_dual_bank();
|
||||
pim_matrix_arena
|
||||
.borrow_mut()
|
||||
.execute_instruction_write_dual_bank();
|
||||
pim_state.set_bank_mode(BankMode::SingleBank);
|
||||
|
||||
pim_matrix_arena.borrow_mut().invalidate();
|
||||
// compute_array.invalidate();
|
||||
// barrier::dsb(barrier::SY);
|
||||
barrier::dsb(barrier::SY);
|
||||
|
||||
// writeln!(&mut uart, "{matrix0}+{matrix1}").unwrap();
|
||||
writeln!(&mut uart, "{matrix0}").unwrap();
|
||||
|
||||
// writeln!(
|
||||
// &mut uart,
|
||||
|
||||
@@ -5,7 +5,7 @@ use core::{arch::asm, cell::RefCell};
|
||||
use half::f16;
|
||||
use nalgebra::{Const, Dyn, RawStorage, RawStorageMut, SMatrix, Storage};
|
||||
|
||||
// const NUMBER_OF_BANKS: usize = 32;
|
||||
const TOTAL_BANKS: usize = 32;
|
||||
const EVEN_BANK_INDEX: usize = 0;
|
||||
const ODD_BANK_INDEX: usize = 8;
|
||||
|
||||
@@ -13,12 +13,42 @@ const ODD_BANK_INDEX: usize = 8;
|
||||
#[repr(C, align(1024))]
|
||||
pub struct PimMatrixArena<const R: usize, const C: usize>(pub [[F16x16; R]; C]);
|
||||
|
||||
impl<const R: usize, const C: usize> PimRegion for PimMatrixArena<R, C> {
|
||||
const NUMBER_OF_BANKS: usize = R * C;
|
||||
|
||||
fn bank_ptr(&self, bank_index: usize) -> *const f16 {
|
||||
unsafe { (self.0.as_ptr() as *const F16x16).offset(bank_index as _) as *const f16 }
|
||||
}
|
||||
|
||||
fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16 {
|
||||
unsafe { (self.0.as_mut_ptr() as *mut F16x16).offset(bank_index as _) as *mut f16 }
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle that pairs a shared [`PimMatrixArena`] with an index into it.
///
/// The arena is borrowed through a `RefCell`, so several handles can point at
/// the same arena and borrow it mutably one at a time, checked at run time.
#[derive(Debug)]
|
||||
pub struct PimStorage<'a, const R: usize, const C: usize> {
|
||||
/// Arena shared by every handle created from it.
pub arena: &'a RefCell<PimMatrixArena<R, C>>,
|
||||
/// NOTE(review): presumably selects this handle's slot within the arena;
/// confirm against the `RawStorage` implementation.
pub index: usize,
|
||||
}
|
||||
|
||||
// impl<'a, const R: usize, const C: usize> PimRegion for PimStorage<'a, R, C> {
|
||||
// const NUMBER_OF_BANKS: usize = R * C;
|
||||
|
||||
// fn bank_ptr(&self, bank_index: usize) -> *const f16 {
|
||||
// unsafe {
|
||||
// (self.arena.borrow().0.as_ptr() as *const F16x16).offset((self.index + bank_index) as _)
|
||||
// as *const f16
|
||||
// }
|
||||
// }
|
||||
|
||||
// fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16 {
|
||||
// unsafe {
|
||||
// (self.arena.borrow_mut().0.as_mut_ptr() as *mut F16x16)
|
||||
// .offset((self.index + bank_index) as _) as *mut f16
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
unsafe impl<'a, const R: usize, const C: usize> RawStorage<F16x1, Const<R>, Const<C>>
|
||||
for PimStorage<'a, R, C>
|
||||
{
|
||||
@@ -63,22 +93,6 @@ unsafe impl<'a, const R: usize, const C: usize> RawStorageMut<F16x1, Const<R>, C
|
||||
}
|
||||
}
|
||||
|
||||
// #[repr(C, align(1024))]
|
||||
// #[derive(Clone, Debug, Default)]
|
||||
// pub struct PimMatrix(pub SMatrix<f166, 8, 8>);
|
||||
|
||||
// impl PimRegion for PimMatrix {
|
||||
// const NUMBER_OF_BANKS: usize = 64;
|
||||
|
||||
// fn bank_ptr(&self, bank_index: usize) -> *const f16 {
|
||||
// return &self.0[bank_index].0 as _;
|
||||
// }
|
||||
|
||||
// fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16 {
|
||||
// return &mut self.0[bank_index].0 as _;
|
||||
// }
|
||||
// }
|
||||
|
||||
pub trait PimRegion {
|
||||
const NUMBER_OF_BANKS: usize;
|
||||
|
||||
@@ -88,7 +102,6 @@ pub trait PimRegion {
|
||||
fn execute_instruction_read_single_bank(&self) {
|
||||
if !cfg!(feature = "cacheless") {
|
||||
self.invalidate_bank(EVEN_BANK_INDEX);
|
||||
|
||||
barrier::dsb(barrier::SY);
|
||||
}
|
||||
|
||||
@@ -114,9 +127,11 @@ pub trait PimRegion {
|
||||
}
|
||||
|
||||
fn read_data_bank(&self, bank_index: usize) {
|
||||
let bank = self.bank_ptr(bank_index);
|
||||
unsafe {
|
||||
core::ptr::read_volatile(bank);
|
||||
for i in 0..Self::NUMBER_OF_BANKS / TOTAL_BANKS {
|
||||
let bank = self.bank_ptr(bank_index * i);
|
||||
unsafe {
|
||||
core::ptr::read_volatile(bank);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -137,10 +152,10 @@ pub trait PimRegion {
|
||||
}
|
||||
|
||||
fn execute_instruction_write_dual_bank(&mut self) {
|
||||
if !cfg!(feature = "cacheless") {
|
||||
self.preload_zero();
|
||||
barrier::dsb(barrier::SY);
|
||||
}
|
||||
// if !cfg!(feature = "cacheless") {
|
||||
// self.preload_zero();
|
||||
// barrier::dsb(barrier::SY);
|
||||
// }
|
||||
|
||||
// Write to first and second bank
|
||||
self.write_data_bank(EVEN_BANK_INDEX);
|
||||
@@ -155,9 +170,11 @@ pub trait PimRegion {
|
||||
}
|
||||
|
||||
fn write_data_bank(&mut self, bank_index: usize) {
|
||||
let bank = self.bank_ptr_mut(bank_index);
|
||||
unsafe {
|
||||
core::ptr::write_volatile(bank, Default::default());
|
||||
for i in 0..Self::NUMBER_OF_BANKS / TOTAL_BANKS {
|
||||
let bank = self.bank_ptr_mut(bank_index * i);
|
||||
unsafe {
|
||||
core::ptr::write_volatile(bank, Default::default());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -166,9 +183,11 @@ pub trait PimRegion {
|
||||
}
|
||||
|
||||
fn invalidate_bank(&self, bank_index: usize) {
|
||||
let bank = self.bank_ptr(bank_index);
|
||||
unsafe {
|
||||
asm!("dc ivac, {val}", val = in(reg) bank);
|
||||
for i in 0..Self::NUMBER_OF_BANKS / TOTAL_BANKS {
|
||||
let bank = self.bank_ptr(bank_index * i);
|
||||
unsafe {
|
||||
asm!("dc ivac, {val}", val = in(reg) bank);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -177,9 +196,11 @@ pub trait PimRegion {
|
||||
}
|
||||
|
||||
fn invalidate_flush_bank(&self, bank_index: usize) {
|
||||
let bank = self.bank_ptr(bank_index);
|
||||
unsafe {
|
||||
asm!("dc civac, {val}", val = in(reg) bank);
|
||||
for i in 0..Self::NUMBER_OF_BANKS / TOTAL_BANKS {
|
||||
let bank = self.bank_ptr(bank_index * i);
|
||||
unsafe {
|
||||
asm!("dc civac, {val}", val = in(reg) bank);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -188,14 +209,31 @@ pub trait PimRegion {
|
||||
}
|
||||
|
||||
fn preload_zero_bank(&self, bank_index: usize) {
|
||||
let bank = self.bank_ptr(bank_index);
|
||||
unsafe {
|
||||
// Preload first bank
|
||||
asm!("dc zva, {val}", val = in(reg) bank);
|
||||
for i in 0..Self::NUMBER_OF_BANKS / TOTAL_BANKS {
|
||||
let bank = self.bank_ptr(bank_index * i);
|
||||
unsafe {
|
||||
// Preload first bank
|
||||
asm!("dc zva, {val}", val = in(reg) bank);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C, align(1024))]
|
||||
pub struct DummyArray(pub [F16x16; TOTAL_BANKS]);
|
||||
|
||||
impl PimRegion for DummyArray {
|
||||
const NUMBER_OF_BANKS: usize = TOTAL_BANKS;
|
||||
|
||||
fn bank_ptr(&self, bank_index: usize) -> *const f16 {
|
||||
&self.0[bank_index] as *const F16x16 as *const f16
|
||||
}
|
||||
|
||||
fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16 {
|
||||
&mut self.0[bank_index] as *mut F16x16 as *mut f16
|
||||
}
|
||||
}
|
||||
|
||||
/// Fixed-size collection of `N` PIM regions of type `T`.
///
/// `repr(C, align(65536))` fixes the layout and aligns the whole collection
/// to a 65536-byte boundary.
#[derive(Clone, Debug)]
|
||||
#[repr(C, align(65536))]
|
||||
pub struct ComputeArray<T: PimRegion, const N: usize>(pub [T; N]);
|
||||
|
||||
@@ -9,34 +9,46 @@ pub const TEST_KERNEL: Kernel = Kernel([
|
||||
src: File::Bank,
|
||||
dst: File::GrfB { index: 0 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 1 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfB { index: 1 },
|
||||
},
|
||||
Instruction::MAC {
|
||||
src0: File::Bank,
|
||||
Instruction::ADD {
|
||||
src0: File::GrfA { index: 0 },
|
||||
src1: File::GrfA { index: 0 },
|
||||
src2: File::GrfB { index: 0 },
|
||||
dst: File::GrfA { index: 0 },
|
||||
aam: false,
|
||||
},
|
||||
Instruction::ADD {
|
||||
src0: File::GrfB { index: 0 },
|
||||
src1: File::GrfB { index: 0 },
|
||||
dst: File::GrfB { index: 0 },
|
||||
aam: false,
|
||||
},
|
||||
Instruction::MAC {
|
||||
src0: File::Bank,
|
||||
src1: File::GrfA { index: 1 },
|
||||
src2: File::GrfB { index: 1 },
|
||||
dst: File::GrfB { index: 1 },
|
||||
aam: false,
|
||||
},
|
||||
// Instruction::MOV {
|
||||
// src: File::Bank,
|
||||
// dst: File::GrfA { index: 1 },
|
||||
// },
|
||||
// Instruction::MOV {
|
||||
// src: File::Bank,
|
||||
// dst: File::GrfB { index: 1 },
|
||||
// },
|
||||
// Instruction::MAC {
|
||||
// src0: File::Bank,
|
||||
// src1: File::GrfA { index: 0 },
|
||||
// src2: File::GrfB { index: 0 },
|
||||
// dst: File::GrfB { index: 0 },
|
||||
// aam: false,
|
||||
// },
|
||||
// Instruction::MAC {
|
||||
// src0: File::Bank,
|
||||
// src1: File::GrfA { index: 1 },
|
||||
// src2: File::GrfB { index: 1 },
|
||||
// dst: File::GrfB { index: 1 },
|
||||
// aam: false,
|
||||
// },
|
||||
Instruction::FILL {
|
||||
src: File::GrfA { index: 1 },
|
||||
src: File::GrfA { index: 0 },
|
||||
dst: File::Bank,
|
||||
},
|
||||
Instruction::FILL {
|
||||
src: File::GrfB { index: 1 },
|
||||
src: File::GrfB { index: 0 },
|
||||
dst: File::Bank,
|
||||
},
|
||||
Instruction::EXIT,
|
||||
@@ -63,4 +75,6 @@ pub const TEST_KERNEL: Kernel = Kernel([
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
]);
|
||||
|
||||
Reference in New Issue
Block a user