From 66a688c2e91fa71a0c1a801fb956ce4a31a336fa Mon Sep 17 00:00:00 2001 From: Derek Christ Date: Tue, 19 Dec 2023 16:59:20 +0100 Subject: [PATCH] Further implementation of matrix support --- pim-os/src/main.rs | 53 +++++++++++------- pim-os/src/pim/array.rs | 114 ++++++++++++++++++++++++++------------- pim-os/src/pim/kernel.rs | 54 ++++++++++++------- 3 files changed, 143 insertions(+), 78 deletions(-) diff --git a/pim-os/src/main.rs b/pim-os/src/main.rs index 0ba0e4f..6fc0f33 100644 --- a/pim-os/src/main.rs +++ b/pim-os/src/main.rs @@ -12,7 +12,7 @@ use core::{ use half::f16; use nalgebra::{Const, Matrix, Matrix2, SMatrixView}; use pim::{ - array::{PimMatrixArena, PimStorage}, + array::{PimMatrixArena, PimRegion, PimStorage}, // array::PimMatrix, // array::{BankArray, ComputeArray}, kernel::TEST_KERNEL, @@ -32,25 +32,30 @@ pub extern "C" fn entry() -> ! { let mut uart = Uart0; let mut pim_state = PimState::new(&TEST_KERNEL); - let mut arena = RefCell::new(PimMatrixArena([[F16x16::default(); 8]; 8])); + let mut pim_matrix_arena = RefCell::new(PimMatrixArena([[F16x16::default(); 8]; 8])); let pim_storage0 = PimStorage { - arena: &arena, + arena: &pim_matrix_arena, index: 0, }; let pim_storage1 = PimStorage { - arena: &arena, + arena: &pim_matrix_arena, index: 1, }; let pim_storage2 = PimStorage { - arena: &arena, + arena: &pim_matrix_arena, index: 2, }; let mut matrix0 = Matrix::from_data(pim_storage0); let mut matrix1 = Matrix::from_data(pim_storage1); - matrix0.fill_lower_triangle(F16x1(f16::ONE), 0); - matrix1.fill_upper_triangle(F16x1(f16::from_f32(2.0)), 0); - writeln!(&mut uart, "{}", matrix0).unwrap(); - writeln!(&mut uart, "{}", matrix1).unwrap(); + matrix0.fill_column(0, F16x1(f16::ZERO)); + matrix0.fill_column(1, F16x1(f16::ONE)); + matrix0.fill_column(2, F16x1(f16::PI)); + matrix0.fill_column(3, F16x1(f16::E)); + matrix0.fill_column(4, F16x1(f16::EPSILON)); + matrix0.fill_column(5, F16x1(f16::SQRT_2)); + // matrix0.fill_lower_triangle(F16x1(f16::ONE), 0); + // matrix1.fill_upper_triangle(F16x1(f16::from_f32(2.0)), 0); + writeln!(&mut uart, "{matrix0} * 2\n=").unwrap(); // let mut compute_array: ComputeArray<3> = ComputeArray([ // BankArray([F16x16([f16::from_f32(0.1); 16]); 32]), @@ -75,21 +80,29 @@ pub extern "C" fn entry() -> ! { // writeln!(&mut uart, "MAC: BankArray2 += BankArray0 * BankArray1",).unwrap(); - // // Invalidate and flush array just in case - // compute_array.invalidate_flush(); + // Invalidate and flush array just in case + pim_matrix_arena.borrow_mut().invalidate_flush(); // dummy_array.invalidate_flush(); - // barrier::dsb(barrier::SY); + barrier::dsb(barrier::SY); - // pim_state.set_bank_mode(BankMode::PimAllBank); - // compute_array.0[1].execute_instruction_read_dual_bank(); - // compute_array.0[2].execute_instruction_read_dual_bank(); - // compute_array.0[0].execute_instruction_read_dual_bank(); - // compute_array.0[2].execute_instruction_write_dual_bank(); - // dummy_array.execute_instruction_read_single_bank(); - // pim_state.set_bank_mode(BankMode::SingleBank); + pim_state.set_bank_mode(BankMode::PimAllBank); + pim_matrix_arena + .borrow_mut() + .execute_instruction_read_dual_bank(); + pim_matrix_arena + .borrow_mut() + .execute_instruction_read_dual_bank(); + pim_matrix_arena + .borrow_mut() + .execute_instruction_write_dual_bank(); + pim_state.set_bank_mode(BankMode::SingleBank); + pim_matrix_arena.borrow_mut().invalidate(); // compute_array.invalidate(); - // barrier::dsb(barrier::SY); + barrier::dsb(barrier::SY); + + // writeln!(&mut uart, "{matrix0}+{matrix1}").unwrap(); + writeln!(&mut uart, "{matrix0}").unwrap(); // writeln!( // &mut uart, diff --git a/pim-os/src/pim/array.rs b/pim-os/src/pim/array.rs index afd3052..53f7824 100644 --- a/pim-os/src/pim/array.rs +++ b/pim-os/src/pim/array.rs @@ -5,7 +5,7 @@ use core::{arch::asm, cell::RefCell}; use half::f16; use nalgebra::{Const, Dyn, RawStorage, RawStorageMut, SMatrix, Storage}; -// const NUMBER_OF_BANKS: usize = 32; +const TOTAL_BANKS: usize = 32; const EVEN_BANK_INDEX: usize = 0; const ODD_BANK_INDEX: usize = 8; @@ -13,12 +13,42 @@ const ODD_BANK_INDEX: usize = 8; #[repr(C, align(1024))] pub struct PimMatrixArena(pub [[F16x16; R]; C]); +impl PimRegion for PimMatrixArena { + const NUMBER_OF_BANKS: usize = R * C; + + fn bank_ptr(&self, bank_index: usize) -> *const f16 { + unsafe { (self.0.as_ptr() as *const F16x16).offset(bank_index as _) as *const f16 } + } + + fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16 { + unsafe { (self.0.as_mut_ptr() as *mut F16x16).offset(bank_index as _) as *mut f16 } + } +} + #[derive(Debug)] pub struct PimStorage<'a, const R: usize, const C: usize> { pub arena: &'a RefCell>, pub index: usize, } +// impl<'a, const R: usize, const C: usize> PimRegion for PimStorage<'a, R, C> { +// const NUMBER_OF_BANKS: usize = R * C; + +// fn bank_ptr(&self, bank_index: usize) -> *const f16 { +// unsafe { +// (self.arena.borrow().0.as_ptr() as *const F16x16).offset((self.index + bank_index) as _) +// as *const f16 +// } +// } + +// fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16 { +// unsafe { +// (self.arena.borrow_mut().0.as_mut_ptr() as *mut F16x16) +// .offset((self.index + bank_index) as _) as *mut f16 +// } +// } +// } + unsafe impl<'a, const R: usize, const C: usize> RawStorage, Const> for PimStorage<'a, R, C> { @@ -63,22 +93,6 @@ unsafe impl<'a, const R: usize, const C: usize> RawStorageMut, C } } -// #[repr(C, align(1024))] -// #[derive(Clone, Debug, Default)] -// pub struct PimMatrix(pub SMatrix); - -// impl PimRegion for PimMatrix { -// const NUMBER_OF_BANKS: usize = 64; - -// fn bank_ptr(&self, bank_index: usize) -> *const f16 { -// return &self.0[bank_index].0 as _; -// } - -// fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16 { -// return &mut self.0[bank_index].0 as _; -// } -// } - pub trait PimRegion { const NUMBER_OF_BANKS: usize; @@ -88,7 +102,6 @@ pub trait PimRegion { fn execute_instruction_read_single_bank(&self) { if !cfg!(feature = "cacheless") { self.invalidate_bank(EVEN_BANK_INDEX); - barrier::dsb(barrier::SY); } @@ -114,9 +127,11 @@ pub trait PimRegion { } fn read_data_bank(&self, bank_index: usize) { - let bank = self.bank_ptr(bank_index); - unsafe { - core::ptr::read_volatile(bank); + for i in 0..Self::NUMBER_OF_BANKS / TOTAL_BANKS { + let bank = self.bank_ptr(bank_index * i); + unsafe { + core::ptr::read_volatile(bank); + } } } @@ -137,10 +152,10 @@ pub trait PimRegion { } fn execute_instruction_write_dual_bank(&mut self) { - if !cfg!(feature = "cacheless") { - self.preload_zero(); - barrier::dsb(barrier::SY); - } + // if !cfg!(feature = "cacheless") { + // self.preload_zero(); + // barrier::dsb(barrier::SY); + // } // Write to first and second bank self.write_data_bank(EVEN_BANK_INDEX); @@ -155,9 +170,11 @@ pub trait PimRegion { } fn write_data_bank(&mut self, bank_index: usize) { - let bank = self.bank_ptr_mut(bank_index); - unsafe { - core::ptr::write_volatile(bank, Default::default()); + for i in 0..Self::NUMBER_OF_BANKS / TOTAL_BANKS { + let bank = self.bank_ptr_mut(bank_index * i); + unsafe { + core::ptr::write_volatile(bank, Default::default()); + } } } @@ -166,9 +183,11 @@ pub trait PimRegion { } fn invalidate_bank(&self, bank_index: usize) { - let bank = self.bank_ptr(bank_index); - unsafe { - asm!("dc ivac, {val}", val = in(reg) bank); + for i in 0..Self::NUMBER_OF_BANKS / TOTAL_BANKS { + let bank = self.bank_ptr(bank_index * i); + unsafe { + asm!("dc ivac, {val}", val = in(reg) bank); + } } } @@ -177,9 +196,11 @@ pub trait PimRegion { } fn invalidate_flush_bank(&self, bank_index: usize) { - let bank = self.bank_ptr(bank_index); - unsafe { - asm!("dc civac, {val}", val = in(reg) bank); + for i in 0..Self::NUMBER_OF_BANKS / TOTAL_BANKS { + let bank = self.bank_ptr(bank_index * i); + unsafe { + asm!("dc civac, {val}", val = in(reg) bank); + } } } @@ -188,14 +209,31 @@ pub trait PimRegion { } fn preload_zero_bank(&self, bank_index: usize) { - let bank = self.bank_ptr(bank_index); - unsafe { - // Preload first bank - asm!("dc zva, {val}", val = in(reg) bank); + for i in 0..Self::NUMBER_OF_BANKS / TOTAL_BANKS { + let bank = self.bank_ptr(bank_index * i); + unsafe { + // Preload first bank + asm!("dc zva, {val}", val = in(reg) bank); + } } } } +#[repr(C, align(1024))] +pub struct DummyArray(pub [F16x16; TOTAL_BANKS]); + +impl PimRegion for DummyArray { + const NUMBER_OF_BANKS: usize = TOTAL_BANKS; + + fn bank_ptr(&self, bank_index: usize) -> *const f16 { + &self.0[bank_index] as *const F16x16 as *const f16 + } + + fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16 { + &mut self.0[bank_index] as *mut F16x16 as *mut f16 + } +} + #[derive(Clone, Debug)] #[repr(C, align(65536))] pub struct ComputeArray(pub [T; N]); diff --git a/pim-os/src/pim/kernel.rs b/pim-os/src/pim/kernel.rs index e11b0e9..0a89809 100644 --- a/pim-os/src/pim/kernel.rs +++ b/pim-os/src/pim/kernel.rs @@ -9,34 +9,46 @@ pub const TEST_KERNEL: Kernel = Kernel([ src: File::Bank, dst: File::GrfB { index: 0 }, }, - Instruction::MOV { - src: File::Bank, - dst: File::GrfA { index: 1 }, - }, - Instruction::MOV { - src: File::Bank, - dst: File::GrfB { index: 1 }, - }, - Instruction::MAC { - src0: File::Bank, + Instruction::ADD { + src0: File::GrfA { index: 0 }, src1: File::GrfA { index: 0 }, - src2: File::GrfB { index: 0 }, + dst: File::GrfA { index: 0 }, + aam: false, + }, + Instruction::ADD { + src0: File::GrfB { index: 0 }, + src1: File::GrfB { index: 0 }, dst: File::GrfB { index: 0 }, aam: false, }, - Instruction::MAC { - src0: File::Bank, - src1: File::GrfA { index: 1 }, - src2: File::GrfB { index: 1 }, - dst: File::GrfB { index: 1 }, - aam: false, - }, + // Instruction::MOV { + // src: File::Bank, + // dst: File::GrfA { index: 1 }, + // }, + // Instruction::MOV { + // src: File::Bank, + // dst: File::GrfB { index: 1 }, + // }, + // Instruction::MAC { + // src0: File::Bank, + // src1: File::GrfA { index: 0 }, + // src2: File::GrfB { index: 0 }, + // dst: File::GrfB { index: 0 }, + // aam: false, + // }, + // Instruction::MAC { + // src0: File::Bank, + // src1: File::GrfA { index: 1 }, + // src2: File::GrfB { index: 1 }, + // dst: File::GrfB { index: 1 }, + // aam: false, + // }, Instruction::FILL { - src: File::GrfA { index: 1 }, + src: File::GrfA { index: 0 }, dst: File::Bank, }, Instruction::FILL { - src: File::GrfB { index: 1 }, + src: File::GrfB { index: 0 }, dst: File::Bank, }, Instruction::EXIT, @@ -63,4 +75,6 @@ pub const TEST_KERNEL: Kernel = Kernel([ Instruction::NOP, Instruction::NOP, Instruction::NOP, + Instruction::NOP, + Instruction::NOP, ]);