diff --git a/pim-os/Cargo.lock b/pim-os/Cargo.lock index 42b0091..287082e 100644 --- a/pim-os/Cargo.lock +++ b/pim-os/Cargo.lock @@ -12,10 +12,19 @@ dependencies = [ ] [[package]] -name = "atomic-polyfill" -version = "0.1.11" +name = "approx" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3ff7eb3f316534d83a8a2c3d1674ace8a5a71198eba31e2e2b597833f699b28" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + +[[package]] +name = "atomic-polyfill" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cf2bce30dfe09ef0bfaef228b9d414faaf7e563035494d7fe092dba54b300f4" dependencies = [ "critical-section", ] @@ -71,9 +80,9 @@ dependencies = [ [[package]] name = "heapless" -version = "0.7.16" +version = "0.7.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db04bc24a18b9ea980628ecf00e6c0264f3c1426dac36c00cb49b6fbad8b0743" +checksum = "cdc6457c0eb62c71aac4bc17216026d8410337c4126773b9c5daba343f17964f" dependencies = [ "atomic-polyfill", "hash32", @@ -92,6 +101,65 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "nalgebra" +version = "0.32.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "307ed9b18cc2423f29e83f84fd23a8e73628727990181f18641a8b5dc2ab1caa" +dependencies = [ + "approx", + "num-complex", + "num-rational", + "num-traits", + "simba", + "typenum", +] + +[[package]] +name = "num-complex" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +dependencies = [ + "autocfg", +] + +[[package]] +name = "paste" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + [[package]] name = "pim-isa" version = "0.1.0" @@ -105,6 +173,8 @@ version = "0.1.0" dependencies = [ "aarch64-cpu", "half", + "nalgebra", + "num-traits", "pim-isa", "serde", "serde-json-core", @@ -112,9 +182,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.70" +version = "1.0.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" +checksum = "75cb1540fadbd5b8fbccc4dddad2734eba435053f725621c070711a14bb5f4b8" dependencies = [ "unicode-ident", ] @@ -139,9 +209,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.15" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" [[package]] name = "scopeguard" @@ -186,6 +256,18 @@ dependencies = [ "syn", ] +[[package]] +name = "simba" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "061507c94fc6ab4ba1c9a0305018408e312e17c041eb63bef8aa726fa33aceae" +dependencies = [ + "approx", + "num-complex", + "num-traits", + "paste", +] + [[package]] name = "spin" version = "0.9.8" @@ -203,9 +285,9 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "syn" -version = "2.0.39" +version = "2.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" +checksum = "5b7d0a2c048d661a1a59fcd7355baa232f7ed34e0ee4df2eef3c1c1c0d3852d8" dependencies = [ "proc-macro2", "quote", @@ -218,6 +300,12 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "696941a0aee7e276a165a978b37918fd5d22c55c3d6bda197813070ca9c0f21c" +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + [[package]] name = "unicode-ident" version = "1.0.12" diff --git a/pim-os/rust-toolchain.toml b/pim-os/rust-toolchain.toml new file mode 100644 index 0000000..5d56faf --- /dev/null +++ b/pim-os/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel = "nightly" diff --git a/pim-os/src/main.rs b/pim-os/src/main.rs index 6fc0f33..b1052d9 100644 --- a/pim-os/src/main.rs +++ b/pim-os/src/main.rs @@ -10,11 +10,9 @@ use core::{ sync::atomic::{compiler_fence, Ordering}, }; use half::f16; -use nalgebra::{Const, Matrix, Matrix2, SMatrixView}; +use nalgebra::Matrix; use pim::{ array::{PimMatrixArena, PimRegion, PimStorage}, - // array::PimMatrix, - // array::{BankArray, ComputeArray}, kernel::TEST_KERNEL, matrix::{F16x1, F16x16}, state::PimState, @@ -32,7 +30,7 @@ pub extern "C" fn entry() -> ! { let mut uart = Uart0; let mut pim_state = PimState::new(&TEST_KERNEL); - let mut pim_matrix_arena = RefCell::new(PimMatrixArena([[F16x16::default(); 8]; 8])); + let pim_matrix_arena = RefCell::new(PimMatrixArena([[F16x16::default(); 8]; 8])); let pim_storage0 = PimStorage { arena: &pim_matrix_arena, index: 0, @@ -46,40 +44,25 @@ pub extern "C" fn entry() -> ! { index: 2, }; let mut matrix0 = Matrix::from_data(pim_storage0); - let mut matrix1 = Matrix::from_data(pim_storage1); + let matrix1 = Matrix::from_data(pim_storage1); matrix0.fill_column(0, F16x1(f16::ZERO)); matrix0.fill_column(1, F16x1(f16::ONE)); matrix0.fill_column(2, F16x1(f16::PI)); matrix0.fill_column(3, F16x1(f16::E)); matrix0.fill_column(4, F16x1(f16::EPSILON)); matrix0.fill_column(5, F16x1(f16::SQRT_2)); - // matrix0.fill_lower_triangle(F16x1(f16::ONE), 0); - // matrix1.fill_upper_triangle(F16x1(f16::from_f32(2.0)), 0); + matrix0.fill_column(6, F16x1(f16::LN_2)); + matrix0.fill_column(7, F16x1(f16::LN_10)); + + writeln!( + &mut uart, + "Cache Lines: {}\nRows: {}", + PimMatrixArena::<8, 8>::OCCUPIED_CACHE_LINES, + PimMatrixArena::<8, 8>::OCCUPIED_ROWS + ) + .unwrap(); writeln!(&mut uart, "{matrix0} * 2\n=").unwrap(); - // let mut compute_array: ComputeArray<3> = ComputeArray([ - // BankArray([F16x16([f16::from_f32(0.1); 16]); 32]), - // BankArray([f16::from_f32(0.2); 512]), - // BankArray([f16::from_f32(0.3); 512]), - // ]); - // let dummy_array = BankArray::default(); - - // writeln!( - // &mut uart, - // "PIM array is at {:x?}", - // core::ptr::addr_of!(compute_array) - // ) - // .unwrap(); - - // writeln!( - // &mut uart, - // "BankArray0: [{:?}, ...]\nBankArray1: [{:?}, ...]\nBankArray2: [{:?}, ...]", - // compute_array.0[0].0[0], compute_array.0[1].0[0], compute_array.0[2].0[0] - // ) - // .unwrap(); - - // writeln!(&mut uart, "MAC: BankArray2 += BankArray0 * BankArray1",).unwrap(); - // Invalidate and flush array just in case pim_matrix_arena.borrow_mut().invalidate_flush(); // dummy_array.invalidate_flush(); @@ -98,21 +81,11 @@ pub extern "C" fn entry() -> ! { pim_state.set_bank_mode(BankMode::SingleBank); pim_matrix_arena.borrow_mut().invalidate(); - // compute_array.invalidate(); barrier::dsb(barrier::SY); // writeln!(&mut uart, "{matrix0}+{matrix1}").unwrap(); writeln!(&mut uart, "{matrix0}").unwrap(); - // writeln!( - // &mut uart, - // "BankArray2: [{:?}, ...]", - // compute_array.0[2].0[0] - // ) - // .unwrap(); - - // writeln!(&mut uart, "ComputeArray:\n{:#?}", compute_array.0[2]).unwrap(); - m5ops::exit(); loop { diff --git a/pim-os/src/pim/array.rs b/pim-os/src/pim/array.rs index 53f7824..3628684 100644 --- a/pim-os/src/pim/array.rs +++ b/pim-os/src/pim/array.rs @@ -1,11 +1,10 @@ use super::matrix::{F16x1, F16x16}; use aarch64_cpu::asm::barrier; -use core::panic; use core::{arch::asm, cell::RefCell}; use half::f16; -use nalgebra::{Const, Dyn, RawStorage, RawStorageMut, SMatrix, Storage}; +use nalgebra::{Const, Dyn, RawStorage, RawStorageMut}; -const TOTAL_BANKS: usize = 32; +const NUMBER_OF_BANKS: usize = 32; const EVEN_BANK_INDEX: usize = 0; const ODD_BANK_INDEX: usize = 8; @@ -14,7 +13,8 @@ const ODD_BANK_INDEX: usize = 8; pub struct PimMatrixArena(pub [[F16x16; R]; C]); impl PimRegion for PimMatrixArena { - const NUMBER_OF_BANKS: usize = R * C; + const OCCUPIED_CACHE_LINES: usize = R * C; + const OCCUPIED_ROWS: usize = Self::OCCUPIED_CACHE_LINES / NUMBER_OF_BANKS; fn bank_ptr(&self, bank_index: usize) -> *const f16 { unsafe { (self.0.as_ptr() as *const F16x16).offset(bank_index as _) as *const f16 } @@ -31,24 +31,6 @@ pub struct PimStorage<'a, const R: usize, const C: usize> { pub index: usize, } -// impl<'a, const R: usize, const C: usize> PimRegion for PimStorage<'a, R, C> { -// const NUMBER_OF_BANKS: usize = R * C; - -// fn bank_ptr(&self, bank_index: usize) -> *const f16 { -// unsafe { -// (self.arena.borrow().0.as_ptr() as *const F16x16).offset((self.index + bank_index) as _) -// as *const f16 -// } -// } - -// fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16 { -// unsafe { -// (self.arena.borrow_mut().0.as_mut_ptr() as *mut F16x16) -// .offset((self.index + bank_index) as _) as *mut f16 -// } -// } -// } - unsafe impl<'a, const R: usize, const C: usize> RawStorage, Const> for PimStorage<'a, R, C> { @@ -94,7 +76,8 @@ unsafe impl<'a, const R: usize, const C: usize> RawStorageMut, C } pub trait PimRegion { - const NUMBER_OF_BANKS: usize; + const OCCUPIED_CACHE_LINES: usize; + const OCCUPIED_ROWS: usize; fn bank_ptr(&self, bank_index: usize) -> *const f16; fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16; @@ -112,26 +95,26 @@ pub trait PimRegion { } fn execute_instruction_read_dual_bank(&self) { - if !cfg!(feature = "cacheless") { - self.invalidate_bank(EVEN_BANK_INDEX); - self.invalidate_bank(ODD_BANK_INDEX); + for i in (0..Self::OCCUPIED_ROWS).map(|i| i * NUMBER_OF_BANKS) { + if !cfg!(feature = "cacheless") { + self.invalidate_bank(EVEN_BANK_INDEX + i); + self.invalidate_bank(ODD_BANK_INDEX + i); + + barrier::dsb(barrier::SY); + } + + // Read from first and second bank + self.read_data_bank(EVEN_BANK_INDEX + i); + self.read_data_bank(ODD_BANK_INDEX + i); barrier::dsb(barrier::SY); } - - // Read from first and second bank - self.read_data_bank(EVEN_BANK_INDEX); - self.read_data_bank(ODD_BANK_INDEX); - - barrier::dsb(barrier::SY); } fn read_data_bank(&self, bank_index: usize) { - for i in 0..Self::NUMBER_OF_BANKS / TOTAL_BANKS { - let bank = self.bank_ptr(bank_index * i); - unsafe { - core::ptr::read_volatile(bank); - } + let bank = self.bank_ptr(bank_index); + unsafe { + core::ptr::read_volatile(bank); } } @@ -152,78 +135,74 @@ pub trait PimRegion { } fn execute_instruction_write_dual_bank(&mut self) { - // if !cfg!(feature = "cacheless") { - // self.preload_zero(); - // barrier::dsb(barrier::SY); - // } + for i in (0..Self::OCCUPIED_ROWS).map(|i| i * NUMBER_OF_BANKS) { + if !cfg!(feature = "cacheless") { + self.preload_zero_bank(EVEN_BANK_INDEX + i); + self.preload_zero_bank(ODD_BANK_INDEX + i); + barrier::dsb(barrier::SY); + } - // Write to first and second bank - self.write_data_bank(EVEN_BANK_INDEX); - self.write_data_bank(ODD_BANK_INDEX); + // Write to first and second bank + self.write_data_bank(EVEN_BANK_INDEX + i); + self.write_data_bank(ODD_BANK_INDEX + i); - if !cfg!(feature = "cacheless") { - self.invalidate_flush_bank(EVEN_BANK_INDEX); - self.invalidate_flush_bank(ODD_BANK_INDEX); + if !cfg!(feature = "cacheless") { + self.invalidate_flush_bank(EVEN_BANK_INDEX + i); + self.invalidate_flush_bank(ODD_BANK_INDEX + i); + } + + barrier::dsb(barrier::SY); } - - barrier::dsb(barrier::SY); } fn write_data_bank(&mut self, bank_index: usize) { - for i in 0..Self::NUMBER_OF_BANKS / TOTAL_BANKS { - let bank = self.bank_ptr_mut(bank_index * i); - unsafe { - core::ptr::write_volatile(bank, Default::default()); - } + let bank = self.bank_ptr_mut(bank_index); + unsafe { + core::ptr::write_volatile(bank, Default::default()); } } fn invalidate(&self) { - (0..Self::NUMBER_OF_BANKS).for_each(|idx| self.invalidate_bank(idx)); + (0..Self::OCCUPIED_CACHE_LINES).for_each(|idx| self.invalidate_bank(idx)); } fn invalidate_bank(&self, bank_index: usize) { - for i in 0..Self::NUMBER_OF_BANKS / TOTAL_BANKS { - let bank = self.bank_ptr(bank_index * i); - unsafe { - asm!("dc ivac, {val}", val = in(reg) bank); - } + let bank = self.bank_ptr(bank_index); + unsafe { + asm!("dc ivac, {val}", val = in(reg) bank); } } fn invalidate_flush(&self) { - (0..Self::NUMBER_OF_BANKS).for_each(|idx| self.invalidate_flush_bank(idx)); + (0..Self::OCCUPIED_CACHE_LINES).for_each(|idx| self.invalidate_flush_bank(idx)); } fn invalidate_flush_bank(&self, bank_index: usize) { - for i in 0..Self::NUMBER_OF_BANKS / TOTAL_BANKS { - let bank = self.bank_ptr(bank_index * i); - unsafe { - asm!("dc civac, {val}", val = in(reg) bank); - } + let bank = self.bank_ptr(bank_index); + unsafe { + asm!("dc civac, {val}", val = in(reg) bank); } } fn preload_zero(&self) { - (0..Self::NUMBER_OF_BANKS).for_each(|idx| self.preload_zero_bank(idx)); + (0..Self::OCCUPIED_CACHE_LINES).for_each(|idx| self.preload_zero_bank(idx)); } fn preload_zero_bank(&self, bank_index: usize) { - for i in 0..Self::NUMBER_OF_BANKS / TOTAL_BANKS { - let bank = self.bank_ptr(bank_index * i); - unsafe { - // Preload first bank - asm!("dc zva, {val}", val = in(reg) bank); - } + let bank = self.bank_ptr(bank_index); + unsafe { + // Preload first bank + asm!("dc zva, {val}", val = in(reg) bank); } } } #[repr(C, align(1024))] -pub struct DummyArray(pub [F16x16; TOTAL_BANKS]); +pub struct DummyArray(pub [F16x16; NUMBER_OF_BANKS]); impl PimRegion for DummyArray { - const NUMBER_OF_BANKS: usize = TOTAL_BANKS; + const OCCUPIED_CACHE_LINES: usize = NUMBER_OF_BANKS; + const OCCUPIED_ROWS: usize = 1; fn bank_ptr(&self, bank_index: usize) -> *const f16 { &self.0[bank_index] as *const F16x16 as *const f16 @@ -233,25 +212,3 @@ impl PimRegion for DummyArray { &mut self.0[bank_index] as *mut F16x16 as *mut f16 } } - -#[derive(Clone, Debug)] -#[repr(C, align(65536))] -pub struct ComputeArray(pub [T; N]); - -impl ComputeArray { - pub fn invalidate_flush(&self) { - self.0 - .iter() - .for_each(|bank_array| bank_array.invalidate_flush()); - } - - pub fn invalidate(&self) { - self.0.iter().for_each(|bank_array| bank_array.invalidate()); - } -} - -impl Default for ComputeArray { - fn default() -> Self { - Self(core::array::from_fn(|_| Default::default())) - } -} diff --git a/pim-os/src/pim/kernel.rs b/pim-os/src/pim/kernel.rs index 0a89809..bc79c5f 100644 --- a/pim-os/src/pim/kernel.rs +++ b/pim-os/src/pim/kernel.rs @@ -9,6 +9,14 @@ pub const TEST_KERNEL: Kernel = Kernel([ src: File::Bank, dst: File::GrfB { index: 0 }, }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfB { index: 1 }, + }, Instruction::ADD { src0: File::GrfA { index: 0 }, src1: File::GrfA { index: 0 }, @@ -21,6 +29,18 @@ pub const TEST_KERNEL: Kernel = Kernel([ dst: File::GrfB { index: 0 }, aam: false, }, + Instruction::ADD { + src0: File::GrfA { index: 1 }, + src1: File::GrfA { index: 1 }, + dst: File::GrfA { index: 1 }, + aam: false, + }, + Instruction::ADD { + src0: File::GrfB { index: 1 }, + src1: File::GrfB { index: 1 }, + dst: File::GrfB { index: 1 }, + aam: false, + }, // Instruction::MOV { // src: File::Bank, // dst: File::GrfA { index: 1 }, @@ -51,6 +71,14 @@ pub const TEST_KERNEL: Kernel = Kernel([ src: File::GrfB { index: 0 }, dst: File::Bank, }, + Instruction::FILL { + src: File::GrfA { index: 1 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 1 }, + dst: File::Bank, + }, Instruction::EXIT, Instruction::NOP, Instruction::NOP, @@ -71,10 +99,4 @@ pub const TEST_KERNEL: Kernel = Kernel([ Instruction::NOP, Instruction::NOP, Instruction::NOP, - Instruction::NOP, - Instruction::NOP, - Instruction::NOP, - Instruction::NOP, - Instruction::NOP, - Instruction::NOP, ]);