From 92499fe95bb5eb51afb5653410786fd1783012a2 Mon Sep 17 00:00:00 2001 From: Derek Christ Date: Thu, 4 Jan 2024 19:44:16 +0100 Subject: [PATCH] Map PIM region to uncacheable region --- pim-os/aarch64-gem5.ld | 10 +++++----- pim-os/src/boot.rs | 5 ----- pim-os/src/critical_section.rs | 14 ++++++++++++++ pim-os/src/main.rs | 28 ++++++++++++++-------------- pim-os/src/pim/array.rs | 8 ++------ pim-os/src/pim/config.rs | 33 ++++++++++++++++++++++----------- pim-os/src/pim/kernel.rs | 25 ++++++++++--------------- pim-os/src/pim/state.rs | 10 +--------- pim-os/src/start.s | 30 ++++++++++-------------------- 9 files changed, 78 insertions(+), 85 deletions(-) create mode 100644 pim-os/src/critical_section.rs diff --git a/pim-os/aarch64-gem5.ld b/pim-os/aarch64-gem5.ld index ed3874a..4fcae3c 100644 --- a/pim-os/aarch64-gem5.ld +++ b/pim-os/aarch64-gem5.ld @@ -1,16 +1,15 @@ MEMORY { bootmem : ORIGIN = 0x0, LENGTH = 0x100000 - dram : ORIGIN = 0x80000000, LENGTH = 0x20000000 - dram_pim : ORIGIN = 0xA0000000, LENGTH = 0x20000000 + dram : ORIGIN = 0x80000000, LENGTH = 0x40000000 + dram_pim_config : ORIGIN = 0xC0000000, LENGTH = 0x4000 + dram_pim_data : ORIGIN = 0xC0004000, LENGTH = 0x3FFFC000 } ENTRY(_start) SECTIONS { .init : { *(.init) } > bootmem - .pim_config : { KEEP(*(.pim_config)) } > dram - # . = . + 0x4000; .text : { KEEP(*(.text)) } > dram .data : { *(.data) } > dram .rodata : { *(.rodata) } > dram @@ -20,5 +19,6 @@ SECTIONS . = . + 0x100000; # 1 MiB Stack LD_STACK_PTR = .; - .pim_data : { KEEP(*(.pim_data)) } > dram_pim + .pim_config : { KEEP(*(.pim_config)) } > dram_pim_config + .pim_data : { KEEP(*(.pim_data)) } > dram_pim_data } diff --git a/pim-os/src/boot.rs b/pim-os/src/boot.rs index 3935089..eccbde1 100644 --- a/pim-os/src/boot.rs +++ b/pim-os/src/boot.rs @@ -1,8 +1,3 @@ use core::arch::global_asm; global_asm!(include_str!("start.s")); - -extern "C" { - pub fn set_page_table_cache(); - pub fn set_page_table_non_cache(); -} diff --git a/pim-os/src/critical_section.rs b/pim-os/src/critical_section.rs new file mode 100644 index 0000000..9931a77 --- /dev/null +++ b/pim-os/src/critical_section.rs @@ -0,0 +1,14 @@ +use critical_section::RawRestoreState; + +struct CriticalSection; +critical_section::set_impl!(CriticalSection); + +unsafe impl critical_section::Impl for CriticalSection { + unsafe fn acquire() -> RawRestoreState { + // no special implementation as interrupts are not used in the project + } + + unsafe fn release(_token: RawRestoreState) { + // no special implementation as interrupts are not used in the project + } +} diff --git a/pim-os/src/main.rs b/pim-os/src/main.rs index dc738ab..e368f88 100644 --- a/pim-os/src/main.rs +++ b/pim-os/src/main.rs @@ -17,12 +17,11 @@ use embedded_alloc::Heap; use half::f16; use nalgebra::Matrix; use pim::{ - array::{DummyArray, PimMatrixArena, PimRegion, PimStorage, NUMBER_OF_BANKS}, - kernel::{execute_matrix_add, execute_matrix_multiply, MATRIX_ADD, MATRIX_MUL}, + array::{DummyArray, PimMatrixArena, PimStorage, NUMBER_OF_BANKS}, + kernel::{execute_matrix_multiply, MATRIX_MUL}, state::PimState, vector::{F16x1, F16x16}, }; -use pim_isa::BankMode; use uart::Uart0; mod boot; @@ -49,13 +48,13 @@ pub extern "C" fn entry() -> ! { let mut pim_state = PimState::new(&MATRIX_MUL); pim_state.set_kernel(); - let mut pim_matrix_arena0 = Rc::new(RefCell::new(PimMatrixArena( + let pim_matrix_arena0 = Rc::new(RefCell::new(PimMatrixArena( [[[F16x16::default(); NUMBER_OF_BANKS]; 3]; 3], ))); - let mut pim_matrix_arena1 = Rc::new(RefCell::new(PimMatrixArena( + let pim_matrix_arena1 = Rc::new(RefCell::new(PimMatrixArena( [[[F16x16::default(); NUMBER_OF_BANKS]; 3]; 3], ))); - let mut pim_matrix_arena2 = Rc::new(RefCell::new(PimMatrixArena( + let pim_matrix_arena2 = Rc::new(RefCell::new(PimMatrixArena( [[[F16x16::default(); NUMBER_OF_BANKS]; 3]; 3], ))); let pim_storage0 = PimStorage { @@ -71,6 +70,15 @@ pub extern "C" fn entry() -> ! { index: 0, }; + writeln!( + &mut uart, + "arena0: {:?}\narena1: {:?}\narena2: {:?}", + core::ptr::addr_of!(*pim_matrix_arena0.borrow()), + core::ptr::addr_of!(*pim_matrix_arena1.borrow()), + core::ptr::addr_of!(*pim_matrix_arena2.borrow()), + ) + .unwrap(); + let mut matrix0 = Matrix::from_data(pim_storage0); matrix0.fill_lower_triangle(F16x1(f16::ONE), 0); @@ -81,14 +89,9 @@ pub extern "C" fn entry() -> ! { writeln!(&mut uart, "{matrix0} * {matrix1}\n=").unwrap(); - // Invalidate and flush array just in case - pim_matrix_arena0.borrow_mut().invalidate_flush(); - pim_matrix_arena1.borrow_mut().invalidate_flush(); - pim_matrix_arena2.borrow_mut().invalidate_flush(); let mut dummy_array = Box::new(DummyArray([F16x16::default(); NUMBER_OF_BANKS])); barrier::dsb(barrier::SY); - // execute_matrix_add(&pim_matrix_arena0, &pim_matrix_arena1, &dummy_array); execute_matrix_multiply( &mut pim_state, &mut pim_matrix_arena0.borrow_mut(), @@ -97,9 +100,6 @@ pub extern "C" fn entry() -> ! { dummy_array.as_mut(), ); - pim_matrix_arena2.borrow().invalidate(); - barrier::dsb(barrier::SY); - writeln!(&mut uart, "{matrix2}").unwrap(); m5ops::exit(); diff --git a/pim-os/src/pim/array.rs b/pim-os/src/pim/array.rs index 8ede96f..fbaf6f8 100644 --- a/pim-os/src/pim/array.rs +++ b/pim-os/src/pim/array.rs @@ -87,9 +87,6 @@ pub trait PimRegion { self.read_data_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS); barrier::dsb(barrier::SY); - - self.invalidate_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS); - barrier::dsb(barrier::SY); } fn execute_instruction_read_dual_bank(&self) { @@ -110,9 +107,8 @@ pub trait PimRegion { fn read_data_bank(&self, bank_index: usize) { let bank = self.bank_ptr(bank_index); - unsafe { - core::ptr::read_volatile(bank); - } + // writeln!(&mut crate::uart::Uart0 {}, "Read data {:?}", bank).unwrap(); + unsafe { core::ptr::read_volatile(bank) }; } fn execute_instruction_write_single_bank(&mut self, i: usize) { diff --git a/pim-os/src/pim/config.rs b/pim-os/src/pim/config.rs index 417dfa3..0cf56a1 100644 --- a/pim-os/src/pim/config.rs +++ b/pim-os/src/pim/config.rs @@ -1,26 +1,37 @@ -use core::arch::asm; +use core::{arch::asm, ptr::write_volatile}; #[link_section = ".pim_config"] static mut PIM_CONFIG_REGION: [u8; 0x4000] = [0; 0x4000]; -const CACHE_LINE_SIZE: usize = 32; - #[derive(Debug)] pub struct PimWriter; impl PimWriter { pub fn write(&mut self, s: &str) { unsafe { - PIM_CONFIG_REGION[..s.len()].copy_from_slice(s.as_bytes()); - PIM_CONFIG_REGION[s.len()] = b'\0'; - - // Flush all cache lines that were affected by write operation - for element in PIM_CONFIG_REGION[..s.len()].iter().step_by(CACHE_LINE_SIZE) { - asm!("dc civac, {val}", val = in(reg) element); + let mut index = 0; + for &byte in s.as_bytes() { + write_volatile((&mut PIM_CONFIG_REGION as *mut u8).offset(index), byte as _); + asm!("dsb sy"); + index += 1; } + write_volatile((&mut PIM_CONFIG_REGION as *mut u8).offset(index), b'\0'); - // Wait on all flushes to complete - asm!("dsb sy"); + // PIM_CONFIG_REGION[..s.len()].copy_from_slice(s.as_bytes()); + // PIM_CONFIG_REGION[s.len()] = b'\0'; + + if cfg!(feature = "cacheless") { + // Be pessimistic so that config region is not optimized away + core::hint::black_box(PIM_CONFIG_REGION); + } else { + // Flush all cache lines that were affected by write operation + for element in PIM_CONFIG_REGION[..s.len()].iter() { + asm!("dc civac, {val}", val = in(reg) element); + } + + // Wait on all flushes to complete + asm!("dsb sy"); + } } } } diff --git a/pim-os/src/pim/kernel.rs b/pim-os/src/pim/kernel.rs index fac35d6..170312e 100644 --- a/pim-os/src/pim/kernel.rs +++ b/pim-os/src/pim/kernel.rs @@ -1,6 +1,4 @@ use core::cell::RefCell; -use core::fmt::Write; - use pim_isa::{BankMode, File, Instruction, Kernel}; use super::{ @@ -88,9 +86,9 @@ pub const MATRIX_ADD: Kernel = Kernel([ ]); pub fn execute_matrix_add( - pim_matrix_arena0: &RefCell>, - pim_matrix_arena1: &RefCell>, - dummy_array: &DummyArray, + _pim_matrix_arena0: &RefCell>, + _pim_matrix_arena1: &RefCell>, + _dummy_array: &DummyArray, ) { // pim_matrix_arena0 // .borrow() @@ -182,24 +180,21 @@ pub fn execute_matrix_multiply( ) { pim_state.set_bank_mode(BankMode::PimAllBank); - let mut i = 0; - while i < (R * C) { + for i in 0..(R * C) { let left_index = i % R; let right_index = (i / R) * R; - pim_matrix_arena0.execute_instruction_read_single_bank(left_index + R * 0); - pim_matrix_arena0.execute_instruction_read_single_bank(left_index + R * 1); - pim_matrix_arena0.execute_instruction_read_single_bank(left_index + R * 2); + for k in 0..R { + pim_matrix_arena0.execute_instruction_read_single_bank(left_index + R * k); + } - pim_matrix_arena1.execute_instruction_read_single_bank(right_index + 0); - pim_matrix_arena1.execute_instruction_read_single_bank(right_index + 1); - pim_matrix_arena1.execute_instruction_read_single_bank(right_index + 2); + for k in 0..C { + pim_matrix_arena1.execute_instruction_read_single_bank(right_index + k); + } pim_matrix_arena2.execute_instruction_write_single_bank(i); dummy_array.execute_instruction_read_single_bank(0); - - i += 1; } pim_state.set_bank_mode(BankMode::SingleBank); diff --git a/pim-os/src/pim/state.rs b/pim-os/src/pim/state.rs index 8ad5c93..bc6ea2b 100644 --- a/pim-os/src/pim/state.rs +++ b/pim-os/src/pim/state.rs @@ -1,4 +1,4 @@ -use crate::boot; + use super::config::PimWriter; use pim_isa::{BankMode, Kernel, PimConfig}; @@ -19,14 +19,6 @@ impl PimState { // TODO return token and return to singlebank when dropped pub fn set_bank_mode(&mut self, bank_mode: BankMode) { - if cfg!(feature = "cacheless") { - match bank_mode { - BankMode::SingleBank => unsafe { boot::set_page_table_cache() }, - BankMode::AllBank => (), - BankMode::PimAllBank => unsafe { boot::set_page_table_non_cache() }, - } - } - self.writer.write( serde_json_core::to_string::(&PimConfig { kernel: None, diff --git a/pim-os/src/start.s b/pim-os/src/start.s index 33fc66d..1c4c617 100644 --- a/pim-os/src/start.s +++ b/pim-os/src/start.s @@ -31,20 +31,18 @@ ttb0_base: BLOCK_1GB (ADDR << 29), 0, 0x740 .set ADDR, ADDR+2 .endr -.rept 0x3E + +// Cached normal DRAM region BLOCK_1GB (ADDR << 29), 0, 0x74C .set ADDR, ADDR+2 -.endr -.align 12 -ttb0_base_non_cache: -.set ADDR, 0x000 -.rept 0x02 +// Non-cached PIM DRAM region BLOCK_1GB (ADDR << 29), 0, 0x740 .set ADDR, ADDR+2 -.endr -.rept 0x3E -BLOCK_1GB (ADDR << 29), 0, 0x740 + +// Map rest of Page Table to avoid undefined behavior +.rept 0x3C +BLOCK_1GB (ADDR << 29), 0, 0x74C .set ADDR, ADDR+2 .endr @@ -60,7 +58,7 @@ _start: ldr x1, =0xFF440400 msr mair_el3, x1 // ATTR0 Device-nGnRnE ATTR1 Device. ATTR2 Normal Non-Cacheable. ATTR3 Normal Cacheable. - bl set_page_table_cache + bl set_page_table // Enable MMU and caches mrs x0, sctlr_el3 @@ -73,18 +71,10 @@ _start: bl entry -.globl set_page_table_cache -set_page_table_cache: +.globl set_page_table +set_page_table: adr x0, ttb0_base msr ttbr0_el3, x0 tlbi alle3 isb ret - -.globl set_page_table_non_cache -set_page_table_non_cache: - adr x0, ttb0_base_non_cache - msr ttbr0_el3, x0 - tlbi alle3 - isb - ret