Map PIM region to uncacheable region

This commit is contained in:
2024-01-04 19:44:16 +01:00
parent 34c8ab84fb
commit 92499fe95b
9 changed files with 78 additions and 85 deletions

View File

@@ -1,16 +1,15 @@
MEMORY
{
bootmem : ORIGIN = 0x0, LENGTH = 0x100000
dram : ORIGIN = 0x80000000, LENGTH = 0x20000000
dram_pim : ORIGIN = 0xA0000000, LENGTH = 0x20000000
dram : ORIGIN = 0x80000000, LENGTH = 0x40000000
dram_pim_config : ORIGIN = 0xC0000000, LENGTH = 0x4000
dram_pim_data : ORIGIN = 0xC0004000, LENGTH = 0x3FFFC000
}
ENTRY(_start)
SECTIONS
{
.init : { *(.init) } > bootmem
.pim_config : { KEEP(*(.pim_config)) } > dram
# . = . + 0x4000;
.text : { KEEP(*(.text)) } > dram
.data : { *(.data) } > dram
.rodata : { *(.rodata) } > dram
@@ -20,5 +19,6 @@ SECTIONS
. = . + 0x100000; # 1 MiB Stack
LD_STACK_PTR = .;
.pim_data : { KEEP(*(.pim_data)) } > dram_pim
.pim_config : { KEEP(*(.pim_config)) } > dram_pim_config
.pim_data : { KEEP(*(.pim_data)) } > dram_pim_data
}

View File

@@ -1,8 +1,3 @@
use core::arch::global_asm;
global_asm!(include_str!("start.s"));
extern "C" {
pub fn set_page_table_cache();
pub fn set_page_table_non_cache();
}

View File

@@ -0,0 +1,14 @@
use critical_section::RawRestoreState;
/// Zero-sized marker type that carries this project's `critical_section` implementation.
struct CriticalSection;
// Register `CriticalSection` as the implementation used by the `critical_section` crate.
critical_section::set_impl!(CriticalSection);
/// Critical-section hooks that do nothing on entry and on exit.
///
/// NOTE(review): neither hook masks interrupts or takes a lock, so no mutual exclusion is
/// actually provided. That is only sound while nothing can preempt the running code —
/// confirm before enabling interrupts or a second core.
unsafe impl critical_section::Impl for CriticalSection {
/// Enters a critical section without changing any state.
///
/// NOTE(review): the empty body evaluates to `()`, so this presumably relies on
/// `RawRestoreState` being the unit type — confirm against the crate features in use.
unsafe fn acquire() -> RawRestoreState {
// no special implementation as interrupts are not used in the project
}
/// Leaves a critical section; the restore token is ignored.
unsafe fn release(_token: RawRestoreState) {
// no special implementation as interrupts are not used in the project
}
}

View File

@@ -17,12 +17,11 @@ use embedded_alloc::Heap;
use half::f16;
use nalgebra::Matrix;
use pim::{
array::{DummyArray, PimMatrixArena, PimRegion, PimStorage, NUMBER_OF_BANKS},
kernel::{execute_matrix_add, execute_matrix_multiply, MATRIX_ADD, MATRIX_MUL},
array::{DummyArray, PimMatrixArena, PimStorage, NUMBER_OF_BANKS},
kernel::{execute_matrix_multiply, MATRIX_MUL},
state::PimState,
vector::{F16x1, F16x16},
};
use pim_isa::BankMode;
use uart::Uart0;
mod boot;
@@ -49,13 +48,13 @@ pub extern "C" fn entry() -> ! {
let mut pim_state = PimState::new(&MATRIX_MUL);
pim_state.set_kernel();
let mut pim_matrix_arena0 = Rc::new(RefCell::new(PimMatrixArena(
let pim_matrix_arena0 = Rc::new(RefCell::new(PimMatrixArena(
[[[F16x16::default(); NUMBER_OF_BANKS]; 3]; 3],
)));
let mut pim_matrix_arena1 = Rc::new(RefCell::new(PimMatrixArena(
let pim_matrix_arena1 = Rc::new(RefCell::new(PimMatrixArena(
[[[F16x16::default(); NUMBER_OF_BANKS]; 3]; 3],
)));
let mut pim_matrix_arena2 = Rc::new(RefCell::new(PimMatrixArena(
let pim_matrix_arena2 = Rc::new(RefCell::new(PimMatrixArena(
[[[F16x16::default(); NUMBER_OF_BANKS]; 3]; 3],
)));
let pim_storage0 = PimStorage {
@@ -71,6 +70,15 @@ pub extern "C" fn entry() -> ! {
index: 0,
};
writeln!(
&mut uart,
"arena0: {:?}\narena1: {:?}\narena2: {:?}",
core::ptr::addr_of!(*pim_matrix_arena0.borrow()),
core::ptr::addr_of!(*pim_matrix_arena1.borrow()),
core::ptr::addr_of!(*pim_matrix_arena2.borrow()),
)
.unwrap();
let mut matrix0 = Matrix::from_data(pim_storage0);
matrix0.fill_lower_triangle(F16x1(f16::ONE), 0);
@@ -81,14 +89,9 @@ pub extern "C" fn entry() -> ! {
writeln!(&mut uart, "{matrix0} * {matrix1}\n=").unwrap();
// Invalidate and flush array just in case
pim_matrix_arena0.borrow_mut().invalidate_flush();
pim_matrix_arena1.borrow_mut().invalidate_flush();
pim_matrix_arena2.borrow_mut().invalidate_flush();
let mut dummy_array = Box::new(DummyArray([F16x16::default(); NUMBER_OF_BANKS]));
barrier::dsb(barrier::SY);
// execute_matrix_add(&pim_matrix_arena0, &pim_matrix_arena1, &dummy_array);
execute_matrix_multiply(
&mut pim_state,
&mut pim_matrix_arena0.borrow_mut(),
@@ -97,9 +100,6 @@ pub extern "C" fn entry() -> ! {
dummy_array.as_mut(),
);
pim_matrix_arena2.borrow().invalidate();
barrier::dsb(barrier::SY);
writeln!(&mut uart, "{matrix2}").unwrap();
m5ops::exit();

View File

@@ -87,9 +87,6 @@ pub trait PimRegion {
self.read_data_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
barrier::dsb(barrier::SY);
self.invalidate_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
barrier::dsb(barrier::SY);
}
fn execute_instruction_read_dual_bank(&self) {
@@ -110,9 +107,8 @@ pub trait PimRegion {
fn read_data_bank(&self, bank_index: usize) {
let bank = self.bank_ptr(bank_index);
unsafe {
core::ptr::read_volatile(bank);
}
// writeln!(&mut crate::uart::Uart0 {}, "Read data {:?}", bank).unwrap();
unsafe { core::ptr::read_volatile(bank) };
}
fn execute_instruction_write_single_bank(&mut self, i: usize) {

View File

@@ -1,21 +1,31 @@
use core::arch::asm;
use core::{arch::asm, ptr::write_volatile};
#[link_section = ".pim_config"]
static mut PIM_CONFIG_REGION: [u8; 0x4000] = [0; 0x4000];
const CACHE_LINE_SIZE: usize = 32;
#[derive(Debug)]
pub struct PimWriter;
impl PimWriter {
pub fn write(&mut self, s: &str) {
unsafe {
PIM_CONFIG_REGION[..s.len()].copy_from_slice(s.as_bytes());
PIM_CONFIG_REGION[s.len()] = b'\0';
let mut index = 0;
for &byte in s.as_bytes() {
write_volatile((&mut PIM_CONFIG_REGION as *mut u8).offset(index), byte as _);
asm!("dsb sy");
index += 1;
}
write_volatile((&mut PIM_CONFIG_REGION as *mut u8).offset(index), b'\0');
// PIM_CONFIG_REGION[..s.len()].copy_from_slice(s.as_bytes());
// PIM_CONFIG_REGION[s.len()] = b'\0';
if cfg!(feature = "cacheless") {
// Be pessimistic so that config region is not optimized away
core::hint::black_box(PIM_CONFIG_REGION);
} else {
// Flush all cache lines that were affected by write operation
for element in PIM_CONFIG_REGION[..s.len()].iter().step_by(CACHE_LINE_SIZE) {
for element in PIM_CONFIG_REGION[..s.len()].iter() {
asm!("dc civac, {val}", val = in(reg) element);
}
@@ -23,4 +33,5 @@ impl PimWriter {
asm!("dsb sy");
}
}
}
}

View File

@@ -1,6 +1,4 @@
use core::cell::RefCell;
use core::fmt::Write;
use pim_isa::{BankMode, File, Instruction, Kernel};
use super::{
@@ -88,9 +86,9 @@ pub const MATRIX_ADD: Kernel = Kernel([
]);
pub fn execute_matrix_add(
pim_matrix_arena0: &RefCell<PimMatrixArena<8, 8>>,
pim_matrix_arena1: &RefCell<PimMatrixArena<8, 8>>,
dummy_array: &DummyArray,
_pim_matrix_arena0: &RefCell<PimMatrixArena<8, 8>>,
_pim_matrix_arena1: &RefCell<PimMatrixArena<8, 8>>,
_dummy_array: &DummyArray,
) {
// pim_matrix_arena0
// .borrow()
@@ -182,24 +180,21 @@ pub fn execute_matrix_multiply<const R: usize, const C: usize>(
) {
pim_state.set_bank_mode(BankMode::PimAllBank);
let mut i = 0;
while i < (R * C) {
for i in 0..(R * C) {
let left_index = i % R;
let right_index = (i / R) * R;
pim_matrix_arena0.execute_instruction_read_single_bank(left_index + R * 0);
pim_matrix_arena0.execute_instruction_read_single_bank(left_index + R * 1);
pim_matrix_arena0.execute_instruction_read_single_bank(left_index + R * 2);
for k in 0..R {
pim_matrix_arena0.execute_instruction_read_single_bank(left_index + R * k);
}
pim_matrix_arena1.execute_instruction_read_single_bank(right_index + 0);
pim_matrix_arena1.execute_instruction_read_single_bank(right_index + 1);
pim_matrix_arena1.execute_instruction_read_single_bank(right_index + 2);
for k in 0..C {
pim_matrix_arena1.execute_instruction_read_single_bank(right_index + k);
}
pim_matrix_arena2.execute_instruction_write_single_bank(i);
dummy_array.execute_instruction_read_single_bank(0);
i += 1;
}
pim_state.set_bank_mode(BankMode::SingleBank);

View File

@@ -1,4 +1,4 @@
use crate::boot;
use super::config::PimWriter;
use pim_isa::{BankMode, Kernel, PimConfig};
@@ -19,14 +19,6 @@ impl PimState {
// TODO: return a guard token that switches back to single-bank mode when it is dropped
pub fn set_bank_mode(&mut self, bank_mode: BankMode) {
if cfg!(feature = "cacheless") {
match bank_mode {
BankMode::SingleBank => unsafe { boot::set_page_table_cache() },
BankMode::AllBank => (),
BankMode::PimAllBank => unsafe { boot::set_page_table_non_cache() },
}
}
self.writer.write(
serde_json_core::to_string::<PimConfig, 64>(&PimConfig {
kernel: None,

View File

@@ -31,20 +31,18 @@ ttb0_base:
BLOCK_1GB (ADDR << 29), 0, 0x740
.set ADDR, ADDR+2
.endr
.rept 0x3E
// Cached normal DRAM region
BLOCK_1GB (ADDR << 29), 0, 0x74C
.set ADDR, ADDR+2
.endr
.align 12
ttb0_base_non_cache:
.set ADDR, 0x000
.rept 0x02
// Non-cached PIM DRAM region
BLOCK_1GB (ADDR << 29), 0, 0x740
.set ADDR, ADDR+2
.endr
.rept 0x3E
BLOCK_1GB (ADDR << 29), 0, 0x740
// Map rest of Page Table to avoid undefined behavior
.rept 0x3C
BLOCK_1GB (ADDR << 29), 0, 0x74C
.set ADDR, ADDR+2
.endr
@@ -60,7 +58,7 @@ _start:
ldr x1, =0xFF440400
msr mair_el3, x1 // ATTR0 Device-nGnRnE ATTR1 Device. ATTR2 Normal Non-Cacheable. ATTR3 Normal Cacheable.
bl set_page_table_cache
bl set_page_table
// Enable MMU and caches
mrs x0, sctlr_el3
@@ -73,18 +71,10 @@ _start:
bl entry
.globl set_page_table_cache
set_page_table_cache:
.globl set_page_table
set_page_table:
adr x0, ttb0_base
msr ttbr0_el3, x0
tlbi alle3
isb
ret
.globl set_page_table_non_cache
set_page_table_non_cache:
adr x0, ttb0_base_non_cache
msr ttbr0_el3, x0
tlbi alle3
isb
ret