diff --git a/pim-os/src/main.rs b/pim-os/src/main.rs index 53034f2..ec98a5e 100644 --- a/pim-os/src/main.rs +++ b/pim-os/src/main.rs @@ -17,8 +17,12 @@ use embedded_alloc::Heap; use half::f16; use nalgebra::Matrix; use pim::{ - array::{DummyArray, PimMatrixArena, PimStorage, NUMBER_OF_BANKS}, - kernel::{execute_matrix_multiply_rowwise, MATRIX_MUL}, + array::{DummyArray, PimMatrixArena, PimScalarArena, PimStorage, NUMBER_OF_BANKS}, + kernel::{ + execute_matrix_add, execute_matrix_multiply, execute_matrix_scalar_multiply, + execute_matrix_vector_multiply, MATRIX_ADD, MATRIX_MUL, MATRIX_SCALAR_MUL, + MATRIX_VECTOR_MUL, + }, state::PimState, vector::{F16x1, F16x16}, }; @@ -45,7 +49,7 @@ pub extern "C" fn entry() -> ! { } let mut uart = Uart0; - let mut pim_state = PimState::new(&MATRIX_MUL); + let mut pim_state = PimState::new(&MATRIX_SCALAR_MUL); pim_state.set_kernel(); let pim_matrix_arena0 = Rc::new(RefCell::new(PimMatrixArena( @@ -92,13 +96,39 @@ pub extern "C" fn entry() -> ! { let mut dummy_array = Box::new(DummyArray([F16x16::default(); NUMBER_OF_BANKS])); barrier::dsb(barrier::SY); - execute_matrix_multiply_rowwise( - &mut pim_state, - &mut pim_matrix_arena0.borrow_mut(), - &mut pim_matrix_arena1.borrow_mut(), - &mut pim_matrix_arena2.borrow_mut(), - dummy_array.as_mut(), - ); + // execute_matrix_add( + // &mut pim_state, + // &mut pim_matrix_arena0.borrow_mut(), + // &mut pim_matrix_arena1.borrow_mut(), + // &mut pim_matrix_arena2.borrow_mut(), + // dummy_array.as_mut(), + // ); + // execute_matrix_multiply_rowwise( + // &mut pim_state, + // &mut pim_matrix_arena0.borrow_mut(), + // &mut pim_matrix_arena1.borrow_mut(), + // &mut pim_matrix_arena2.borrow_mut(), + // dummy_array.as_mut(), + // ); + // execute_matrix_vector_multiply( + // &mut pim_state, + // &mut pim_matrix_arena0.borrow_mut(), + // &mut pim_matrix_arena1.borrow_mut(), + // &mut pim_matrix_arena2.borrow_mut(), + // dummy_array.as_mut(), + // ); + + // let pim_scalar_arena = 
Box::new(PimScalarArena( + // [F16x16([F16x1(f16::from_f32(2.0)); 16]); 32], + // )); + + // execute_matrix_scalar_multiply( + // &mut pim_state, + // &pim_scalar_arena, + // &pim_matrix_arena0.borrow_mut(), + // &mut pim_matrix_arena1.borrow_mut(), + // dummy_array.as_mut(), + // ); writeln!(&mut uart, "{matrix2}").unwrap(); diff --git a/pim-os/src/pim/array.rs b/pim-os/src/pim/array.rs index fbaf6f8..db06d66 100644 --- a/pim-os/src/pim/array.rs +++ b/pim-os/src/pim/array.rs @@ -24,6 +24,22 @@ impl PimRegion for PimMatrixArena { } } +#[derive(Clone, Debug)] +#[repr(C, align(1024))] +pub struct PimScalarArena(pub [F16x16; NUMBER_OF_BANKS]); + +impl PimRegion for PimScalarArena { + const OCCUPIED_CACHE_LINES: usize = NUMBER_OF_BANKS; + + fn bank_ptr(&self, bank_index: usize) -> *const f16 { + unsafe { (self.0.as_ptr() as *const F16x16).add(bank_index) as *const f16 } + } + + fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16 { + unsafe { (self.0.as_mut_ptr() as *mut F16x16).add(bank_index) as *mut f16 } + } +} + #[derive(Debug)] pub struct PimStorage<'a, const R: usize, const C: usize> { pub arena: &'a RefCell>, diff --git a/pim-os/src/pim/config.rs b/pim-os/src/pim/config.rs index 0cf56a1..7fbd719 100644 --- a/pim-os/src/pim/config.rs +++ b/pim-os/src/pim/config.rs @@ -1,3 +1,4 @@ +use aarch64_cpu::asm::barrier; use core::{arch::asm, ptr::write_volatile}; #[link_section = ".pim_config"] @@ -12,7 +13,7 @@ impl PimWriter { let mut index = 0; for &byte in s.as_bytes() { write_volatile((&mut PIM_CONFIG_REGION as *mut u8).offset(index), byte as _); - asm!("dsb sy"); + barrier::dsb(barrier::SY); index += 1; } write_volatile((&mut PIM_CONFIG_REGION as *mut u8).offset(index), b'\0'); @@ -29,8 +30,7 @@ impl PimWriter { asm!("dc civac, {val}", val = in(reg) element); } - // Wait on all flushes to complete - asm!("dsb sy"); + barrier::dsb(barrier::SY); } } } diff --git a/pim-os/src/pim/kernel.rs b/pim-os/src/pim/kernel.rs index 166d84c..efbd3ea 100644 --- 
a/pim-os/src/pim/kernel.rs +++ b/pim-os/src/pim/kernel.rs @@ -1,27 +1,41 @@ -use core::cell::RefCell; -use pim_isa::{BankMode, File, Instruction, Kernel}; - use super::{ - array::{DummyArray, PimMatrixArena, PimRegion}, + array::{DummyArray, PimMatrixArena, PimRegion, PimScalarArena}, state::PimState, }; +use pim_isa::{BankMode, File, Instruction, Kernel}; pub const MATRIX_ADD: Kernel = Kernel([ Instruction::MOV { src: File::Bank, dst: File::GrfA { index: 0 }, }, - Instruction::MOV { - src: File::Bank, - dst: File::GrfB { index: 0 }, - }, Instruction::MOV { src: File::Bank, dst: File::GrfA { index: 1 }, }, Instruction::MOV { src: File::Bank, - dst: File::GrfB { index: 1 }, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 4 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 5 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 6 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 7 }, }, Instruction::ADD { src0: File::Bank, @@ -29,12 +43,6 @@ pub const MATRIX_ADD: Kernel = Kernel([ dst: File::GrfA { index: 0 }, aam: false, }, - Instruction::ADD { - src0: File::Bank, - src1: File::GrfB { index: 0 }, - dst: File::GrfB { index: 0 }, - aam: false, - }, Instruction::ADD { src0: File::Bank, src1: File::GrfA { index: 1 }, @@ -43,24 +51,70 @@ pub const MATRIX_ADD: Kernel = Kernel([ }, Instruction::ADD { src0: File::Bank, - src1: File::GrfB { index: 1 }, - dst: File::GrfB { index: 1 }, + src1: File::GrfA { index: 2 }, + dst: File::GrfA { index: 2 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 3 }, + dst: File::GrfA { index: 3 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 4 }, + dst: File::GrfA { index: 4 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { 
index: 5 }, + dst: File::GrfA { index: 5 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 6 }, + dst: File::GrfA { index: 6 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 7 }, + dst: File::GrfA { index: 7 }, aam: false, }, Instruction::FILL { src: File::GrfA { index: 0 }, dst: File::Bank, }, - Instruction::FILL { - src: File::GrfB { index: 0 }, - dst: File::Bank, - }, Instruction::FILL { src: File::GrfA { index: 1 }, dst: File::Bank, }, Instruction::FILL { - src: File::GrfB { index: 1 }, + src: File::GrfA { index: 2 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 3 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 4 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 5 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 6 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 7 }, dst: File::Bank, }, Instruction::EXIT, @@ -71,35 +125,34 @@ pub const MATRIX_ADD: Kernel = Kernel([ Instruction::NOP, Instruction::NOP, Instruction::NOP, - Instruction::NOP, - Instruction::NOP, - Instruction::NOP, - Instruction::NOP, - Instruction::NOP, - Instruction::NOP, - Instruction::NOP, - Instruction::NOP, - Instruction::NOP, - Instruction::NOP, - Instruction::NOP, - Instruction::NOP, ]); -pub fn execute_matrix_add( - _pim_matrix_arena0: &RefCell>, - _pim_matrix_arena1: &RefCell>, - _dummy_array: &DummyArray, +pub fn execute_matrix_add( + pim_state: &mut PimState, + pim_matrix_arena0: &PimMatrixArena, + pim_matrix_arena1: &PimMatrixArena, + pim_matrix_arena2: &mut PimMatrixArena, + dummy_array: &DummyArray, ) { - // pim_matrix_arena0 - // .borrow() - // .execute_instruction_read_dual_bank(); - // pim_matrix_arena1 - // .borrow() - // .execute_instruction_read_dual_bank(); - // pim_matrix_arena0 - // .borrow_mut() - // .execute_instruction_write_dual_bank(); - // 
dummy_array.execute_instruction_read_single_bank(0); + pim_state.set_bank_mode(BankMode::PimAllBank); + + for column in 0..C { + for row in 0..R { + pim_matrix_arena0.execute_instruction_read_single_bank(column * R + row); + } + + for row in 0..R { + pim_matrix_arena1.execute_instruction_read_single_bank(column * R + row); + } + + for row in 0..R { + pim_matrix_arena2.execute_instruction_write_single_bank(column * R + row); + } + + dummy_array.execute_instruction_read_single_bank(0); + } + + pim_state.set_bank_mode(BankMode::SingleBank); } pub const MATRIX_MUL: Kernel = Kernel([ @@ -194,41 +247,42 @@ pub const MATRIX_MUL: Kernel = Kernel([ Instruction::NOP, ]); -pub fn execute_matrix_multiply_elementwise( +// Maybe mention this briefly in the thesis and then transition to AAM +// pub fn execute_matrix_multiply_elementwise( +// pim_state: &mut PimState, +// pim_matrix_arena0: &mut PimMatrixArena, +// pim_matrix_arena1: &mut PimMatrixArena, +// pim_matrix_arena2: &mut PimMatrixArena, +// dummy_array: &mut DummyArray, +// ) { +// pim_state.set_bank_mode(BankMode::PimAllBank); + +// for i in 0..(R * C) { +// let start_column = i % R; +// let start_row = (i / R) * R; + +// for j in 0..C { +// pim_matrix_arena0.execute_instruction_read_single_bank(start_column + R * j); +// } + +// for j in 0..R { +// pim_matrix_arena1.execute_instruction_read_single_bank(start_row + j); +// } + +// pim_matrix_arena2.execute_instruction_write_single_bank(i); + +// dummy_array.execute_instruction_read_single_bank(0); +// } + +// pim_state.set_bank_mode(BankMode::SingleBank); +// } + +pub fn execute_matrix_multiply( pim_state: &mut PimState, - pim_matrix_arena0: &mut PimMatrixArena, - pim_matrix_arena1: &mut PimMatrixArena, + pim_matrix_arena0: &PimMatrixArena, + pim_matrix_arena1: &PimMatrixArena, pim_matrix_arena2: &mut PimMatrixArena, - dummy_array: &mut DummyArray, -) { - pim_state.set_bank_mode(BankMode::PimAllBank); - - for i in 0..(R * C) { - let start_column = i % R; - let start_row = (i / 
R) * R; - - for j in 0..C { - pim_matrix_arena0.execute_instruction_read_single_bank(start_column + R * j); - } - - for j in 0..R { - pim_matrix_arena1.execute_instruction_read_single_bank(start_row + j); - } - - pim_matrix_arena2.execute_instruction_write_single_bank(i); - - dummy_array.execute_instruction_read_single_bank(0); - } - - pim_state.set_bank_mode(BankMode::SingleBank); -} - -pub fn execute_matrix_multiply_rowwise( - pim_state: &mut PimState, - pim_matrix_arena0: &mut PimMatrixArena, - pim_matrix_arena1: &mut PimMatrixArena, - pim_matrix_arena2: &mut PimMatrixArena, - dummy_array: &mut DummyArray, + dummy_array: &DummyArray, ) { pim_state.set_bank_mode(BankMode::PimAllBank); @@ -252,3 +306,228 @@ pub fn execute_matrix_multiply_rowwise( pim_state.set_bank_mode(BankMode::SingleBank); } + +pub const MATRIX_VECTOR_MUL: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 4 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 5 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 6 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 7 }, + }, + Instruction::MAC { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + src2: File::GrfB { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: true, + }, + Instruction::JUMP { + offset: -1, + count: 7, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + 
Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub fn execute_matrix_vector_multiply( + pim_state: &mut PimState, + pim_matrix_arena0: &PimMatrixArena, + pim_matrix_arena1: &PimMatrixArena, + pim_matrix_arena2: &mut PimMatrixArena, + dummy_array: &DummyArray, +) { + pim_state.set_bank_mode(BankMode::PimAllBank); + + for row in 0..R { + for i in 0..C { + pim_matrix_arena0.execute_instruction_read_single_bank(row + R * i); + } + + for i in 0..R { + pim_matrix_arena1.execute_instruction_read_single_bank(i); + } + + pim_matrix_arena2.execute_instruction_write_single_bank(row); + + dummy_array.execute_instruction_read_single_bank(0); + } + + pim_state.set_bank_mode(BankMode::SingleBank); +} + +pub const MATRIX_SCALAR_MUL: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::SrfM { index: 0 }, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::SrfM { index: 0 }, + dst: File::GrfA { index: 0 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::SrfM { index: 0 }, + dst: File::GrfA { index: 1 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::SrfM { index: 0 }, + dst: File::GrfA { index: 2 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::SrfM { index: 0 }, + dst: File::GrfA { index: 3 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::SrfM { index: 0 }, + dst: File::GrfA { index: 4 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::SrfM { index: 0 }, + dst: File::GrfA { index: 5 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::SrfM { index: 0 }, + dst: File::GrfA { index: 6 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::SrfM { index: 0 }, + dst: File::GrfA { index: 7 }, + aam: false, + }, + 
Instruction::FILL { + src: File::GrfA { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 1 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 2 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 3 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 4 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 5 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 6 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 7 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub fn execute_matrix_scalar_multiply( + pim_state: &mut PimState, + pim_scalar_arena: &PimScalarArena, + pim_matrix_arena0: &PimMatrixArena, + pim_matrix_arena1: &mut PimMatrixArena, + dummy_array: &DummyArray, +) { + pim_state.set_bank_mode(BankMode::PimAllBank); + + for column in 0..C { + pim_scalar_arena.execute_instruction_read_single_bank(0); + + for i in 0..R { + pim_matrix_arena0.execute_instruction_read_single_bank(column * R + i); + } + + for i in 0..R { + pim_matrix_arena1.execute_instruction_write_single_bank(column * R + i); + } + + dummy_array.execute_instruction_read_single_bank(0); + } + + pim_state.set_bank_mode(BankMode::SingleBank); +} diff --git a/pim-vm/src/lib.rs b/pim-vm/src/lib.rs index 7f4c226..273cdf0 100644 --- a/pim-vm/src/lib.rs +++ b/pim-vm/src/lib.rs @@ -138,7 +138,7 @@ impl PimVM { if pim_unit_index == 0 { log::debug!( - "PimUnit {pim_unit_index} at {address:#x} (B{aam_grf_b_index}, A{aam_grf_a_index}) Execute PC {}: {inst:?}", + "PimUnit {pim_unit_index} at {address:#x} (B{aam_grf_b_index}, A{aam_grf_a_index}) Execute 
Read PC {}: {inst:?}", pim_unit.pc ); } @@ -331,13 +331,13 @@ impl PimVM { pim_unit.pc += 1; } - if pim_unit_index == 0 { - log::debug!( - "PimUnit {pim_unit_index} JUMP to PC {}: {:?}", - pim_unit.pc, - self.kernel.0[pim_unit.pc as usize] - ); - } + // if pim_unit_index == 0 { + // log::debug!( + // "PimUnit {pim_unit_index} JUMP to PC {}: {:?}", + // pim_unit.pc, + // self.kernel.0[pim_unit.pc as usize] + // ); + // } } } @@ -353,7 +353,7 @@ impl PimVM { if pim_unit_index == 0 { log::debug!( - "PimUnit {pim_unit_index} Execute PC {}: {inst:?}", + "PimUnit {pim_unit_index} Execute Write PC {}: {inst:?}", pim_unit.pc ); }