Introduce a bunch of new kernels

This commit is contained in:
2024-01-06 19:05:13 +01:00
parent 29d9cee52c
commit 4510a36a10
5 changed files with 428 additions and 103 deletions

View File

@@ -17,8 +17,12 @@ use embedded_alloc::Heap;
use half::f16; use half::f16;
use nalgebra::Matrix; use nalgebra::Matrix;
use pim::{ use pim::{
array::{DummyArray, PimMatrixArena, PimStorage, NUMBER_OF_BANKS}, array::{DummyArray, PimMatrixArena, PimScalarArena, PimStorage, NUMBER_OF_BANKS},
kernel::{execute_matrix_multiply_rowwise, MATRIX_MUL}, kernel::{
execute_matrix_add, execute_matrix_multiply, execute_matrix_scalar_multiply,
execute_matrix_vector_multiply, MATRIX_ADD, MATRIX_MUL, MATRIX_SCALAR_MUL,
MATRIX_VECTOR_MUL,
},
state::PimState, state::PimState,
vector::{F16x1, F16x16}, vector::{F16x1, F16x16},
}; };
@@ -45,7 +49,7 @@ pub extern "C" fn entry() -> ! {
} }
let mut uart = Uart0; let mut uart = Uart0;
let mut pim_state = PimState::new(&MATRIX_MUL); let mut pim_state = PimState::new(&MATRIX_SCALAR_MUL);
pim_state.set_kernel(); pim_state.set_kernel();
let pim_matrix_arena0 = Rc::new(RefCell::new(PimMatrixArena( let pim_matrix_arena0 = Rc::new(RefCell::new(PimMatrixArena(
@@ -92,13 +96,39 @@ pub extern "C" fn entry() -> ! {
let mut dummy_array = Box::new(DummyArray([F16x16::default(); NUMBER_OF_BANKS])); let mut dummy_array = Box::new(DummyArray([F16x16::default(); NUMBER_OF_BANKS]));
barrier::dsb(barrier::SY); barrier::dsb(barrier::SY);
execute_matrix_multiply_rowwise( // execute_matrix_add(
&mut pim_state, // &mut pim_state,
&mut pim_matrix_arena0.borrow_mut(), // &mut pim_matrix_arena0.borrow_mut(),
&mut pim_matrix_arena1.borrow_mut(), // &mut pim_matrix_arena1.borrow_mut(),
&mut pim_matrix_arena2.borrow_mut(), // &mut pim_matrix_arena2.borrow_mut(),
dummy_array.as_mut(), // dummy_array.as_mut(),
); // );
// execute_matrix_multiply_rowwise(
// &mut pim_state,
// &mut pim_matrix_arena0.borrow_mut(),
// &mut pim_matrix_arena1.borrow_mut(),
// &mut pim_matrix_arena2.borrow_mut(),
// dummy_array.as_mut(),
// );
// execute_matrix_vector_multiply(
// &mut pim_state,
// &mut pim_matrix_arena0.borrow_mut(),
// &mut pim_matrix_arena1.borrow_mut(),
// &mut pim_matrix_arena2.borrow_mut(),
// dummy_array.as_mut(),
// );
// let pim_scalar_arena = Box::new(PimScalarArena(
// [F16x16([F16x1(f16::from_f32(2.0)); 16]); 32],
// ));
// execute_matrix_scalar_multiply(
// &mut pim_state,
// &pim_scalar_arena,
// &pim_matrix_arena0.borrow_mut(),
// &mut pim_matrix_arena1.borrow_mut(),
// dummy_array.as_mut(),
// );
writeln!(&mut uart, "{matrix2}").unwrap(); writeln!(&mut uart, "{matrix2}").unwrap();

View File

@@ -24,6 +24,22 @@ impl<const R: usize, const C: usize> PimRegion for PimMatrixArena<R, C> {
} }
} }
/// Arena holding one `F16x16` entry per bank (`NUMBER_OF_BANKS` entries in total).
///
/// The type is laid out with `repr(C)` and a 1024-byte alignment.
#[derive(Clone, Debug)]
#[repr(C, align(1024))]
pub struct PimScalarArena(pub [F16x16; NUMBER_OF_BANKS]);

impl PimRegion for PimScalarArena {
    /// Set to `NUMBER_OF_BANKS`, i.e. one per entry of the wrapped array.
    const OCCUPIED_CACHE_LINES: usize = NUMBER_OF_BANKS;

    /// Returns a read-only `f16` pointer to the start of entry `bank_index`.
    ///
    /// `bank_index` is not range-checked here.
    fn bank_ptr(&self, bank_index: usize) -> *const f16 {
        let first_entry: *const F16x16 = self.0.as_ptr();
        // SAFETY: relies on `bank_index` addressing one of the `NUMBER_OF_BANKS`
        // entries of the wrapped array; this is not verified here.
        let entry = unsafe { first_entry.add(bank_index) };
        entry.cast::<f16>()
    }

    /// Returns a mutable `f16` pointer to the start of entry `bank_index`.
    ///
    /// `bank_index` is not range-checked here.
    fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16 {
        let first_entry: *mut F16x16 = self.0.as_mut_ptr();
        // SAFETY: relies on `bank_index` addressing one of the `NUMBER_OF_BANKS`
        // entries of the wrapped array; this is not verified here.
        let entry = unsafe { first_entry.add(bank_index) };
        entry.cast::<f16>()
    }
}
#[derive(Debug)] #[derive(Debug)]
pub struct PimStorage<'a, const R: usize, const C: usize> { pub struct PimStorage<'a, const R: usize, const C: usize> {
pub arena: &'a RefCell<PimMatrixArena<R, C>>, pub arena: &'a RefCell<PimMatrixArena<R, C>>,

View File

@@ -1,3 +1,4 @@
use aarch64_cpu::asm::barrier;
use core::{arch::asm, ptr::write_volatile}; use core::{arch::asm, ptr::write_volatile};
#[link_section = ".pim_config"] #[link_section = ".pim_config"]
@@ -12,7 +13,7 @@ impl PimWriter {
let mut index = 0; let mut index = 0;
for &byte in s.as_bytes() { for &byte in s.as_bytes() {
write_volatile((&mut PIM_CONFIG_REGION as *mut u8).offset(index), byte as _); write_volatile((&mut PIM_CONFIG_REGION as *mut u8).offset(index), byte as _);
asm!("dsb sy"); barrier::dsb(barrier::SY);
index += 1; index += 1;
} }
write_volatile((&mut PIM_CONFIG_REGION as *mut u8).offset(index), b'\0'); write_volatile((&mut PIM_CONFIG_REGION as *mut u8).offset(index), b'\0');
@@ -29,8 +30,7 @@ impl PimWriter {
asm!("dc civac, {val}", val = in(reg) element); asm!("dc civac, {val}", val = in(reg) element);
} }
// Wait on all flushes to complete barrier::dsb(barrier::SY);
asm!("dsb sy");
} }
} }
} }

View File

@@ -1,27 +1,41 @@
use core::cell::RefCell;
use pim_isa::{BankMode, File, Instruction, Kernel};
use super::{ use super::{
array::{DummyArray, PimMatrixArena, PimRegion}, array::{DummyArray, PimMatrixArena, PimRegion, PimScalarArena},
state::PimState, state::PimState,
}; };
use pim_isa::{BankMode, File, Instruction, Kernel};
pub const MATRIX_ADD: Kernel = Kernel([ pub const MATRIX_ADD: Kernel = Kernel([
Instruction::MOV { Instruction::MOV {
src: File::Bank, src: File::Bank,
dst: File::GrfA { index: 0 }, dst: File::GrfA { index: 0 },
}, },
Instruction::MOV {
src: File::Bank,
dst: File::GrfB { index: 0 },
},
Instruction::MOV { Instruction::MOV {
src: File::Bank, src: File::Bank,
dst: File::GrfA { index: 1 }, dst: File::GrfA { index: 1 },
}, },
Instruction::MOV { Instruction::MOV {
src: File::Bank, src: File::Bank,
dst: File::GrfB { index: 1 }, dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 4 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 5 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 6 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 7 },
}, },
Instruction::ADD { Instruction::ADD {
src0: File::Bank, src0: File::Bank,
@@ -29,12 +43,6 @@ pub const MATRIX_ADD: Kernel = Kernel([
dst: File::GrfA { index: 0 }, dst: File::GrfA { index: 0 },
aam: false, aam: false,
}, },
Instruction::ADD {
src0: File::Bank,
src1: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::ADD { Instruction::ADD {
src0: File::Bank, src0: File::Bank,
src1: File::GrfA { index: 1 }, src1: File::GrfA { index: 1 },
@@ -43,24 +51,70 @@ pub const MATRIX_ADD: Kernel = Kernel([
}, },
Instruction::ADD { Instruction::ADD {
src0: File::Bank, src0: File::Bank,
src1: File::GrfB { index: 1 }, src1: File::GrfA { index: 2 },
dst: File::GrfB { index: 1 }, dst: File::GrfA { index: 2 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 3 },
dst: File::GrfA { index: 3 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 4 },
dst: File::GrfA { index: 4 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 5 },
dst: File::GrfA { index: 5 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 6 },
dst: File::GrfA { index: 6 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 7 },
dst: File::GrfA { index: 7 },
aam: false, aam: false,
}, },
Instruction::FILL { Instruction::FILL {
src: File::GrfA { index: 0 }, src: File::GrfA { index: 0 },
dst: File::Bank, dst: File::Bank,
}, },
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL { Instruction::FILL {
src: File::GrfA { index: 1 }, src: File::GrfA { index: 1 },
dst: File::Bank, dst: File::Bank,
}, },
Instruction::FILL { Instruction::FILL {
src: File::GrfB { index: 1 }, src: File::GrfA { index: 2 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 3 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 4 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 5 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 6 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 7 },
dst: File::Bank, dst: File::Bank,
}, },
Instruction::EXIT, Instruction::EXIT,
@@ -71,35 +125,34 @@ pub const MATRIX_ADD: Kernel = Kernel([
Instruction::NOP, Instruction::NOP,
Instruction::NOP, Instruction::NOP,
Instruction::NOP, Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]); ]);
pub fn execute_matrix_add( pub fn execute_matrix_add<const R: usize, const C: usize>(
_pim_matrix_arena0: &RefCell<PimMatrixArena<8, 8>>, pim_state: &mut PimState,
_pim_matrix_arena1: &RefCell<PimMatrixArena<8, 8>>, pim_matrix_arena0: &PimMatrixArena<R, C>,
_dummy_array: &DummyArray, pim_matrix_arena1: &PimMatrixArena<R, C>,
pim_matrix_arena2: &mut PimMatrixArena<R, C>,
dummy_array: &DummyArray,
) { ) {
// pim_matrix_arena0 pim_state.set_bank_mode(BankMode::PimAllBank);
// .borrow()
// .execute_instruction_read_dual_bank(); for column in 0..C {
// pim_matrix_arena1 for row in 0..R {
// .borrow() pim_matrix_arena0.execute_instruction_read_single_bank(column * R + row);
// .execute_instruction_read_dual_bank(); }
// pim_matrix_arena0
// .borrow_mut() for row in 0..R {
// .execute_instruction_write_dual_bank(); pim_matrix_arena1.execute_instruction_read_single_bank(column * R + row);
// dummy_array.execute_instruction_read_single_bank(); }
for row in 0..R {
pim_matrix_arena2.execute_instruction_write_single_bank(column * R + row);
}
dummy_array.execute_instruction_read_single_bank(0);
}
pim_state.set_bank_mode(BankMode::SingleBank);
} }
pub const MATRIX_MUL: Kernel = Kernel([ pub const MATRIX_MUL: Kernel = Kernel([
@@ -194,41 +247,42 @@ pub const MATRIX_MUL: Kernel = Kernel([
Instruction::NOP, Instruction::NOP,
]); ]);
pub fn execute_matrix_multiply_elementwise<const R: usize, const C: usize>( // Maybe mention this briefly in the thesis and then lead over to AAM
// pub fn execute_matrix_multiply_elementwise<const R: usize, const C: usize>(
// pim_state: &mut PimState,
// pim_matrix_arena0: &mut PimMatrixArena<R, C>,
// pim_matrix_arena1: &mut PimMatrixArena<R, C>,
// pim_matrix_arena2: &mut PimMatrixArena<R, C>,
// dummy_array: &mut DummyArray,
// ) {
// pim_state.set_bank_mode(BankMode::PimAllBank);
// for i in 0..(R * C) {
// let start_column = i % R;
// let start_row = (i / R) * R;
// for j in 0..C {
// pim_matrix_arena0.execute_instruction_read_single_bank(start_column + R * j);
// }
// for j in 0..R {
// pim_matrix_arena1.execute_instruction_read_single_bank(start_row + j);
// }
// pim_matrix_arena2.execute_instruction_write_single_bank(i);
// dummy_array.execute_instruction_read_single_bank(0);
// }
// pim_state.set_bank_mode(BankMode::SingleBank);
// }
pub fn execute_matrix_multiply<const R: usize, const C: usize>(
pim_state: &mut PimState, pim_state: &mut PimState,
pim_matrix_arena0: &mut PimMatrixArena<R, C>, pim_matrix_arena0: &PimMatrixArena<R, C>,
pim_matrix_arena1: &mut PimMatrixArena<R, C>, pim_matrix_arena1: &PimMatrixArena<R, C>,
pim_matrix_arena2: &mut PimMatrixArena<R, C>, pim_matrix_arena2: &mut PimMatrixArena<R, C>,
dummy_array: &mut DummyArray, dummy_array: &DummyArray,
) {
pim_state.set_bank_mode(BankMode::PimAllBank);
for i in 0..(R * C) {
let start_column = i % R;
let start_row = (i / R) * R;
for j in 0..C {
pim_matrix_arena0.execute_instruction_read_single_bank(start_column + R * j);
}
for j in 0..R {
pim_matrix_arena1.execute_instruction_read_single_bank(start_row + j);
}
pim_matrix_arena2.execute_instruction_write_single_bank(i);
dummy_array.execute_instruction_read_single_bank(0);
}
pim_state.set_bank_mode(BankMode::SingleBank);
}
pub fn execute_matrix_multiply_rowwise<const R: usize, const C: usize>(
pim_state: &mut PimState,
pim_matrix_arena0: &mut PimMatrixArena<R, C>,
pim_matrix_arena1: &mut PimMatrixArena<R, C>,
pim_matrix_arena2: &mut PimMatrixArena<R, C>,
dummy_array: &mut DummyArray,
) { ) {
pim_state.set_bank_mode(BankMode::PimAllBank); pim_state.set_bank_mode(BankMode::PimAllBank);
@@ -252,3 +306,228 @@ pub fn execute_matrix_multiply_rowwise<const R: usize, const C: usize>(
pim_state.set_bank_mode(BankMode::SingleBank); pim_state.set_bank_mode(BankMode::SingleBank);
} }
pub const MATRIX_VECTOR_MUL: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 4 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 5 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 6 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 7 },
},
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 0 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: true,
},
Instruction::JUMP {
offset: -1,
count: 7,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
/// Drives the `MATRIX_VECTOR_MUL` kernel once per matrix row.
///
/// For every `row` in `0..R` the following accesses are issued, in order:
/// 1. `C` reads of `pim_matrix_arena0` at indices `row + R * column`
///    (one per column of that row),
/// 2. `C` reads of the vector `pim_matrix_arena1` at indices `0..C`,
/// 3. one write to `pim_matrix_arena2` at index `row`,
/// 4. one read of `dummy_array` at index 0.
///
/// The bank mode is switched to `BankMode::PimAllBank` before the loop and
/// back to `BankMode::SingleBank` afterwards.
///
/// # Parameters
/// * `pim_state` - used only to switch the bank mode.
/// * `pim_matrix_arena0` - the `R x C` matrix operand.
/// * `pim_matrix_arena1` - the vector operand with `C` entries.
/// * `pim_matrix_arena2` - receives one write per row.
/// * `dummy_array` - read once at the end of each row's sequence.
///
/// NOTE(review): the result has `R` entries, yet `pim_matrix_arena2` is typed
/// `PimMatrixArena<C, 1>`; the signature is left untouched for compatibility,
/// so non-square use (`R != C`) should be confirmed before relying on it.
/// NOTE(review): `MATRIX_VECTOR_MUL` contains eight `MOV`s, so this presumably
/// expects `C == 8` — confirm.
pub fn execute_matrix_vector_multiply<const R: usize, const C: usize>(
    pim_state: &mut PimState,
    pim_matrix_arena0: &PimMatrixArena<R, C>,
    pim_matrix_arena1: &PimMatrixArena<C, 1>,
    pim_matrix_arena2: &mut PimMatrixArena<C, 1>,
    dummy_array: &DummyArray,
) {
    pim_state.set_bank_mode(BankMode::PimAllBank);

    for row in 0..R {
        // Walk along the current row: one element from each of the C columns.
        for column in 0..C {
            pim_matrix_arena0.execute_instruction_read_single_bank(row + R * column);
        }

        // The vector operand has C entries (it is a `PimMatrixArena<C, 1>`), so
        // it is read C times. The previous bound of `R` issued the wrong
        // number of reads whenever `R != C`.
        for element in 0..C {
            pim_matrix_arena1.execute_instruction_read_single_bank(element);
        }

        pim_matrix_arena2.execute_instruction_write_single_bank(row);
        dummy_array.execute_instruction_read_single_bank(0);
    }

    pim_state.set_bank_mode(BankMode::SingleBank);
}
/// Instruction table for the matrix-scalar multiply kernel.
///
/// Layout: one `MOV` from the bank into `SrfM[0]`, eight `MUL`s of a bank
/// operand with `SrfM[0]` into `GrfA[0..=7]`, eight `FILL`s of `GrfA[0..=7]`
/// back to the bank, `EXIT`, then `NOP` padding.
/// The table has 32 entries in total (18 instructions + 14 `NOP`s).
pub const MATRIX_SCALAR_MUL: Kernel = Kernel([
// Load the scalar operand from the bank into SrfM[0].
Instruction::MOV {
src: File::Bank,
dst: File::SrfM { index: 0 },
},
// Multiply eight bank operands by SrfM[0]; results go to GrfA[0] .. GrfA[7].
Instruction::MUL {
src0: File::Bank,
src1: File::SrfM { index: 0 },
dst: File::GrfA { index: 0 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::SrfM { index: 0 },
dst: File::GrfA { index: 1 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::SrfM { index: 0 },
dst: File::GrfA { index: 2 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::SrfM { index: 0 },
dst: File::GrfA { index: 3 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::SrfM { index: 0 },
dst: File::GrfA { index: 4 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::SrfM { index: 0 },
dst: File::GrfA { index: 5 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::SrfM { index: 0 },
dst: File::GrfA { index: 6 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::SrfM { index: 0 },
dst: File::GrfA { index: 7 },
aam: false,
},
// Write the eight products GrfA[0] .. GrfA[7] back to the bank.
Instruction::FILL {
src: File::GrfA { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 1 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 2 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 3 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 4 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 5 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 6 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 7 },
dst: File::Bank,
},
Instruction::EXIT,
// Padding: 14 NOPs bring the table to 32 entries.
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
/// Drives the `MATRIX_SCALAR_MUL` kernel once per matrix column.
///
/// For every column the following accesses are issued, in order:
/// 1. one read of `pim_scalar_arena` at index 0,
/// 2. `R` reads of `pim_matrix_arena0` at indices `column * R + 0..R`,
/// 3. `R` writes to `pim_matrix_arena1` at the same indices,
/// 4. one read of `dummy_array` at index 0.
///
/// The bank mode is switched to `BankMode::PimAllBank` before the loop and
/// back to `BankMode::SingleBank` afterwards.
///
/// NOTE(review): `MATRIX_SCALAR_MUL` contains eight `MUL`/`FILL` pairs, so
/// this presumably expects `R == 8` — confirm.
pub fn execute_matrix_scalar_multiply<const R: usize, const C: usize>(
    pim_state: &mut PimState,
    pim_scalar_arena: &PimScalarArena,
    pim_matrix_arena0: &PimMatrixArena<R, C>,
    pim_matrix_arena1: &mut PimMatrixArena<R, C>,
    dummy_array: &DummyArray,
) {
    pim_state.set_bank_mode(BankMode::PimAllBank);

    for column_base in (0..C).map(|column| column * R) {
        // The scalar operand is read first for each column.
        pim_scalar_arena.execute_instruction_read_single_bank(0);

        // Source elements of this column.
        for source_index in (0..R).map(|row| column_base + row) {
            pim_matrix_arena0.execute_instruction_read_single_bank(source_index);
        }

        // Destination elements of this column, same indices as the source.
        for target_index in (0..R).map(|row| column_base + row) {
            pim_matrix_arena1.execute_instruction_write_single_bank(target_index);
        }

        dummy_array.execute_instruction_read_single_bank(0);
    }

    pim_state.set_bank_mode(BankMode::SingleBank);
}

View File

@@ -138,7 +138,7 @@ impl PimVM {
if pim_unit_index == 0 { if pim_unit_index == 0 {
log::debug!( log::debug!(
"PimUnit {pim_unit_index} at {address:#x} (B{aam_grf_b_index}, A{aam_grf_a_index}) Execute PC {}: {inst:?}", "PimUnit {pim_unit_index} at {address:#x} (B{aam_grf_b_index}, A{aam_grf_a_index}) Execute Read PC {}: {inst:?}",
pim_unit.pc pim_unit.pc
); );
} }
@@ -331,13 +331,13 @@ impl PimVM {
pim_unit.pc += 1; pim_unit.pc += 1;
} }
if pim_unit_index == 0 { // if pim_unit_index == 0 {
log::debug!( // log::debug!(
"PimUnit {pim_unit_index} JUMP to PC {}: {:?}", // "PimUnit {pim_unit_index} JUMP to PC {}: {:?}",
pim_unit.pc, // pim_unit.pc,
self.kernel.0[pim_unit.pc as usize] // self.kernel.0[pim_unit.pc as usize]
); // );
} // }
} }
} }
@@ -353,7 +353,7 @@ impl PimVM {
if pim_unit_index == 0 { if pim_unit_index == 0 {
log::debug!( log::debug!(
"PimUnit {pim_unit_index} Execute PC {}: {inst:?}", "PimUnit {pim_unit_index} Execute Write PC {}: {inst:?}",
pim_unit.pc pim_unit.pc
); );
} }