First steps towards matrix multiplication

This commit is contained in:
2024-01-01 18:48:37 +01:00
parent dfa773b839
commit 05f184d51f
8 changed files with 409 additions and 197 deletions

8
pim-os/Cargo.lock generated
View File

@@ -182,9 +182,9 @@ dependencies = [
[[package]]
name = "proc-macro2"
version = "1.0.71"
version = "1.0.72"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75cb1540fadbd5b8fbccc4dddad2734eba435053f725621c070711a14bb5f4b8"
checksum = "a293318316cf6478ec1ad2a21c49390a8d5b5eae9fab736467d93fbc0edc29c5"
dependencies = [
"unicode-ident",
]
@@ -285,9 +285,9 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "syn"
version = "2.0.42"
version = "2.0.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b7d0a2c048d661a1a59fcd7355baa232f7ed34e0ee4df2eef3c1c1c0d3852d8"
checksum = "ee659fb5f3d355364e1f3e5bc10fb82068efbf824a1e9d1c9504244a6469ad53"
dependencies = [
"proc-macro2",
"quote",

View File

@@ -12,8 +12,8 @@ use core::{
use half::f16;
use nalgebra::Matrix;
use pim::{
array::{PimMatrixArena, PimRegion, PimStorage},
kernel::TEST_KERNEL,
array::{DummyArray, PimMatrixArena, PimRegion, PimStorage, NUMBER_OF_BANKS},
kernel::{execute_matrix_add, execute_matrix_multiply, MATRIX_ADD, MATRIX_MUL},
state::PimState,
vector::{F16x1, F16x16},
};
@@ -28,55 +28,64 @@ mod uart;
#[no_mangle]
pub extern "C" fn entry() -> ! {
let mut uart = Uart0;
let mut pim_state = PimState::new(&TEST_KERNEL);
let mut pim_state = PimState::new(&MATRIX_MUL);
pim_state.set_kernel();
let pim_matrix_arena0 = RefCell::new(PimMatrixArena([[F16x16::default(); 8]; 8]));
let pim_matrix_arena1 = RefCell::new(PimMatrixArena([[F16x16::default(); 8]; 8]));
let pim_matrix_arena0 = RefCell::new(PimMatrixArena(
[[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8],
));
let pim_matrix_arena1 = RefCell::new(PimMatrixArena(
[[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8],
));
let pim_matrix_arena2 = RefCell::new(PimMatrixArena(
[[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8],
));
let pim_storage0 = PimStorage {
arena: &pim_matrix_arena0,
index: 0,
row_major: true,
};
let pim_storage1 = PimStorage {
arena: &pim_matrix_arena1,
index: 0,
row_major: false,
};
let pim_storage2 = PimStorage {
arena: &pim_matrix_arena2,
index: 0,
row_major: false,
};
let mut matrix0 = Matrix::from_data(pim_storage0);
matrix0.fill_column(0, F16x1(f16::ZERO));
matrix0.fill_column(1, F16x1(f16::ONE));
matrix0.fill_column(2, F16x1(f16::PI));
matrix0.fill_column(3, F16x1(f16::E));
matrix0.fill_column(4, F16x1(f16::EPSILON));
matrix0.fill_column(5, F16x1(f16::SQRT_2));
matrix0.fill_column(6, F16x1(f16::LN_2));
matrix0.fill_column(7, F16x1(f16::LN_10));
matrix0.fill(F16x1(f16::ONE));
let mut matrix1 = Matrix::from_data(pim_storage1);
matrix1.fill_lower_triangle(F16x1(f16::ONE), 0);
writeln!(&mut uart, "{matrix0} + {matrix1}\n=").unwrap();
let matrix2 = Matrix::from_data(pim_storage2);
writeln!(&mut uart, "{matrix0} * {matrix1}\n=").unwrap();
// Invalidate and flush array just in case
pim_matrix_arena0.borrow_mut().invalidate_flush();
pim_matrix_arena1.borrow_mut().invalidate_flush();
pim_matrix_arena2.borrow_mut().invalidate_flush();
let dummy_array = DummyArray([F16x16::default(); NUMBER_OF_BANKS]);
barrier::dsb(barrier::SY);
pim_state.set_bank_mode(BankMode::PimAllBank);
pim_matrix_arena0
.borrow()
.execute_instruction_read_dual_bank();
pim_matrix_arena1
.borrow()
.execute_instruction_read_dual_bank();
pim_matrix_arena0
.borrow_mut()
.execute_instruction_write_dual_bank();
pim_state.set_bank_mode(BankMode::SingleBank);
// execute_matrix_add(&pim_matrix_arena0, &pim_matrix_arena1, &dummy_array);
execute_matrix_multiply(
&mut pim_state,
&pim_matrix_arena0,
&pim_matrix_arena1,
&pim_matrix_arena2,
&dummy_array,
);
pim_matrix_arena0.borrow_mut().invalidate();
pim_matrix_arena2.borrow_mut().invalidate();
barrier::dsb(barrier::SY);
writeln!(&mut uart, "{matrix0}").unwrap();
writeln!(&mut uart, "{matrix2}").unwrap();
m5ops::exit();

View File

@@ -4,17 +4,16 @@ use core::{arch::asm, cell::RefCell};
use half::f16;
use nalgebra::{Const, Dyn, RawStorage, RawStorageMut};
const NUMBER_OF_BANKS: usize = 32;
pub const NUMBER_OF_BANKS: usize = 32;
const EVEN_BANK_INDEX: usize = 0;
const ODD_BANK_INDEX: usize = 8;
#[derive(Clone, Debug)]
#[repr(C, align(1024))]
pub struct PimMatrixArena<const R: usize, const C: usize>(pub [[F16x16; R]; C]);
pub struct PimMatrixArena<const R: usize, const C: usize>(pub [[[F16x16; NUMBER_OF_BANKS]; R]; C]);
impl<const R: usize, const C: usize> PimRegion for PimMatrixArena<R, C> {
const OCCUPIED_CACHE_LINES: usize = R * C;
const OCCUPIED_ROWS: usize = Self::OCCUPIED_CACHE_LINES / NUMBER_OF_BANKS;
const OCCUPIED_CACHE_LINES: usize = R * C * NUMBER_OF_BANKS;
fn bank_ptr(&self, bank_index: usize) -> *const f16 {
unsafe { (self.0.as_ptr() as *const F16x16).add(bank_index) as *const f16 }
@@ -29,6 +28,7 @@ impl<const R: usize, const C: usize> PimRegion for PimMatrixArena<R, C> {
pub struct PimStorage<'a, const R: usize, const C: usize> {
pub arena: &'a RefCell<PimMatrixArena<R, C>>,
pub index: usize,
pub row_major: bool,
}
unsafe impl<'a, const R: usize, const C: usize> RawStorage<F16x1, Const<R>, Const<C>>
@@ -46,7 +46,11 @@ unsafe impl<'a, const R: usize, const C: usize> RawStorage<F16x1, Const<R>, Cons
}
fn strides(&self) -> (Self::RStride, Self::CStride) {
(Dyn(16), Dyn(16 * R))
if self.row_major {
(Dyn(16 * R * NUMBER_OF_BANKS), Dyn(16 * NUMBER_OF_BANKS))
} else {
(Dyn(16 * NUMBER_OF_BANKS), Dyn(16 * R * NUMBER_OF_BANKS))
}
}
fn is_contiguous(&self) -> bool {
@@ -74,40 +78,36 @@ unsafe impl<'a, const R: usize, const C: usize> RawStorageMut<F16x1, Const<R>, C
pub trait PimRegion {
const OCCUPIED_CACHE_LINES: usize;
const OCCUPIED_ROWS: usize;
fn bank_ptr(&self, bank_index: usize) -> *const f16;
fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16;
fn execute_instruction_read_single_bank(&self) {
for i in 0..Self::OCCUPIED_ROWS {
if !cfg!(feature = "cacheless") {
self.invalidate_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
barrier::dsb(barrier::SY);
}
// Read from first bank
self.read_data_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
fn execute_instruction_read_single_bank(&self, i: usize) {
if !cfg!(feature = "cacheless") {
self.invalidate_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
barrier::dsb(barrier::SY);
}
// Read from first bank
self.read_data_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
barrier::dsb(barrier::SY);
}
fn execute_instruction_read_dual_bank(&self) {
for i in 0..Self::OCCUPIED_ROWS {
if !cfg!(feature = "cacheless") {
self.invalidate_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
self.invalidate_bank(ODD_BANK_INDEX + i * NUMBER_OF_BANKS);
barrier::dsb(barrier::SY);
}
// Read from first and second bank
self.read_data_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
self.read_data_bank(ODD_BANK_INDEX + i * NUMBER_OF_BANKS);
let i = 0;
if !cfg!(feature = "cacheless") {
self.invalidate_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
self.invalidate_bank(ODD_BANK_INDEX + i * NUMBER_OF_BANKS);
barrier::dsb(barrier::SY);
}
// Read from first and second bank
self.read_data_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
self.read_data_bank(ODD_BANK_INDEX + i * NUMBER_OF_BANKS);
barrier::dsb(barrier::SY);
}
fn read_data_bank(&self, bank_index: usize) {
@@ -117,43 +117,40 @@ pub trait PimRegion {
}
}
fn execute_instruction_write_single_bank(&mut self) {
for i in 0..Self::OCCUPIED_ROWS {
if !cfg!(feature = "cacheless") {
self.preload_zero_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
barrier::dsb(barrier::SY);
}
// Write to first bank
self.write_data_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
if !cfg!(feature = "cacheless") {
self.invalidate_flush_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
}
fn execute_instruction_write_single_bank(&mut self, i: usize) {
if !cfg!(feature = "cacheless") {
self.preload_zero_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
barrier::dsb(barrier::SY);
}
// Write to first bank
self.write_data_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
if !cfg!(feature = "cacheless") {
self.invalidate_flush_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
}
barrier::dsb(barrier::SY);
}
fn execute_instruction_write_dual_bank(&mut self) {
for i in 0..Self::OCCUPIED_ROWS {
if !cfg!(feature = "cacheless") {
self.preload_zero_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
self.preload_zero_bank(ODD_BANK_INDEX + i * NUMBER_OF_BANKS);
barrier::dsb(barrier::SY);
}
// Write to first and second bank
self.write_data_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
self.write_data_bank(ODD_BANK_INDEX + i * NUMBER_OF_BANKS);
if !cfg!(feature = "cacheless") {
self.invalidate_flush_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
self.invalidate_flush_bank(ODD_BANK_INDEX + i * NUMBER_OF_BANKS);
}
let i = 0;
if !cfg!(feature = "cacheless") {
self.preload_zero_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
self.preload_zero_bank(ODD_BANK_INDEX + i * NUMBER_OF_BANKS);
barrier::dsb(barrier::SY);
}
// Write to first and second bank
self.write_data_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
self.write_data_bank(ODD_BANK_INDEX + i * NUMBER_OF_BANKS);
if !cfg!(feature = "cacheless") {
self.invalidate_flush_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
self.invalidate_flush_bank(ODD_BANK_INDEX + i * NUMBER_OF_BANKS);
}
barrier::dsb(barrier::SY);
}
fn write_data_bank(&mut self, bank_index: usize) {
@@ -203,7 +200,6 @@ pub struct DummyArray(pub [F16x16; NUMBER_OF_BANKS]);
impl PimRegion for DummyArray {
const OCCUPIED_CACHE_LINES: usize = NUMBER_OF_BANKS;
const OCCUPIED_ROWS: usize = 1;
fn bank_ptr(&self, bank_index: usize) -> *const f16 {
&self.0[bank_index] as *const F16x16 as *const f16

View File

@@ -1,6 +1,13 @@
use pim_isa::{File, Instruction, Kernel};
use core::cell::RefCell;
pub const TEST_KERNEL: Kernel = Kernel([
use pim_isa::{BankMode, File, Instruction, Kernel};
use super::{
array::{DummyArray, PimMatrixArena, PimRegion},
state::PimState,
};
pub const MATRIX_ADD: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
@@ -41,28 +48,6 @@ pub const TEST_KERNEL: Kernel = Kernel([
dst: File::GrfB { index: 1 },
aam: false,
},
// Instruction::MOV {
// src: File::Bank,
// dst: File::GrfA { index: 1 },
// },
// Instruction::MOV {
// src: File::Bank,
// dst: File::GrfB { index: 1 },
// },
// Instruction::MAC {
// src0: File::Bank,
// src1: File::GrfA { index: 0 },
// src2: File::GrfB { index: 0 },
// dst: File::GrfB { index: 0 },
// aam: false,
// },
// Instruction::MAC {
// src0: File::Bank,
// src1: File::GrfA { index: 1 },
// src2: File::GrfB { index: 1 },
// dst: File::GrfB { index: 1 },
// aam: false,
// },
Instruction::FILL {
src: File::GrfA { index: 0 },
dst: File::Bank,
@@ -101,59 +86,239 @@ pub const TEST_KERNEL: Kernel = Kernel([
Instruction::NOP,
]);
// pub const TEST_KERNEL: Kernel = Kernel([
// Instruction::MOV {
// src: File::Bank,
// dst: File::GrfA { index: 0 },
// },
// Instruction::MOV {
// src: File::Bank,
// dst: File::GrfA { index: 1 },
// },
// Instruction::ADD {
// src0: File::Bank,
// src1: File::GrfA { index: 0 },
// dst: File::GrfA { index: 0 },
// aam: false,
// },
// Instruction::ADD {
// src0: File::Bank,
// src1: File::GrfA { index: 1 },
// dst: File::GrfA { index: 1 },
// aam: false,
// },
// Instruction::FILL {
// src: File::GrfA { index: 0 },
// dst: File::Bank,
// },
// Instruction::FILL {
// src: File::GrfA { index: 1 },
// dst: File::Bank,
// },
// Instruction::EXIT,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// Instruction::NOP,
// ]);
/// Placeholder for the matrix-addition access sequence.
///
/// The body currently consists solely of commented-out code, so calling this
/// function does nothing and none of the three parameters is used.
///
/// The disabled sequence would call `execute_instruction_read_dual_bank` on
/// `pim_matrix_arena0` and `pim_matrix_arena1`, then
/// `execute_instruction_write_dual_bank` on `pim_matrix_arena0`, and finally
/// `execute_instruction_read_single_bank` on `dummy_array`.
///
/// NOTE(review): the disabled `dummy_array` call passes no argument, whereas
/// `execute_matrix_multiply` calls the same method with an index (`0`) —
/// the sequence needs updating before it can be re-enabled.
pub fn execute_matrix_add(
pim_matrix_arena0: &RefCell<PimMatrixArena<8, 8>>,
pim_matrix_arena1: &RefCell<PimMatrixArena<8, 8>>,
dummy_array: &DummyArray,
) {
// pim_matrix_arena0
// .borrow()
// .execute_instruction_read_dual_bank();
// pim_matrix_arena1
// .borrow()
// .execute_instruction_read_dual_bank();
// pim_matrix_arena0
// .borrow_mut()
// .execute_instruction_write_dual_bank();
// dummy_array.execute_instruction_read_single_bank();
}
/// Kernel table used for the matrix-multiplication experiment.
///
/// Layout (32 entries in total):
/// - 8 x `MOV` from `File::Bank` into `GrfA` indices 0-7,
/// - 8 x `MAC` with `src0: Bank`, `src1: GrfA[i]` (i = 0-7) and both `src2`
///   and `dst` set to `GrfB[0]`,
/// - 8 x `FILL` from `GrfB` indices 0-7 into `File::Bank`,
/// - `EXIT`, followed by 7 x `NOP`.
///
/// NOTE(review): every `MAC` targets `GrfB[0]`, yet the `FILL` entries also
/// store `GrfB[1]`-`GrfB[7]`, which no entry in this kernel writes — confirm
/// this is intended for the current "first steps" state.
pub const MATRIX_MUL: Kernel = Kernel([
// --- MOV: Bank -> GrfA[0..=7] ---
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 4 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 5 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 6 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 7 },
},
// --- MAC: (Bank, GrfA[i], GrfB[0]) -> GrfB[0], for i = 0..=7 ---
// Presumably a multiply-accumulate into GrfB[0]; exact operand roles are
// defined by `pim_isa` — TODO confirm.
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 0 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 1 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 2 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 3 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 4 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 5 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 6 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 7 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
// --- FILL: GrfB[0..=7] -> Bank ---
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 2 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 3 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 4 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 5 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 6 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 7 },
dst: File::Bank,
},
// --- EXIT, then NOP entries filling the rest of the table ---
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
/// Issues the memory-access sequence for one pass of the matrix-multiply kernel.
///
/// The sequence is, in order:
/// 1. switch `pim_state` to `BankMode::PimAllBank`,
/// 2. eight `execute_instruction_read_single_bank` calls (indices 0-7) on
///    `pim_matrix_arena0`,
/// 3. the same eight reads on `pim_matrix_arena1`,
/// 4. eight `execute_instruction_write_single_bank` calls (indices 0-7) on
///    `pim_matrix_arena2`,
/// 5. one read with index 0 on `dummy_array`,
/// 6. switch `pim_state` back to `BankMode::SingleBank`.
///
/// The count of eight is fixed and independent of `R` and `C`; it matches the
/// eight `MOV`, `MAC` and `FILL` entries of `MATRIX_MUL` (presumably one
/// access per kernel entry — TODO confirm).
///
/// # Panics
///
/// Panics if `pim_matrix_arena2` is already borrowed when the write phase
/// starts, or if an input arena is mutably borrowed during the read phase
/// (standard `RefCell` borrow rules).
pub fn execute_matrix_multiply<const R: usize, const C: usize>(
    pim_state: &mut PimState,
    pim_matrix_arena0: &RefCell<PimMatrixArena<R, C>>,
    pim_matrix_arena1: &RefCell<PimMatrixArena<R, C>>,
    pim_matrix_arena2: &RefCell<PimMatrixArena<R, C>>,
    dummy_array: &DummyArray,
) {
    // Number of consecutive accesses issued per arena.
    const ACCESSES_PER_ARENA: usize = 8;

    pim_state.set_bank_mode(BankMode::PimAllBank);

    // Read phase: all of arena0 first, then all of arena1.
    for operand in [pim_matrix_arena0, pim_matrix_arena1] {
        for row in 0..ACCESSES_PER_ARENA {
            operand.borrow().execute_instruction_read_single_bank(row);
        }
    }

    // Write phase into the result arena.
    for row in 0..ACCESSES_PER_ARENA {
        pim_matrix_arena2
            .borrow_mut()
            .execute_instruction_write_single_bank(row);
    }

    // Trailing access on the dummy array before leaving PIM mode.
    dummy_array.execute_instruction_read_single_bank(0);

    pim_state.set_bank_mode(BankMode::SingleBank);
}

View File

@@ -27,10 +27,21 @@ impl PimState {
}
}
self.writer.write(
serde_json_core::to_string::<PimConfig, 64>(&PimConfig {
kernel: None,
bank_mode: Some(bank_mode),
})
.unwrap()
.as_str(),
);
}
pub fn set_kernel(&mut self) {
self.writer.write(
serde_json_core::to_string::<PimConfig, 2048>(&PimConfig {
kernel: self.kernel.clone(),
bank_mode,
kernel: Some(self.kernel.clone()),
bank_mode: None,
})
.unwrap()
.as_str(),

View File

@@ -63,9 +63,22 @@ impl core::ops::MulAssign<F16x1> for F16x1 {
}
#[repr(C)]
#[derive(Default, Clone, Copy, Debug, PartialEq)]
#[derive(Default, Clone, Copy, PartialEq)]
pub struct F16x16(pub [F16x1; FLOATING_POINT_UNITS]);
// TODO remove
// Debug formatting for `F16x16` that prints a single element only:
// the output is whatever formatting element 0 of the inner array produces;
// the remaining elements are not shown.
impl core::fmt::Debug for F16x16 {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
// Delegate to the `fmt` of the first `F16x1` element.
// NOTE(review): this is a method call, so which formatting trait's `fmt`
// is used depends on the traits in scope in this module — confirm.
self.0[0].fmt(f)
}
}
// Display formatting for `F16x16` that prints a single element only:
// the output is whatever formatting element 0 of the inner array produces;
// the remaining elements are not shown.
impl core::fmt::Display for F16x16 {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
// Delegate to the `fmt` of the first `F16x1` element.
// NOTE(review): this is a method call, so which formatting trait's `fmt`
// is used depends on the traits in scope in this module — confirm.
self.0[0].fmt(f)
}
}
impl num_traits::identities::Zero for F16x16 {
fn zero() -> Self {
Self([F16x1::zero(); FLOATING_POINT_UNITS])