Implement pim-os and pim-vm

This commit is contained in:
2025-08-06 15:18:59 +02:00
commit b326516f02
49 changed files with 3594 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
target

65
pim-isa/Cargo.lock generated Normal file
View File

@@ -0,0 +1,65 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "pim-isa"
version = "0.1.0"
dependencies = [
"serde",
]
[[package]]
name = "proc-macro2"
version = "1.0.76"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
dependencies = [
"proc-macro2",
]
[[package]]
name = "serde"
version = "1.0.195"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.195"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "syn"
version = "2.0.48"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"

7
pim-isa/Cargo.toml Normal file
View File

@@ -0,0 +1,7 @@
[package]
name = "pim-isa"
version = "0.1.0"
edition = "2021"
[dependencies]
serde = { version = "1.0", default-features = false, features = ["derive"] }

76
pim-isa/src/lib.rs Normal file
View File

@@ -0,0 +1,76 @@
#![no_std]
use serde::{Deserialize, Serialize};
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub enum Instruction {
NOP,
EXIT,
JUMP {
offset: i16,
count: u16,
},
MOV {
src: File,
dst: File,
},
FILL {
src: File,
dst: File,
},
ADD {
src0: File,
src1: File,
dst: File,
aam: bool,
},
MUL {
src0: File,
src1: File,
dst: File,
aam: bool,
},
MAC {
src0: File,
src1: File,
src2: File,
dst: File,
aam: bool,
},
MAD {
src0: File,
src1: File,
src2: File,
dst: File,
aam: bool,
},
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum File {
GrfA { index: u8 },
GrfB { index: u8 },
SrfM { index: u8 },
SrfA { index: u8 },
Bank,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Kernel(pub [Instruction; 32]);
impl Kernel {
pub const NOP: Kernel = Kernel([Instruction::NOP; 32]);
}
#[derive(Debug, Serialize, Deserialize)]
pub struct PimConfig {
pub bank_mode: Option<BankMode>,
pub kernel: Option<Kernel>,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum BankMode {
SingleBank,
AllBank,
PimAllBank,
}

331
pim-os/Cargo.lock generated Normal file
View File

@@ -0,0 +1,331 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aarch64-cpu"
version = "9.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac42a04a61c19fc8196dd728022a784baecc5d63d7e256c01ad1b3fbfab26287"
dependencies = [
"tock-registers",
]
[[package]]
name = "approx"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6"
dependencies = [
"num-traits",
]
[[package]]
name = "atomic-polyfill"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8cf2bce30dfe09ef0bfaef228b9d414faaf7e563035494d7fe092dba54b300f4"
dependencies = [
"critical-section",
]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "critical-section"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7059fff8937831a9ae6f0fe4d658ffabf58f2ca96aa9dec1c889f936f705f216"
[[package]]
name = "crunchy"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
[[package]]
name = "embedded-alloc"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddae17915accbac2cfbc64ea0ae6e3b330e6ea124ba108dada63646fd3c6f815"
dependencies = [
"critical-section",
"linked_list_allocator",
]
[[package]]
name = "half"
version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872"
dependencies = [
"cfg-if",
"crunchy",
]
[[package]]
name = "hash32"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67"
dependencies = [
"byteorder",
]
[[package]]
name = "heapless"
version = "0.7.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdc6457c0eb62c71aac4bc17216026d8410337c4126773b9c5daba343f17964f"
dependencies = [
"atomic-polyfill",
"hash32",
"rustc_version",
"spin",
"stable_deref_trait",
]
[[package]]
name = "linked_list_allocator"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9afa463f5405ee81cdb9cc2baf37e08ec7e4c8209442b5d72c04cfb2cd6e6286"
[[package]]
name = "lock_api"
version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "nalgebra"
version = "0.32.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "307ed9b18cc2423f29e83f84fd23a8e73628727990181f18641a8b5dc2ab1caa"
dependencies = [
"approx",
"num-complex",
"num-rational",
"num-traits",
"simba",
"typenum",
]
[[package]]
name = "num-complex"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214"
dependencies = [
"num-traits",
]
[[package]]
name = "num-integer"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-rational"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0"
dependencies = [
"autocfg",
"num-integer",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c"
dependencies = [
"autocfg",
]
[[package]]
name = "paste"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
[[package]]
name = "pim-isa"
version = "0.1.0"
dependencies = [
"serde",
]
[[package]]
name = "pim-os"
version = "0.1.0"
dependencies = [
"aarch64-cpu",
"critical-section",
"embedded-alloc",
"half",
"nalgebra",
"num-traits",
"pim-isa",
"serde",
"serde-json-core",
]
[[package]]
name = "proc-macro2"
version = "1.0.76"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rustc_version"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
dependencies = [
"semver",
]
[[package]]
name = "ryu"
version = "1.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c"
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "semver"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0"
[[package]]
name = "serde"
version = "1.0.195"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde-json-core"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c9e1ab533c0bc414c34920ec7e5f097101d126ed5eac1a1aac711222e0bbb33"
dependencies = [
"heapless",
"ryu",
"serde",
]
[[package]]
name = "serde_derive"
version = "1.0.195"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "simba"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "061507c94fc6ab4ba1c9a0305018408e312e17c041eb63bef8aa726fa33aceae"
dependencies = [
"approx",
"num-complex",
"num-traits",
"paste",
]
[[package]]
name = "spin"
version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
dependencies = [
"lock_api",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "syn"
version = "2.0.48"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "tock-registers"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "696941a0aee7e276a165a978b37918fd5d22c55c3d6bda197813070ca9c0f21c"
[[package]]
name = "typenum"
version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"

31
pim-os/Cargo.toml Normal file
View File

@@ -0,0 +1,31 @@
cargo-features = ["per-package-target"]
[package]
name = "pim-os"
version = "0.1.0"
edition = "2021"
forced-target = "aarch64-unknown-none"
[features]
cacheless = []
X1 = []
X2 = []
X3 = []
X4 = []
[dependencies]
aarch64-cpu = "9.4.0"
half = { version = "2.3.1", default-features = false }
nalgebra = { version = "0.32.3", default-features = false }
pim-isa = { path = "../pim-isa", default-features = false }
serde-json-core = "0.5.1"
serde = { version = "1.0", default-features = false, features = ["derive"] }
num-traits = { version = "0.2.17", default-features = false }
embedded-alloc = "0.5.1"
critical-section = "1.1.2"
[profile.dev]
panic = "abort"
[profile.release]
panic = "abort"

24
pim-os/aarch64-gem5.ld Normal file
View File

@@ -0,0 +1,24 @@
MEMORY
{
bootmem : ORIGIN = 0x0, LENGTH = 0x100000
dram : ORIGIN = 0x80000000, LENGTH = 0x40000000
dram_pim_config : ORIGIN = 0xC0000000, LENGTH = 0x4000
dram_pim_data : ORIGIN = 0xC0004000, LENGTH = 0x3FFFC000
}
ENTRY(_start)
SECTIONS
{
.init : { *(.init) } > bootmem
.text : { KEEP(*(.text)) } > dram
.data : { *(.data) } > dram
.rodata : { *(.rodata) } > dram
.bss : { *(.bss) } > dram
. = ALIGN(8);
. = . + 0x10000000; # 100 MiB Stack
LD_STACK_PTR = .;
.pim_config : { KEEP(*(.pim_config)) } > dram_pim_config
.pim_data : { KEEP(*(.pim_data)) } > dram_pim_data
}

15
pim-os/build.rs Normal file
View File

@@ -0,0 +1,15 @@
use std::env;
use std::fs;
use std::path::PathBuf;
const LINKER_SCRIPT: &str = "aarch64-gem5.ld";
fn main() {
// Put `aarch64-gem5.ld` in our output directory and ensure it's
// on the linker search path.
let out = &PathBuf::from(env::var_os("OUT_DIR").unwrap());
fs::copy(LINKER_SCRIPT, out.join(LINKER_SCRIPT)).unwrap();
println!("cargo:rustc-link-search={}", out.display());
println!("cargo:rerun-if-changed={LINKER_SCRIPT}");
println!("cargo:rustc-link-arg=-T{LINKER_SCRIPT}");
}

View File

@@ -0,0 +1,2 @@
[toolchain]
channel = "nightly"

18
pim-os/src/alloc.rs Normal file
View File

@@ -0,0 +1,18 @@
extern crate alloc;
use core::mem::MaybeUninit;
use embedded_alloc::Heap;
#[global_allocator]
static PIM_ALLOC: Heap = Heap::empty();
const PIM_ARENA_SIZE: usize = 0x2000000;
#[link_section = ".pim_data"]
static mut PIM_ARENA: [MaybeUninit<u8>; PIM_ARENA_SIZE] = [MaybeUninit::uninit(); PIM_ARENA_SIZE];
pub fn init() {
unsafe {
PIM_ALLOC.init(PIM_ARENA.as_ptr() as usize, PIM_ARENA_SIZE);
}
}

67
pim-os/src/bin/gemv.rs Normal file
View File

@@ -0,0 +1,67 @@
#![no_std]
#![no_main]
extern crate alloc;
use aarch64_cpu::asm::barrier;
use alloc::boxed::Box;
use core::fmt::Write;
use nalgebra::{SMatrix, SVector};
use num_traits::{One, Zero};
use pim_isa::BankMode;
use pim_os::{
kernel::gemv,
pim::{
self, interleaved_array,
vector::{F16x1, F16x16},
},
uart::Uart0,
};
const ROWS: usize = 128;
const COLUMNS: usize = 128;
const X16_ROWS: usize = ROWS / 16;
const X16_COLUMNS: usize = COLUMNS / 16;
#[no_mangle]
pub extern "C" fn main() {
pim::state::set_kernel(&gemv::KERNEL);
let mut matrix = SMatrix::<_, ROWS, COLUMNS>::zeros();
matrix.fill_lower_triangle(F16x1::one(), 0);
let pim_matrix = Box::new(pim::continuous_array::Matrix::<X16_ROWS, X16_COLUMNS>::from(matrix));
let input_vector = SVector::<_, X16_COLUMNS>::from_element(F16x16::one());
let interleaved_input_vector = Box::new(interleaved_array::Vector::from(input_vector));
let mut output_partial_sum_vector = Box::new(SVector::<F16x16, ROWS>::zeros());
let dummy = Box::new(0);
// Verify everything is correctly initialized before PIM operation
barrier::dsb(barrier::SY);
// Execute kernel
pim::state::set_bank_mode(BankMode::PimAllBank);
gemv::execute(
pim_matrix.as_ref(),
interleaved_input_vector.as_ref(),
output_partial_sum_vector.as_mut(),
dummy.as_ref(),
);
pim::state::set_bank_mode(BankMode::SingleBank);
writeln!(Uart0, "{output_partial_sum_vector}").unwrap();
let output_vector = SVector::<F16x1, ROWS>::from_fn(|r, _| {
output_partial_sum_vector[r]
.0
.iter()
.fold(F16x1::zero(), |acc, val| acc + *val)
});
core::hint::black_box(output_vector);
writeln!(Uart0, "{output_vector}").unwrap();
writeln!(Uart0, "Done").unwrap();
}

64
pim-os/src/bin/haxpy.rs Normal file
View File

@@ -0,0 +1,64 @@
#![no_std]
#![no_main]
extern crate alloc;
use aarch64_cpu::asm::barrier;
use alloc::boxed::Box;
use core::fmt::Write;
use half::f16;
use nalgebra::SVector;
use pim_isa::BankMode;
use pim_os::{
kernel::haxpy,
pim::{
self, interleaved_array,
vector::{F16x1, F16x16},
},
uart::Uart0,
};
const ROWS: usize = 256;
const ELEMENTS_PER_BANK: usize = 16;
const BANKS: usize = 16;
const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS);
#[no_mangle]
pub extern "C" fn main() {
pim::state::set_kernel(&haxpy::KERNEL);
let a = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::from_fn(|i, _| F16x1(f16::from_f32(i as _))),
));
let b = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::from_fn(|i, _| F16x1(f16::from_f32((ROWS - i) as _))),
));
let scalar_vector = SVector::<F16x16, 1>::from_element(F16x16([F16x1(f16::NEG_ONE); 16]));
let interleaved_scalar_vector = Box::new(interleaved_array::Vector::from(scalar_vector));
writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap();
let mut c = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::zeros(),
));
let dummy = Box::new(0);
// Verify everything is correctly initialized before PIM operation
barrier::dsb(barrier::SY);
// Execute kernel
pim::state::set_bank_mode(BankMode::PimAllBank);
haxpy::execute::<ROWS, BLOCKS>(
&a.0,
&b.0,
&interleaved_scalar_vector,
&mut c.0,
dummy.as_ref(),
);
pim::state::set_bank_mode(BankMode::SingleBank);
writeln!(Uart0, "{}", c.0).unwrap();
writeln!(Uart0, "Done").unwrap();
}

View File

@@ -0,0 +1,78 @@
#![no_std]
#![no_main]
extern crate alloc;
use aarch64_cpu::asm::barrier;
use alloc::{boxed::Box, rc::Rc};
use core::{cell::RefCell, fmt::Write};
use half::f16;
use nalgebra::Matrix;
use pim_isa::BankMode;
use pim_os::{
pim::{
self,
array::{DummyArray, PimMatrixArena, PimStorage, NUMBER_OF_BANKS},
kernel::matrix_matrix_add,
vector::{F16x1, F16x16},
},
uart::Uart0,
};
#[no_mangle]
pub extern "C" fn main() {
pim::state::set_kernel(&matrix_matrix_add::KERNEL);
let pim_matrix_arena0 = Rc::new(RefCell::new(PimMatrixArena(
[[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8],
)));
let pim_matrix_arena1 = Rc::new(RefCell::new(PimMatrixArena(
[[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8],
)));
let pim_matrix_arena2 = Rc::new(RefCell::new(PimMatrixArena(
[[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8],
)));
let mut matrix0 = Matrix::from_data(PimStorage {
arena: &pim_matrix_arena0,
index: 0,
});
matrix0.fill_lower_triangle(F16x1(f16::ONE), 0);
let mut matrix1 = Matrix::from_data(PimStorage {
arena: &pim_matrix_arena1,
index: 0,
});
matrix1.fill_lower_triangle(F16x1(f16::ONE), 0);
let matrix2 = Matrix::from_data(PimStorage {
arena: &pim_matrix_arena2,
index: 0,
});
writeln!(Uart0, "{matrix0} + {matrix1}\n=").unwrap();
let dummy_array = Box::new(DummyArray([F16x16::default(); NUMBER_OF_BANKS]));
// Verify everything is correctly initialized before PIM operation
barrier::dsb(barrier::SY);
{
let pim_matrix_arena0 = &pim_matrix_arena0.borrow();
let pim_matrix_arena1 = &pim_matrix_arena1.borrow();
let pim_matrix_arena2 = &mut pim_matrix_arena2.borrow_mut();
pim::state::set_bank_mode(BankMode::PimAllBank);
matrix_matrix_add::execute(
pim_matrix_arena0,
pim_matrix_arena1,
pim_matrix_arena2,
dummy_array.as_ref(),
);
pim::state::set_bank_mode(BankMode::SingleBank);
}
writeln!(Uart0, "{matrix2}").unwrap();
}

View File

@@ -0,0 +1,79 @@
#![no_std]
#![no_main]
extern crate alloc;
use aarch64_cpu::asm::barrier;
use alloc::{boxed::Box, rc::Rc};
use pim_isa::BankMode;
use core::{cell::RefCell, fmt::Write};
use half::f16;
use nalgebra::Matrix;
use pim_os::{
pim::{
self,
array::{DummyArray, PimMatrixArena, PimStorage, NUMBER_OF_BANKS},
kernel::matrix_matrix_mul,
vector::{F16x1, F16x16},
},
uart::Uart0,
};
#[no_mangle]
pub extern "C" fn main() {
pim::state::set_kernel(&matrix_matrix_mul::KERNEL);
let pim_matrix_arena0 = Rc::new(RefCell::new(PimMatrixArena(
[[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8],
)));
let pim_matrix_arena1 = Rc::new(RefCell::new(PimMatrixArena(
[[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8],
)));
let pim_matrix_arena2 = Rc::new(RefCell::new(PimMatrixArena(
[[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8],
)));
let mut matrix0 = Matrix::from_data(PimStorage {
arena: &pim_matrix_arena0,
index: 0,
});
matrix0.fill_lower_triangle(F16x1(f16::ONE), 0);
let mut matrix1 = Matrix::from_data(PimStorage {
arena: &pim_matrix_arena1,
index: 0,
});
matrix1.fill_lower_triangle(F16x1(f16::ONE), 0);
let matrix2 = Matrix::from_data(PimStorage {
arena: &pim_matrix_arena2,
index: 0,
});
writeln!(Uart0, "{matrix0} * {matrix1}\n=").unwrap();
let dummy_array = Box::new(DummyArray([F16x16::default(); NUMBER_OF_BANKS]));
// Verify everything is correctly initialized before PIM operation
barrier::dsb(barrier::SY);
// Execute kernel
{
let pim_matrix_arena0 = &pim_matrix_arena0.borrow();
let pim_matrix_arena1 = &pim_matrix_arena1.borrow();
let pim_matrix_arena2 = &mut pim_matrix_arena2.borrow_mut();
pim::state::set_bank_mode(BankMode::PimAllBank);
matrix_matrix_mul::execute(
pim_matrix_arena0,
pim_matrix_arena1,
pim_matrix_arena2,
dummy_array.as_ref(),
);
pim::state::set_bank_mode(BankMode::SingleBank);
}
writeln!(Uart0, "{matrix2}").unwrap();
}

View File

@@ -0,0 +1,73 @@
#![no_std]
#![no_main]
extern crate alloc;
use aarch64_cpu::asm::barrier;
use alloc::{boxed::Box, rc::Rc};
use core::{cell::RefCell, fmt::Write};
use half::f16;
use nalgebra::Matrix;
use pim_isa::BankMode;
use pim_os::{
pim::{
self,
array::{DummyArray, PimMatrixArena, PimScalarArena, PimStorage, NUMBER_OF_BANKS},
kernel::matrix_scalar_mul,
vector::{F16x1, F16x16},
},
uart::Uart0,
};
#[no_mangle]
pub extern "C" fn main() {
pim::state::set_kernel(&matrix_scalar_mul::KERNEL);
let pim_matrix_arena0 = Rc::new(RefCell::new(PimMatrixArena(
[[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8],
)));
let pim_matrix_arena1 = Rc::new(RefCell::new(PimMatrixArena(
[[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8],
)));
let mut matrix0 = Matrix::from_data(PimStorage {
arena: &pim_matrix_arena0,
index: 0,
});
matrix0.fill_lower_triangle(F16x1(f16::ONE), 0);
let matrix1 = Matrix::from_data(PimStorage {
arena: &pim_matrix_arena1,
index: 0,
});
let pim_scalar_arena = Box::new(PimScalarArena(
[F16x16([F16x1(f16::from_f32(2.0)); 16]); 32],
));
writeln!(Uart0, "{} * {matrix0}\n=", pim_scalar_arena.0[0].0[0]).unwrap();
let dummy_array = Box::new(DummyArray([F16x16::default(); NUMBER_OF_BANKS]));
// Verify everything is correctly initialized before PIM operation
barrier::dsb(barrier::SY);
// Execute kernel
{
let pim_matrix_arena0 = &pim_matrix_arena0.borrow();
let pim_matrix_arena1 = &mut pim_matrix_arena1.borrow_mut();
pim::state::set_bank_mode(BankMode::PimAllBank);
matrix_scalar_mul::execute(
pim_scalar_arena.as_ref(),
pim_matrix_arena0,
pim_matrix_arena1,
dummy_array.as_ref(),
);
pim::state::set_bank_mode(BankMode::SingleBank);
}
writeln!(Uart0, "{matrix1}").unwrap();
}

View File

@@ -0,0 +1,79 @@
#![no_std]
#![no_main]
extern crate alloc;
use aarch64_cpu::asm::barrier;
use alloc::{boxed::Box, rc::Rc};
use core::{cell::RefCell, fmt::Write};
use half::f16;
use nalgebra::Matrix;
use pim_isa::BankMode;
use pim_os::{
pim::{
self,
array::{DummyArray, PimMatrixArena, PimStorage, NUMBER_OF_BANKS},
kernel::matrix_vector_mul,
vector::{F16x1, F16x16},
},
uart::Uart0,
};
#[no_mangle]
pub extern "C" fn main() {
pim::state::set_kernel(&matrix_vector_mul::KERNEL);
let pim_matrix_arena0 = Rc::new(RefCell::new(PimMatrixArena(
[[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8],
)));
let pim_matrix_arena1 = Rc::new(RefCell::new(PimMatrixArena(
[[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 1],
)));
let pim_matrix_arena2 = Rc::new(RefCell::new(PimMatrixArena(
[[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 1],
)));
let mut matrix0 = Matrix::from_data(PimStorage {
arena: &pim_matrix_arena0,
index: 0,
});
matrix0.fill_lower_triangle(F16x1(f16::ONE), 0);
let mut matrix1 = Matrix::from_data(PimStorage {
arena: &pim_matrix_arena1,
index: 0,
});
matrix1.fill_lower_triangle(F16x1(f16::ONE), 0);
let matrix2 = Matrix::from_data(PimStorage {
arena: &pim_matrix_arena2,
index: 0,
});
writeln!(Uart0, "{matrix0} * {matrix1}\n=").unwrap();
let dummy_array = Box::new(DummyArray([F16x16::default(); NUMBER_OF_BANKS]));
// Verify everything is correctly initialized before PIM operation
barrier::dsb(barrier::SY);
// Execute kernel
{
let pim_matrix_arena0 = &pim_matrix_arena0.borrow();
let pim_matrix_arena1 = &pim_matrix_arena1.borrow();
let pim_matrix_arena2 = &mut pim_matrix_arena2.borrow_mut();
pim::state::set_bank_mode(BankMode::PimAllBank);
matrix_vector_mul::execute(
pim_matrix_arena0,
pim_matrix_arena1,
pim_matrix_arena2,
dummy_array.as_ref(),
);
pim::state::set_bank_mode(BankMode::SingleBank);
}
writeln!(Uart0, "{matrix2}").unwrap();
}

52
pim-os/src/bin/vadd.rs Normal file
View File

@@ -0,0 +1,52 @@
#![no_std]
#![no_main]
extern crate alloc;
use aarch64_cpu::asm::barrier;
use alloc::boxed::Box;
use core::fmt::Write;
use half::f16;
use nalgebra::SVector;
use pim_isa::BankMode;
use pim_os::{
kernel::vadd,
pim::{self, vector::F16x1},
uart::Uart0,
};
const ROWS: usize = 256;
const ELEMENTS_PER_BANK: usize = 16;
const BANKS: usize = 16;
const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS);
#[no_mangle]
pub extern "C" fn main() {
pim::state::set_kernel(&vadd::KERNEL);
let a = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::from_fn(|i, _| F16x1(f16::from_f32(i as _))),
));
let b = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::from_fn(|i, _| F16x1(f16::from_f32((ROWS - i) as _))),
));
writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap();
let mut c = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::zeros(),
));
let dummy = Box::new(0);
// Verify everything is correctly initialized before PIM operation
barrier::dsb(barrier::SY);
// Execute kernel
pim::state::set_bank_mode(BankMode::PimAllBank);
vadd::execute::<ROWS, BLOCKS>(&a.0, &b.0, &mut c.0, dummy.as_ref());
pim::state::set_bank_mode(BankMode::SingleBank);
writeln!(Uart0, "{}", c.0).unwrap();
writeln!(Uart0, "Done").unwrap();
}

52
pim-os/src/bin/vmul.rs Normal file
View File

@@ -0,0 +1,52 @@
#![no_std]
#![no_main]
extern crate alloc;
use aarch64_cpu::asm::barrier;
use alloc::boxed::Box;
use core::fmt::Write;
use half::f16;
use nalgebra::SVector;
use pim_isa::BankMode;
use pim_os::{
kernel::vmul,
pim::{self, vector::F16x1},
uart::Uart0,
};
const ROWS: usize = 256;
const ELEMENTS_PER_BANK: usize = 16;
const BANKS: usize = 16;
const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS);
#[no_mangle]
pub extern "C" fn main() {
pim::state::set_kernel(&vmul::KERNEL);
let a = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::from_fn(|_, _| F16x1(f16::from_f32(2 as _))),
));
let b = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::from_fn(|_, _| F16x1(f16::from_f32(3 as _))),
));
writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap();
let mut c = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::zeros(),
));
let dummy = Box::new(0);
// Verify everything is correctly initialized before PIM operation
barrier::dsb(barrier::SY);
// Execute kernel
pim::state::set_bank_mode(BankMode::PimAllBank);
vmul::execute::<ROWS, BLOCKS>(&a.0, &b.0, &mut c.0, dummy.as_ref());
pim::state::set_bank_mode(BankMode::SingleBank);
writeln!(Uart0, "{}", c.0).unwrap();
writeln!(Uart0, "Done").unwrap();
}

3
pim-os/src/boot.rs Normal file
View File

@@ -0,0 +1,3 @@
use core::arch::global_asm;
global_asm!(include_str!("start.s"));

View File

@@ -0,0 +1,14 @@
use critical_section::RawRestoreState;
struct CriticalSection;
critical_section::set_impl!(CriticalSection);
unsafe impl critical_section::Impl for CriticalSection {
unsafe fn acquire() -> RawRestoreState {
// no special implementation as interrupts are not used in the project
}
unsafe fn release(_token: RawRestoreState) {
// no special implementation as interrupts are not used in the project
}
}

5
pim-os/src/kernel.rs Normal file
View File

@@ -0,0 +1,5 @@
pub mod gemv;
pub mod haxpy;
pub mod legacy;
pub mod vadd;
pub mod vmul;

105
pim-os/src/kernel/gemv.rs Normal file
View File

@@ -0,0 +1,105 @@
use crate::pim::{
continuous_array::Matrix, interleaved_array, operation::PimOperand, vector::F16x16,
};
use aarch64_cpu::asm::barrier;
use nalgebra::SVector;
use pim_isa::{File, Instruction, Kernel};
pub const KERNEL: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 4 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 5 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 6 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 7 },
},
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 0 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: true,
},
Instruction::JUMP {
offset: -1,
count: 7,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub fn execute<const X16R: usize, const R: usize>(
matrix: &Matrix<X16R, 8>,
input_vector: &interleaved_array::Vector<8>,
output_partial_sum_vector: &mut SVector<F16x16, R>,
dummy: &impl PimOperand,
) {
for block in input_vector.0.iter() {
block.execute_read();
}
for sub_matrix in matrix.0.iter() {
for column_block in sub_matrix.fixed_rows::<1>(0).iter() {
column_block.execute_read_async();
}
}
barrier::dsb(barrier::SY);
for chunk in output_partial_sum_vector
.fixed_rows_with_step_mut::<X16R>(0, 16)
.iter_mut()
{
chunk.execute_write();
}
dummy.execute_read();
}

View File

@@ -0,0 +1,77 @@
use crate::pim::{interleaved_array, operation::PimOperand, vector::F16x1};
use nalgebra::SVector;
use pim_isa::{File, Instruction, Kernel};
pub const KERNEL: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::SrfM { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MAD {
src0: File::Bank,
src1: File::SrfA { index: 0 },
src2: File::GrfA { index: 0 },
dst: File::GrfA { index: 0 },
aam: false,
},
Instruction::FILL {
src: File::GrfA { index: 0 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub fn execute<const R: usize, const BLOCKS: usize>(
a: &SVector<F16x1, R>,
b: &SVector<F16x1, R>,
interleaved_scalar: &interleaved_array::Vector<1>,
c: &mut SVector<F16x1, R>,
dummy: &impl PimOperand,
) {
interleaved_scalar.execute_read();
a.fixed_rows_with_step::<BLOCKS>(0, 256)
.iter()
.for_each(|entry| entry.execute_read());
b.fixed_rows_with_step::<BLOCKS>(0, 256)
.iter()
.for_each(|entry| entry.execute_read());
c.fixed_rows_with_step_mut::<BLOCKS>(0, 256)
.iter_mut()
.for_each(|entry| entry.execute_write());
dummy.execute_read();
}

View File

@@ -0,0 +1,3 @@
pub mod matrix_matrix_mul;
pub mod matrix_scalar_mul;
pub mod matrix_vector_mul;

View File

@@ -0,0 +1,148 @@
use crate::pim::legacy::array::{DummyArray, PimMatrixArena, PimRegion};
use pim_isa::{File, Instruction, Kernel};
pub const KERNEL: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 4 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 5 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 6 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 7 },
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 0 },
dst: File::GrfA { index: 0 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 1 },
dst: File::GrfA { index: 1 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 2 },
dst: File::GrfA { index: 2 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 3 },
dst: File::GrfA { index: 3 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 4 },
dst: File::GrfA { index: 4 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 5 },
dst: File::GrfA { index: 5 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 6 },
dst: File::GrfA { index: 6 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 7 },
dst: File::GrfA { index: 7 },
aam: false,
},
Instruction::FILL {
src: File::GrfA { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 1 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 2 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 3 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 4 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 5 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 6 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 7 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub fn execute<const R: usize, const C: usize>(
pim_matrix_arena0: &PimMatrixArena<R, C>,
pim_matrix_arena1: &PimMatrixArena<R, C>,
pim_matrix_arena2: &mut PimMatrixArena<R, C>,
dummy_array: &DummyArray,
) {
for column in 0..C {
for row in 0..R {
pim_matrix_arena0.execute_instruction_read_single_bank(column * R + row);
}
for row in 0..R {
pim_matrix_arena1.execute_instruction_read_single_bank(column * R + row);
}
for row in 0..R {
pim_matrix_arena2.execute_instruction_write_single_bank(column * R + row);
}
dummy_array.execute_instruction_read_single_bank(0);
}
}

View File

@@ -0,0 +1,154 @@
use crate::pim::legacy::array::{DummyArray, PimMatrixArena, PimRegion};
use pim_isa::{File, Instruction, Kernel};
pub const KERNEL: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 4 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 5 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 6 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 7 },
},
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 0 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: true,
},
Instruction::JUMP {
offset: -1,
count: 63,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 2 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 3 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 4 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 5 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 6 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 7 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
// Vlt in der Thesis kurz erwähnen und dann zu AAM überleiten
// pub fn execute_matrix_multiply_elementwise<const R: usize, const C: usize>(
// pim_state: &mut PimState,
// pim_matrix_arena0: &mut PimMatrixArena<R, C>,
// pim_matrix_arena1: &mut PimMatrixArena<R, C>,
// pim_matrix_arena2: &mut PimMatrixArena<R, C>,
// dummy_array: &mut DummyArray,
// ) {
// set_bank_mode(BankMode::PimAllBank);
// for i in 0..(R * C) {
// let start_column = i % R;
// let start_row = (i / R) * R;
// for j in 0..C {
// pim_matrix_arena0.execute_instruction_read_single_bank(start_column + R * j);
// }
// for j in 0..R {
// pim_matrix_arena1.execute_instruction_read_single_bank(start_row + j);
// }
// pim_matrix_arena2.execute_instruction_write_single_bank(i);
// dummy_array.execute_instruction_read_single_bank(0);
// }
// set_bank_mode(BankMode::SingleBank);
// }
const MATRIX_DIMENSION: usize = 8;
pub fn execute(
pim_matrix_arena0: &PimMatrixArena<MATRIX_DIMENSION, MATRIX_DIMENSION>,
pim_matrix_arena1: &PimMatrixArena<MATRIX_DIMENSION, MATRIX_DIMENSION>,
pim_matrix_arena2: &mut PimMatrixArena<MATRIX_DIMENSION, MATRIX_DIMENSION>,
dummy_array: &DummyArray,
) {
for row in 0..MATRIX_DIMENSION {
for i in 0..MATRIX_DIMENSION {
pim_matrix_arena0.execute_instruction_read_single_bank(row + MATRIX_DIMENSION * i);
}
for column in 0..MATRIX_DIMENSION {
for i in 0..MATRIX_DIMENSION {
pim_matrix_arena1.execute_instruction_read_single_bank_unsynchronized(
column * MATRIX_DIMENSION + i,
);
}
}
for column in 0..MATRIX_DIMENSION {
pim_matrix_arena2
.execute_instruction_write_single_bank(column * MATRIX_DIMENSION + row);
}
dummy_array.execute_instruction_read_single_bank(0);
}
}

View File

@@ -0,0 +1,125 @@
use crate::pim::legacy::array::{DummyArray, PimMatrixArena, PimRegion, PimScalarArena};
use pim_isa::{File, Instruction, Kernel};
pub const KERNEL: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::SrfM { index: 0 },
},
Instruction::MUL {
src0: File::Bank,
src1: File::SrfM { index: 0 },
dst: File::GrfA { index: 0 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::SrfM { index: 0 },
dst: File::GrfA { index: 1 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::SrfM { index: 0 },
dst: File::GrfA { index: 2 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::SrfM { index: 0 },
dst: File::GrfA { index: 3 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::SrfM { index: 0 },
dst: File::GrfA { index: 4 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::SrfM { index: 0 },
dst: File::GrfA { index: 5 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::SrfM { index: 0 },
dst: File::GrfA { index: 6 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::SrfM { index: 0 },
dst: File::GrfA { index: 7 },
aam: false,
},
Instruction::FILL {
src: File::GrfA { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 1 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 2 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 3 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 4 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 5 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 6 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 7 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub fn execute<const R: usize, const C: usize>(
pim_scalar_arena: &PimScalarArena,
pim_matrix_arena0: &PimMatrixArena<R, C>,
pim_matrix_arena1: &mut PimMatrixArena<R, C>,
dummy_array: &DummyArray,
) {
for column in 0..C {
pim_scalar_arena.execute_instruction_read_single_bank(0);
for i in 0..R {
pim_matrix_arena0.execute_instruction_read_single_bank(column * R + i);
}
for i in 0..R {
pim_matrix_arena1.execute_instruction_write_single_bank(column * R + i);
}
dummy_array.execute_instruction_read_single_bank(0);
}
}

View File

@@ -0,0 +1,94 @@
use crate::pim::legacy::array::{DummyArray, PimMatrixArena, PimRegion};
use pim_isa::{File, Instruction, Kernel};
pub const KERNEL: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 4 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 5 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 6 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 7 },
},
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 0 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: true,
},
Instruction::JUMP {
offset: -1,
count: 7,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub fn execute<const R: usize, const C: usize>(
pim_matrix_arena0: &PimMatrixArena<R, C>,
pim_matrix_arena1: &PimMatrixArena<C, 1>,
pim_matrix_arena2: &mut PimMatrixArena<C, 1>,
dummy_array: &DummyArray,
) {
for row in 0..R {
for i in 0..C {
pim_matrix_arena0.execute_instruction_read_single_bank(row + R * i);
}
for i in 0..R {
pim_matrix_arena1.execute_instruction_read_single_bank(i);
}
pim_matrix_arena2.execute_instruction_write_single_bank(row);
dummy_array.execute_instruction_read_single_bank(0);
}
}

68
pim-os/src/kernel/vadd.rs Normal file
View File

@@ -0,0 +1,68 @@
use crate::pim::{operation::PimOperand, vector::F16x1};
use nalgebra::SVector;
use pim_isa::{File, Instruction, Kernel};
pub const KERNEL: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub fn execute<const R: usize, const BLOCKS: usize>(
a: &SVector<F16x1, R>,
b: &SVector<F16x1, R>,
c: &mut SVector<F16x1, R>,
dummy: &impl PimOperand,
) {
a.fixed_rows_with_step::<BLOCKS>(0, 256)
.iter()
.for_each(|entry| entry.execute_read());
b.fixed_rows_with_step::<BLOCKS>(0, 256)
.iter()
.for_each(|entry| entry.execute_read());
c.fixed_rows_with_step_mut::<BLOCKS>(0, 256)
.iter_mut()
.for_each(|entry| entry.execute_write());
dummy.execute_read();
}

68
pim-os/src/kernel/vmul.rs Normal file
View File

@@ -0,0 +1,68 @@
use crate::pim::{operation::PimOperand, vector::F16x1};
use nalgebra::SVector;
use pim_isa::{File, Instruction, Kernel};
pub const KERNEL: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub fn execute<const R: usize, const BLOCKS: usize>(
a: &SVector<F16x1, R>,
b: &SVector<F16x1, R>,
c: &mut SVector<F16x1, R>,
dummy: &impl PimOperand,
) {
a.fixed_rows_with_step::<BLOCKS>(0, 256)
.iter()
.for_each(|entry| entry.execute_read());
b.fixed_rows_with_step::<BLOCKS>(0, 256)
.iter()
.for_each(|entry| entry.execute_read());
c.fixed_rows_with_step_mut::<BLOCKS>(0, 256)
.iter_mut()
.for_each(|entry| entry.execute_write());
dummy.execute_read();
}

32
pim-os/src/lib.rs Normal file
View File

@@ -0,0 +1,32 @@
#![feature(iter_array_chunks)]
#![no_std]
use core::sync::atomic::{compiler_fence, Ordering};
mod alloc;
mod panic;
pub mod boot;
pub mod critical_section;
pub mod kernel;
pub mod m5op;
pub mod memory_config;
pub mod pim;
pub mod uart;
extern "C" {
fn main();
}
#[no_mangle]
pub extern "C" fn entry() -> ! {
alloc::init();
unsafe { main() }
m5op::exit(0);
loop {
compiler_fence(Ordering::SeqCst);
}
}

26
pim-os/src/m5op.rs Normal file
View File

@@ -0,0 +1,26 @@
use core::arch::global_asm;
global_asm!(include_str!("m5op.s"));
extern "C" {
fn m5_exit(delay_ns: u64);
fn m5_reset_stats(delay_ns: u64, period_ns: u64);
fn m5_dump_stats(delay_ns: u64, period_ns: u64);
fn m5_dump_reset_stats(delay_ns: u64, period_ns: u64);
}
pub fn exit(delay_ns: u64) {
unsafe { m5_exit(delay_ns) }
}
pub fn reset_stats(delay_ns: u64, period_ns: u64) {
unsafe { m5_reset_stats(delay_ns, period_ns) }
}
pub fn dump_stats(delay_ns: u64, period_ns: u64) {
unsafe { m5_dump_stats(delay_ns, period_ns) }
}
pub fn dump_reset_stats(delay_ns: u64, period_ns: u64) {
unsafe { m5_dump_reset_stats(delay_ns, period_ns) }
}

52
pim-os/src/m5op.s Normal file
View File

@@ -0,0 +1,52 @@
/*
* Copyright (c) 2010-2013, 2016-2017 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2003-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
.macro m5op_func, name, func
.globl \name
\name:
.long 0xff000110 | (\func << 16)
ret
.endm
.text
m5op_func m5_exit, 0x21;
m5op_func m5_reset_stats, 0x40;
m5op_func m5_dump_stats, 0x41;
m5op_func m5_dump_reset_stats, 0x42;

View File

@@ -0,0 +1 @@
pub const NUMBER_OF_BANKS: usize = 32;

15
pim-os/src/panic.rs Normal file
View File

@@ -0,0 +1,15 @@
use crate::uart::Uart0;
use core::{
fmt::Write,
panic::PanicInfo,
sync::atomic::{compiler_fence, Ordering},
};
#[panic_handler]
fn panic(info: &PanicInfo) -> ! {
writeln!(Uart0, "{info}").unwrap();
loop {
compiler_fence(Ordering::SeqCst);
}
}

7
pim-os/src/pim.rs Normal file
View File

@@ -0,0 +1,7 @@
pub mod config;
pub mod continuous_array;
pub mod interleaved_array;
pub mod legacy;
pub mod operation;
pub mod state;
pub mod vector;

41
pim-os/src/pim/config.rs Normal file
View File

@@ -0,0 +1,41 @@
use aarch64_cpu::asm::barrier;
use core::{
arch::asm,
ptr::{addr_of_mut, write_volatile},
};
#[link_section = ".pim_config"]
static mut PIM_CONFIG_REGION: [u8; 0x4000] = [0; 0x4000];
pub fn write(s: &str) {
unsafe {
let mut index = 0;
for &byte in s.as_bytes() {
write_volatile(
(addr_of_mut!(PIM_CONFIG_REGION) as *mut u8).offset(index),
byte as _,
);
barrier::dsb(barrier::SY);
index += 1;
}
write_volatile(
(addr_of_mut!(PIM_CONFIG_REGION) as *mut u8).offset(index),
b'\0',
);
// PIM_CONFIG_REGION[..s.len()].copy_from_slice(s.as_bytes());
// PIM_CONFIG_REGION[s.len()] = b'\0';
if cfg!(feature = "cacheless") {
// Be pessimistic so that config region is not optimized away
core::hint::black_box(PIM_CONFIG_REGION);
} else {
// Flush all cache lines that were affected by write operation
for element in PIM_CONFIG_REGION[..s.len()].iter() {
asm!("dc civac, {val}", val = in(reg) element);
}
barrier::dsb(barrier::SY);
}
}
}

View File

@@ -0,0 +1,38 @@
use super::vector::{F16x1, F16x16};
use core::fmt::Display;
use nalgebra::{SMatrix, SVector};
#[repr(C, align(65536))]
#[derive(Debug)]
pub struct Matrix<const X16R: usize, const X16C: usize>(pub [SMatrix<F16x16, 16, X16C>; X16R]);
#[repr(C, align(1024))]
#[derive(Debug)]
pub struct Vector<const R: usize>(pub SVector<F16x1, R>);
impl<const X16R: usize, const X16C: usize> Display for Matrix<X16R, X16C> {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
for block in self.0.iter() {
block.fmt(f)?
}
Ok(())
}
}
impl<const R: usize, const X16R: usize, const C: usize, const X16C: usize>
From<SMatrix<F16x1, R, C>> for Matrix<X16R, X16C>
{
fn from(matrix: SMatrix<F16x1, R, C>) -> Self {
Self(core::array::from_fn(|i| {
SMatrix::from_row_iterator(
matrix
.fixed_rows::<16>(i * 16)
.transpose()
.iter()
.map(|e| *e)
.array_chunks::<16>()
.map(|chunk| F16x16(chunk)),
)
}))
}
}

View File

@@ -0,0 +1,28 @@
use super::vector::F16x16;
use crate::memory_config::NUMBER_OF_BANKS;
use nalgebra::SVector;
#[repr(C, align(512))]
#[derive(Debug)]
pub struct Vector<const X16R: usize>(pub [[F16x16; NUMBER_OF_BANKS]; X16R]);
impl<const X16R: usize> Default for Vector<X16R> {
fn default() -> Self {
Self([[F16x16::default(); NUMBER_OF_BANKS]; X16R])
}
}
impl<const X16R: usize> From<SVector<F16x16, X16R>> for Vector<X16R> {
fn from(input_vector: SVector<F16x16, X16R>) -> Self {
let mut interleaved_vector = Self::default();
for block_index in 0..X16R {
let element = input_vector[block_index];
for k in 0..NUMBER_OF_BANKS {
interleaved_vector.0[block_index][k] = element;
}
}
interleaved_vector
}
}

1
pim-os/src/pim/legacy.rs Normal file
View File

@@ -0,0 +1 @@
pub mod array;

View File

@@ -0,0 +1,227 @@
use crate::{
memory_config::NUMBER_OF_BANKS,
pim::vector::{F16x1, F16x16},
};
use aarch64_cpu::asm::barrier;
use core::{arch::asm, cell::RefCell};
use half::f16;
use nalgebra::{Const, Dyn, RawStorage, RawStorageMut};
const EVEN_BANK_INDEX: usize = 0;
const ODD_BANK_INDEX: usize = 8;
#[derive(Clone, Debug)]
#[repr(C, align(65536))]
pub struct PimMatrixArena<const R: usize, const C: usize>(pub [[[F16x16; NUMBER_OF_BANKS]; R]; C]);
impl<const R: usize, const C: usize> PimRegion for PimMatrixArena<R, C> {
const OCCUPIED_CACHE_LINES: usize = R * C * NUMBER_OF_BANKS;
fn bank_ptr(&self, bank_index: usize) -> *const f16 {
unsafe { (self.0.as_ptr() as *const F16x16).add(bank_index) as *const f16 }
}
fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16 {
unsafe { (self.0.as_mut_ptr() as *mut F16x16).add(bank_index) as *mut f16 }
}
}
#[derive(Clone, Debug)]
#[repr(C, align(1024))]
pub struct PimScalarArena(pub [F16x16; NUMBER_OF_BANKS]);
impl PimRegion for PimScalarArena {
const OCCUPIED_CACHE_LINES: usize = NUMBER_OF_BANKS;
fn bank_ptr(&self, bank_index: usize) -> *const f16 {
unsafe { (self.0.as_ptr() as *const F16x16).add(bank_index) as *const f16 }
}
fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16 {
unsafe { (self.0.as_mut_ptr() as *mut F16x16).add(bank_index) as *mut f16 }
}
}
#[derive(Debug)]
pub struct PimStorage<'a, const R: usize, const C: usize> {
pub arena: &'a RefCell<PimMatrixArena<R, C>>,
pub index: usize,
}
unsafe impl<'a, const R: usize, const C: usize> RawStorage<F16x1, Const<R>, Const<C>>
for PimStorage<'a, R, C>
{
type RStride = Dyn;
type CStride = Dyn;
fn ptr(&self) -> *const F16x1 {
unsafe { (&self.arena.borrow().0[0][0] as *const F16x16 as *const F16x1).add(self.index) }
}
fn shape(&self) -> (Const<R>, Const<C>) {
(Const::<R>, Const::<C>)
}
fn strides(&self) -> (Self::RStride, Self::CStride) {
(Dyn(16 * NUMBER_OF_BANKS), Dyn(16 * R * NUMBER_OF_BANKS))
}
fn is_contiguous(&self) -> bool {
false
}
unsafe fn as_slice_unchecked(&self) -> &[F16x1] {
panic!("PimStorage is not contiguous!");
}
}
unsafe impl<'a, const R: usize, const C: usize> RawStorageMut<F16x1, Const<R>, Const<C>>
for PimStorage<'a, R, C>
{
fn ptr_mut(&mut self) -> *mut F16x1 {
unsafe {
(&mut self.arena.borrow_mut().0[0][0] as *mut F16x16 as *mut F16x1).add(self.index)
}
}
unsafe fn as_mut_slice_unchecked(&mut self) -> &mut [F16x1] {
panic!("PimStorage is not contiguous!");
}
}
pub trait PimRegion {
const OCCUPIED_CACHE_LINES: usize;
fn bank_ptr(&self, bank_index: usize) -> *const f16;
fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16;
fn execute_instruction_read_single_bank(&self, i: usize) {
if !cfg!(feature = "cacheless") {
self.invalidate_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
barrier::dsb(barrier::SY);
}
// Read from first bank
self.read_data_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
barrier::dsb(barrier::SY);
}
fn execute_instruction_read_single_bank_unsynchronized(&self, i: usize) {
self.read_data_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
}
fn execute_instruction_read_dual_bank(&self) {
let i = 0;
if !cfg!(feature = "cacheless") {
self.invalidate_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
self.invalidate_bank(ODD_BANK_INDEX + i * NUMBER_OF_BANKS);
barrier::dsb(barrier::SY);
}
// Read from first and second bank
self.read_data_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
self.read_data_bank(ODD_BANK_INDEX + i * NUMBER_OF_BANKS);
barrier::dsb(barrier::SY);
}
fn read_data_bank(&self, bank_index: usize) {
let bank = self.bank_ptr(bank_index);
// writeln!(&mut crate::uart::Uart0 {}, "Read data {:?}", bank).unwrap();
unsafe { core::ptr::read_volatile(bank) };
}
fn execute_instruction_write_single_bank(&mut self, i: usize) {
if !cfg!(feature = "cacheless") {
self.preload_zero_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
barrier::dsb(barrier::SY);
}
// Write to first bank
self.write_data_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
if !cfg!(feature = "cacheless") {
self.invalidate_flush_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
}
barrier::dsb(barrier::SY);
}
fn execute_instruction_write_dual_bank(&mut self) {
let i = 0;
if !cfg!(feature = "cacheless") {
self.preload_zero_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
self.preload_zero_bank(ODD_BANK_INDEX + i * NUMBER_OF_BANKS);
barrier::dsb(barrier::SY);
}
// Write to first and second bank
self.write_data_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
self.write_data_bank(ODD_BANK_INDEX + i * NUMBER_OF_BANKS);
if !cfg!(feature = "cacheless") {
self.invalidate_flush_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS);
self.invalidate_flush_bank(ODD_BANK_INDEX + i * NUMBER_OF_BANKS);
}
barrier::dsb(barrier::SY);
}
fn write_data_bank(&mut self, bank_index: usize) {
let bank = self.bank_ptr_mut(bank_index);
unsafe {
core::ptr::write_volatile(bank, Default::default());
}
}
fn invalidate(&self) {
(0..Self::OCCUPIED_CACHE_LINES).for_each(|idx| self.invalidate_bank(idx));
}
fn invalidate_bank(&self, bank_index: usize) {
let bank = self.bank_ptr(bank_index);
unsafe {
asm!("dc ivac, {val}", val = in(reg) bank);
}
}
fn invalidate_flush(&self) {
(0..Self::OCCUPIED_CACHE_LINES).for_each(|idx| self.invalidate_flush_bank(idx));
}
fn invalidate_flush_bank(&self, bank_index: usize) {
let bank = self.bank_ptr(bank_index);
unsafe {
asm!("dc civac, {val}", val = in(reg) bank);
}
}
fn preload_zero(&self) {
(0..Self::OCCUPIED_CACHE_LINES).for_each(|idx| self.preload_zero_bank(idx));
}
fn preload_zero_bank(&self, bank_index: usize) {
let bank = self.bank_ptr(bank_index);
unsafe {
// Preload first bank
asm!("dc zva, {val}", val = in(reg) bank);
}
}
}
#[repr(C, align(1024))]
pub struct DummyArray(pub [F16x16; NUMBER_OF_BANKS]);
impl PimRegion for DummyArray {
const OCCUPIED_CACHE_LINES: usize = NUMBER_OF_BANKS;
fn bank_ptr(&self, bank_index: usize) -> *const f16 {
&self.0[bank_index] as *const F16x16 as *const f16
}
fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16 {
&mut self.0[bank_index] as *mut F16x16 as *mut f16
}
}

View File

@@ -0,0 +1,30 @@
use aarch64_cpu::asm::barrier;
pub trait PimOperand {
fn ptr(&self) -> *const u8;
fn ptr_mut(&mut self) -> *mut u8;
fn execute_read(&self) {
unsafe { core::ptr::read_volatile(self.ptr()) };
barrier::dsb(barrier::SY);
}
fn execute_read_async(&self) {
unsafe { core::ptr::read_volatile(self.ptr()) };
}
fn execute_write(&mut self) {
unsafe { core::ptr::write_volatile(self.ptr_mut(), Default::default()) };
barrier::dsb(barrier::SY);
}
}
impl<T> PimOperand for T {
fn ptr(&self) -> *const u8 {
core::ptr::addr_of!(*self) as *const _
}
fn ptr_mut(&mut self) -> *mut u8 {
core::ptr::addr_of_mut!(*self) as *mut _
}
}

25
pim-os/src/pim/state.rs Normal file
View File

@@ -0,0 +1,25 @@
use super::config;
use pim_isa::{BankMode, Kernel, PimConfig};
// TODO return token and return to singlebank when dropped
pub fn set_bank_mode(bank_mode: BankMode) {
config::write(
serde_json_core::to_string::<PimConfig, 64>(&PimConfig {
kernel: None,
bank_mode: Some(bank_mode),
})
.unwrap()
.as_str(),
);
}
pub fn set_kernel(kernel: &Kernel) {
config::write(
serde_json_core::to_string::<PimConfig, 2048>(&PimConfig {
kernel: Some(kernel.clone()),
bank_mode: None,
})
.unwrap()
.as_str(),
);
}

125
pim-os/src/pim/vector.rs Normal file
View File

@@ -0,0 +1,125 @@
use core::fmt::{Debug, Display};
use half::f16;
pub const ELEMENT_COUNT: usize = 16;
#[repr(C)]
#[derive(Default, Clone, Copy, PartialEq)]
pub struct F16x1(pub f16);
impl core::fmt::Debug for F16x1 {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
Debug::fmt(&self.0, f)
}
}
impl core::fmt::Display for F16x1 {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
Display::fmt(&self.0, f)
}
}
impl num_traits::identities::Zero for F16x1 {
fn zero() -> Self {
Self(f16::ZERO)
}
fn is_zero(&self) -> bool {
self.0 == f16::ZERO
}
}
impl num_traits::identities::One for F16x1 {
fn one() -> Self {
Self(f16::ONE)
}
}
impl core::ops::Add<F16x1> for F16x1 {
type Output = Self;
fn add(self, rhs: F16x1) -> Self::Output {
Self(self.0 + rhs.0)
}
}
impl core::ops::AddAssign<F16x1> for F16x1 {
fn add_assign(&mut self, rhs: F16x1) {
self.0 += rhs.0;
}
}
impl core::ops::Mul<F16x1> for F16x1 {
type Output = Self;
fn mul(self, rhs: F16x1) -> Self::Output {
Self(self.0 * rhs.0)
}
}
impl core::ops::MulAssign<F16x1> for F16x1 {
fn mul_assign(&mut self, rhs: F16x1) {
self.0 *= rhs.0;
}
}
#[repr(C)]
#[derive(Default, Debug, Clone, Copy, PartialEq)]
pub struct F16x16(pub [F16x1; ELEMENT_COUNT]);
impl Display for F16x16 {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
write!(f, "{:?}", self.0)
}
}
impl num_traits::identities::Zero for F16x16 {
fn zero() -> Self {
Self([F16x1::zero(); ELEMENT_COUNT])
}
fn is_zero(&self) -> bool {
self.0 == [F16x1::zero(); ELEMENT_COUNT]
}
}
impl num_traits::identities::One for F16x16 {
fn one() -> Self {
Self([F16x1::one(); ELEMENT_COUNT])
}
}
impl core::ops::Add<F16x16> for F16x16 {
type Output = Self;
fn add(self, rhs: F16x16) -> Self::Output {
Self(core::array::from_fn(|i| self.0[i] + rhs.0[i]))
}
}
impl core::ops::AddAssign<F16x16> for F16x16 {
fn add_assign(&mut self, rhs: F16x16) {
self.0
.iter_mut()
.zip(&rhs.0)
.for_each(|(left, right)| *left += *right);
}
}
impl core::ops::Mul<F16x16> for F16x16 {
type Output = Self;
fn mul(self, rhs: F16x16) -> Self::Output {
Self(core::array::from_fn(|i| self.0[i] * rhs.0[i]))
}
}
impl core::ops::MulAssign<F16x16> for F16x16 {
fn mul_assign(&mut self, rhs: F16x16) {
self.0
.iter_mut()
.zip(&rhs.0)
.for_each(|(left, right)| *left *= *right);
}
}

80
pim-os/src/start.s Normal file
View File

@@ -0,0 +1,80 @@
.extern LD_STACK_PTR
// Put a 64-bit value with little endianness.
.macro PUT_64B high, low
.word \low
.word \high
.endm
// Create an entry pointing to a next-level table.
.macro TABLE_ENTRY PA, ATTR
PUT_64B \ATTR, (\PA) + 0x3
.endm
// Create an entry for a 1GB block.
.macro BLOCK_1GB PA, ATTR_HI, ATTR_LO
PUT_64B \ATTR_HI | ((\PA) >> 32), ((\PA) & 0xC0000000) | \ATTR_LO | 0x1
.endm
// Create an entry for a 2MB block.
.macro BLOCK_2MB PA, ATTR_HI, ATTR_LO
PUT_64B \ATTR_HI, ((\PA) & 0xFFE00000) | \ATTR_LO | 0x1
.endm
.section .init
.align 12
ttb0_base:
.set ADDR, 0x000
.rept 0x02
BLOCK_1GB (ADDR << 29), 0, 0x740
.set ADDR, ADDR+2
.endr
// Cached normal DRAM region
BLOCK_1GB (ADDR << 29), 0, 0x74C
.set ADDR, ADDR+2
// Non-cached PIM DRAM region
BLOCK_1GB (ADDR << 29), 0, 0x740
.set ADDR, ADDR+2
// Map rest of Page Table to avoid undefined behavior
.rept 0x3C
BLOCK_1GB (ADDR << 29), 0, 0x74C
.set ADDR, ADDR+2
.endr
.globl _start
_start:
ldr x30, =LD_STACK_PTR
mov sp, x30
// Initialize translation table control registers
ldr x1, =0x13520 // 64GB space 4KB granularity Inner-shareable. Normal Inner and Outer Cacheable.
msr tcr_el3, x1
ldr x1, =0xFF440400
msr mair_el3, x1 // ATTR0 Device-nGnRnE ATTR1 Device. ATTR2 Normal Non-Cacheable. ATTR3 Normal Cacheable.
bl set_page_table
// Enable MMU and caches
mrs x0, sctlr_el3
orr x0, x0, #(0x1 << 2) // The C bit (data cache).
orr x0, x0, #(0x1 << 12) // The I bit (instruction cache).
orr x0, x0, #0x1 // The M bit (MMU).
msr sctlr_el3, x0
dsb sy
isb
bl entry
.globl set_page_table
set_page_table:
adr x0, ttb0_base
msr ttbr0_el3, x0
tlbi alle3
isb
ret

17
pim-os/src/uart.rs Normal file
View File

@@ -0,0 +1,17 @@
use core::{fmt::Write, ptr::write_volatile};
const UART0_ADDR: *mut u32 = 0x1c090000 as _;
#[derive(Debug)]
pub struct Uart0;
impl Write for Uart0 {
fn write_str(&mut self, s: &str) -> core::fmt::Result {
for &byte in s.as_bytes() {
unsafe {
write_volatile(UART0_ADDR, byte as _);
}
}
Ok(())
}
}

402
pim-vm/Cargo.lock generated Normal file
View File

@@ -0,0 +1,402 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
dependencies = [
"memchr",
]
[[package]]
name = "bitflags"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
[[package]]
name = "cc"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
dependencies = [
"libc",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "crunchy"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
[[package]]
name = "cxx"
version = "1.0.115"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8de00f15a6fa069c99b88c5c78c4541d0e7899a33b86f7480e23df2431fce0bc"
dependencies = [
"cc",
"cxxbridge-flags",
"cxxbridge-macro",
"link-cplusplus",
]
[[package]]
name = "cxxbridge-flags"
version = "1.0.115"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f3fed61d56ba497c4efef9144dfdbaa25aa58f2f6b3a7cf441d4591c583745c"
[[package]]
name = "cxxbridge-macro"
version = "1.0.115"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8908e380a8efd42150c017b0cfa31509fc49b6d47f7cb6b33e93ffb8f4e3661e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "env_logger"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95b3f3e67048839cb0d0781f445682a35113da7121f7c949db0e2be96a4fbece"
dependencies = [
"humantime",
"is-terminal",
"log",
"regex",
"termcolor",
]
[[package]]
name = "errno"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245"
dependencies = [
"libc",
"windows-sys",
]
[[package]]
name = "half"
version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872"
dependencies = [
"cfg-if",
"crunchy",
]
[[package]]
name = "hermit-abi"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7"
[[package]]
name = "humantime"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]]
name = "is-terminal"
version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455"
dependencies = [
"hermit-abi",
"rustix",
"windows-sys",
]
[[package]]
name = "itoa"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c"
[[package]]
name = "libc"
version = "0.2.152"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7"
[[package]]
name = "link-cplusplus"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d240c6f7e1ba3a28b0249f774e6a9dd0175054b52dfbb61b16eb8505c3785c9"
dependencies = [
"cc",
]
[[package]]
name = "linux-raw-sys"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456"
[[package]]
name = "log"
version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "memchr"
version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
[[package]]
name = "pim-isa"
version = "0.1.0"
dependencies = [
"serde",
]
[[package]]
name = "pim-vm"
version = "0.1.0"
dependencies = [
"cxx",
"env_logger",
"half",
"log",
"pim-isa",
"serde_json",
]
[[package]]
name = "proc-macro2"
version = "1.0.76"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
[[package]]
name = "rustix"
version = "0.38.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316"
dependencies = [
"bitflags",
"errno",
"libc",
"linux-raw-sys",
"windows-sys",
]
[[package]]
name = "ryu"
version = "1.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c"
[[package]]
name = "serde"
version = "1.0.195"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.195"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.111"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "176e46fa42316f18edd598015a5166857fc835ec732f5215eac6b7bdbf0a84f4"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "syn"
version = "2.0.48"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "termcolor"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff1bc3d3f05aff0403e8ac0d92ced918ec05b666a43f83297ccef5bea8a3d449"
dependencies = [
"winapi-util",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596"
dependencies = [
"winapi",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef"
[[package]]
name = "windows_i686_gnu"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313"
[[package]]
name = "windows_i686_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"

18
pim-vm/Cargo.toml Normal file
View File

@@ -0,0 +1,18 @@
[package]
name = "pim-vm"
version = "0.1.0"
edition = "2021"
[lib]
crate-type = ["staticlib"]
[features]
shared_pim_units = []
[dependencies]
cxx = "1.0.110"
env_logger = "0.10.1"
half = "2.3.1"
log = "0.4.20"
pim-isa = { path = "../pim-isa" }
serde_json = "1.0.108"

451
pim-vm/src/lib.rs Normal file
View File

@@ -0,0 +1,451 @@
use half::f16;
use pim_isa::{BankMode, File, Instruction, Kernel};
#[cxx::bridge(namespace = "pim_vm")]
mod ffi {
pub enum BankMode {
SingleBank,
AllBank,
PimAllBank,
}
extern "Rust" {
type PimVM;
fn new_pim_vm(num_banks: u32) -> Box<PimVM>;
fn reset(&mut self);
fn apply_config(&mut self, config: &str);
fn bank_mode(&self) -> BankMode;
fn execute_read(
&mut self,
bank_index: u32,
address: u32,
row: u32,
column: u32,
bank_data: &[u8],
);
fn execute_write(&mut self, bank_index: u32) -> [u8; 32];
fn init_logger();
}
}
fn init_logger() {
env_logger::init();
}
const GRF_A_BIT_OFFSET: usize = 2;
const GRF_B_BIT_OFFSET: usize = 5;
const COLUMN_BITS : usize = 7;
const BURST_LENGTH: usize = 32;
const GRF_NUM_REGISTERS: usize = 8;
const SRF_A_NUM_REGISTERS: usize = 8;
const SRF_M_NUM_REGISTERS: usize = 8;
const FP_UNITS: usize = 16;
type GrfRegister = [f16; FP_UNITS];
#[derive(Clone, Debug)]
struct PimUnit {
grf_a: [GrfRegister; GRF_NUM_REGISTERS],
grf_b: [GrfRegister; GRF_NUM_REGISTERS],
srf_a: [f16; SRF_A_NUM_REGISTERS],
srf_m: [f16; SRF_A_NUM_REGISTERS],
pc: u8,
jump_counter: Option<u16>,
}
impl PimUnit {
fn reset(&mut self) {
*self = Self::default();
}
}
impl Default for PimUnit {
fn default() -> Self {
Self {
grf_a: [[f16::ZERO; FP_UNITS]; GRF_NUM_REGISTERS],
grf_b: [[f16::ZERO; FP_UNITS]; GRF_NUM_REGISTERS],
srf_a: [f16::ZERO; SRF_A_NUM_REGISTERS],
srf_m: [f16::ZERO; SRF_M_NUM_REGISTERS],
pc: 0,
jump_counter: None,
}
}
}
#[derive(Debug)]
struct PimVM {
pim_units: Vec<PimUnit>,
bank_mode: pim_isa::BankMode,
kernel: pim_isa::Kernel,
}
impl PimVM {
fn reset(&mut self) {
for unit in self.pim_units.iter_mut() {
unit.reset();
}
}
fn apply_config(&mut self, config_str: &str) {
let config = serde_json::from_str::<pim_isa::PimConfig>(config_str).unwrap();
if let Some(kernel) = config.kernel {
self.kernel = kernel;
}
if let Some(bank_mode) = config.bank_mode {
self.bank_mode = bank_mode;
}
}
fn bank_mode(&self) -> ffi::BankMode {
match self.bank_mode {
BankMode::SingleBank => ffi::BankMode::SingleBank,
BankMode::AllBank => ffi::BankMode::AllBank,
BankMode::PimAllBank => ffi::BankMode::PimAllBank,
}
}
}
fn new_pim_vm(num_banks: u32) -> Box<PimVM> {
let num_pim_units = if cfg!(feature = "shared_pim_units") {
num_banks / 2
} else {
num_banks
};
Box::new(PimVM {
pim_units: vec![PimUnit::default(); num_pim_units as _],
bank_mode: BankMode::SingleBank,
kernel: Kernel::NOP,
})
}
#[repr(C)]
struct BankData([f16; FP_UNITS]);
impl PimVM {
pub fn execute_read(
&mut self,
bank_index: u32,
address: u32,
row: u32,
column: u32,
bank_data: &[u8],
) {
assert_eq!(bank_data.len(), BURST_LENGTH);
let pim_unit_index = if cfg!(feature = "shared_pim_units") {
bank_index / 2
} else {
bank_index
};
let pim_unit = &mut self.pim_units[pim_unit_index as usize];
let inst = self.kernel.0[pim_unit.pc as usize];
let row_column_bits = (row << COLUMN_BITS) | column;
let aam_grf_a_index = (row_column_bits >> GRF_A_BIT_OFFSET) & 0b111;
let aam_grf_b_index = (row_column_bits >> GRF_B_BIT_OFFSET) & 0b111;
if pim_unit_index == 0 {
log::debug!(
"PimUnit {pim_unit_index} at {address:#x} (B{aam_grf_b_index}, A{aam_grf_a_index}) Execute Read PC {}: {inst:?}",
pim_unit.pc
);
}
match inst {
Instruction::NOP => (),
Instruction::EXIT => {
pim_unit.reset();
return;
}
Instruction::JUMP { .. } => unreachable!(),
Instruction::MOV { src, dst } | Instruction::FILL { src, dst } => {
let data = PimVM::load(src, pim_unit, &bank_data);
PimVM::store(dst, pim_unit, &data);
}
Instruction::ADD {
src0,
mut src1,
mut dst,
aam,
} => {
if aam {
src1 = if let File::GrfA { index: _ } = src1 {
File::GrfA {
index: aam_grf_a_index as _,
}
} else {
panic!("Invalid operand in address-aligned-mode");
};
dst = if let File::GrfB { index: _ } = dst {
File::GrfB {
index: aam_grf_b_index as _,
}
} else {
panic!("Invalid operand in address-aligned-mode");
};
}
let data0 = PimVM::load(src0, pim_unit, &bank_data);
let data1 = PimVM::load(src1, pim_unit, &bank_data);
let sum: [f16; FP_UNITS] = data0
.into_iter()
.zip(data1)
.map(|(src0, src1)| src0 + src1)
.collect::<Vec<_>>()
.try_into()
.unwrap();
PimVM::store(dst, pim_unit, &sum);
}
Instruction::MUL {
src0,
mut src1,
mut dst,
aam,
} => {
if aam {
src1 = if let File::GrfA { index: _ } = src1 {
File::GrfA {
index: aam_grf_a_index as _,
}
} else {
panic!("Invalid operand in address-aligned-mode");
};
dst = if let File::GrfB { index: _ } = dst {
File::GrfB {
index: aam_grf_b_index as _,
}
} else {
panic!("Invalid operand in address-aligned-mode");
};
}
let data0 = PimVM::load(src0, pim_unit, &bank_data);
let data1 = PimVM::load(src1, pim_unit, &bank_data);
let product: [f16; FP_UNITS] = data0
.into_iter()
.zip(data1)
.map(|(src0, src1)| src0 * src1)
.collect::<Vec<_>>()
.try_into()
.unwrap();
PimVM::store(dst, pim_unit, &product);
}
Instruction::MAC {
src0,
mut src1,
mut src2,
mut dst,
aam,
}
| Instruction::MAD {
src0,
mut src1,
mut src2,
mut dst,
aam,
} => {
if aam {
src1 = if let File::GrfA { index: _ } = src1 {
// if pim_unit_index == 0 {
// log::debug!("AAM index GrfA {aam_grf_a_index}");
// }
File::GrfA {
index: aam_grf_a_index as _,
}
} else {
panic!("Invalid operand in address-aligned-mode");
};
src2 = if let File::GrfB { index: _ } = src2 {
// if pim_unit_index == 0 {
// log::debug!("AAM index GrfB {aam_grf_a_index}");
// }
File::GrfB {
index: aam_grf_b_index as _,
}
} else {
panic!("Invalid operand in address-aligned-mode");
};
dst = if let File::GrfB { index: _ } = dst {
File::GrfB {
index: aam_grf_b_index as _,
}
} else {
panic!("Invalid operand in address-aligned-mode");
};
}
assert_eq!(src2, dst);
let data0 = PimVM::load(src0, pim_unit, &bank_data);
let data1 = PimVM::load(src1, pim_unit, &bank_data);
let data2 = PimVM::load(src2, pim_unit, &bank_data);
let product: [f16; FP_UNITS] = data0
.into_iter()
.zip(data1)
.map(|(src0, src1)| src0 * src1)
.collect::<Vec<_>>()
.try_into()
.unwrap();
let sum: [f16; FP_UNITS] = product
.into_iter()
.zip(data2)
.map(|(product, src2)| product + src2)
.collect::<Vec<_>>()
.try_into()
.unwrap();
// if pim_unit_index == 0 {
// log::debug!(
// "\n{:?}\n{:?}\n{:?}\n{:?}\n{:?}",
// data0[0],
// data1[0],
// data2[0],
// product[0],
// sum[0]
// );
// }
PimVM::store(dst, pim_unit, &sum);
}
}
pim_unit.pc += 1;
// The JUMP instruction is zero-cycle and not actually executed
while let Instruction::JUMP { offset, count } = self.kernel.0[pim_unit.pc as usize] {
pim_unit.jump_counter = match pim_unit.jump_counter {
Some(jump_counter) => jump_counter.checked_sub(1),
None => count.checked_sub(1),
};
if pim_unit.jump_counter != None {
let new_pc = pim_unit.pc as i32 + offset as i32;
if new_pc < 0 || new_pc >= 32 {
panic!("Invalid PC {new_pc} after JUMP: {inst:?}");
}
pim_unit.pc = new_pc as _;
} else {
pim_unit.pc += 1;
}
// if pim_unit_index == 0 {
// log::debug!(
// "PimUnit {pim_unit_index} JUMP to PC {}: {:?}",
// pim_unit.pc,
// self.kernel.0[pim_unit.pc as usize]
// );
// }
}
}
pub fn execute_write(&mut self, bank_index: u32) -> [u8; BURST_LENGTH] {
let pim_unit_index = if cfg!(feature = "shared_pim_units") {
bank_index / 2
} else {
bank_index
};
let pim_unit = &mut self.pim_units[pim_unit_index as usize];
let inst = self.kernel.0[pim_unit.pc as usize];
if pim_unit_index == 0 {
log::debug!(
"PimUnit {pim_unit_index} Execute Write PC {}: {inst:?}",
pim_unit.pc
);
}
let data = match inst {
Instruction::FILL { src, dst } => {
let data: [f16; FP_UNITS] = match src {
File::GrfA { index } => pim_unit.grf_a[index as usize],
File::GrfB { index } => pim_unit.grf_b[index as usize],
_ => panic!("Unsupported src operand: {src:?}"),
};
if dst != File::Bank {
panic!("Unsupported dst operand: {dst:?}")
}
// if pim_unit_index == 0 {
// log::debug!("Store {data:?}");
// }
data
}
_ => panic!("Unsupported instruction for write: {inst:?}"),
};
pim_unit.pc += 1;
// The JUMP instruction is zero-cycle and not actually executed
while let Instruction::JUMP { offset, count } = self.kernel.0[pim_unit.pc as usize] {
pim_unit.jump_counter = match pim_unit.jump_counter {
Some(jump_counter) => jump_counter.checked_sub(1),
None => count.checked_sub(1),
};
if pim_unit.jump_counter != None {
let new_pc = pim_unit.pc as i32 + offset as i32;
if new_pc < 0 || new_pc >= 32 {
panic!("Invalid PC {new_pc} after JUMP: {inst:?}");
}
pim_unit.pc = new_pc as _;
} else {
pim_unit.pc += 1;
}
if pim_unit_index == 0 {
log::debug!(
"PimUnit {pim_unit_index} JUMP to PC {}: {:?}",
pim_unit.pc,
self.kernel.0[pim_unit.pc as usize]
);
}
}
unsafe { std::mem::transmute(data) }
}
fn load(src: File, pim_unit: &PimUnit, bank_data: &[u8]) -> [f16; FP_UNITS] {
match src {
File::GrfA { index } => pim_unit.grf_a[index as usize],
File::GrfB { index } => pim_unit.grf_b[index as usize],
File::SrfM { index } => [pim_unit.srf_m[index as usize]; FP_UNITS],
File::SrfA { index } => [pim_unit.srf_a[index as usize]; FP_UNITS],
File::Bank => unsafe { std::ptr::read(bank_data.as_ptr() as *const BankData).0 },
}
}
fn store(dst: File, pim_unit: &mut PimUnit, data: &[f16; FP_UNITS]) {
match dst {
File::GrfA { index } => pim_unit.grf_a[index as usize] = data.clone(),
File::GrfB { index } => pim_unit.grf_b[index as usize] = data.clone(),
File::SrfM { index } => pim_unit.srf_m[index as usize] = data[0],
File::SrfA { index } => pim_unit.srf_a[index as usize] = data[0],
File::Bank => panic!("Unsupported dst operand: {dst:?}"),
}
}
}