From b326516f025b09cbedd0eb456765fb20533674dd Mon Sep 17 00:00:00 2001 From: Derek Christ Date: Wed, 6 Aug 2025 15:18:59 +0200 Subject: [PATCH] Implement pim-os and pim-vm --- .gitignore | 1 + pim-isa/Cargo.lock | 65 +++ pim-isa/Cargo.toml | 7 + pim-isa/src/lib.rs | 76 +++ pim-os/Cargo.lock | 331 +++++++++++++ pim-os/Cargo.toml | 31 ++ pim-os/aarch64-gem5.ld | 24 + pim-os/build.rs | 15 + pim-os/rust-toolchain.toml | 2 + pim-os/src/alloc.rs | 18 + pim-os/src/bin/gemv.rs | 67 +++ pim-os/src/bin/haxpy.rs | 64 +++ pim-os/src/bin/legacy/matrix_matrix_add.rs | 78 +++ .../src/bin/legacy/matrix_matrix_multiply.rs | 79 +++ .../src/bin/legacy/matrix_scalar_multiply.rs | 73 +++ .../src/bin/legacy/matrix_vector_multiply.rs | 79 +++ pim-os/src/bin/vadd.rs | 52 ++ pim-os/src/bin/vmul.rs | 52 ++ pim-os/src/boot.rs | 3 + pim-os/src/critical_section.rs | 14 + pim-os/src/kernel.rs | 5 + pim-os/src/kernel/gemv.rs | 105 ++++ pim-os/src/kernel/haxpy.rs | 77 +++ pim-os/src/kernel/legacy.rs | 3 + pim-os/src/kernel/legacy/matrix_matrix_add.rs | 148 ++++++ pim-os/src/kernel/legacy/matrix_matrix_mul.rs | 154 ++++++ pim-os/src/kernel/legacy/matrix_scalar_mul.rs | 125 +++++ pim-os/src/kernel/legacy/matrix_vector_mul.rs | 94 ++++ pim-os/src/kernel/vadd.rs | 68 +++ pim-os/src/kernel/vmul.rs | 68 +++ pim-os/src/lib.rs | 32 ++ pim-os/src/m5op.rs | 26 + pim-os/src/m5op.s | 52 ++ pim-os/src/memory_config.rs | 1 + pim-os/src/panic.rs | 15 + pim-os/src/pim.rs | 7 + pim-os/src/pim/config.rs | 41 ++ pim-os/src/pim/continuous_array.rs | 38 ++ pim-os/src/pim/interleaved_array.rs | 28 ++ pim-os/src/pim/legacy.rs | 1 + pim-os/src/pim/legacy/array.rs | 227 +++++++++ pim-os/src/pim/operation.rs | 30 ++ pim-os/src/pim/state.rs | 25 + pim-os/src/pim/vector.rs | 125 +++++ pim-os/src/start.s | 80 ++++ pim-os/src/uart.rs | 17 + pim-vm/Cargo.lock | 402 ++++++++++++++++ pim-vm/Cargo.toml | 18 + pim-vm/src/lib.rs | 451 ++++++++++++++++++ 49 files changed, 3594 insertions(+) create mode 100644 .gitignore create mode 100644 pim-isa/Cargo.lock create mode 100644 pim-isa/Cargo.toml create mode 100644 pim-isa/src/lib.rs create mode 100644 pim-os/Cargo.lock create mode 100644 pim-os/Cargo.toml create mode 100644 pim-os/aarch64-gem5.ld create mode 100644 pim-os/build.rs create mode 100644 pim-os/rust-toolchain.toml create mode 100644 pim-os/src/alloc.rs create mode 100644 pim-os/src/bin/gemv.rs create mode 100644 pim-os/src/bin/haxpy.rs create mode 100644 pim-os/src/bin/legacy/matrix_matrix_add.rs create mode 100644 pim-os/src/bin/legacy/matrix_matrix_multiply.rs create mode 100644 pim-os/src/bin/legacy/matrix_scalar_multiply.rs create mode 100644 pim-os/src/bin/legacy/matrix_vector_multiply.rs create mode 100644 pim-os/src/bin/vadd.rs create mode 100644 pim-os/src/bin/vmul.rs create mode 100644 pim-os/src/boot.rs create mode 100644 pim-os/src/critical_section.rs create mode 100644 pim-os/src/kernel.rs create mode 100644 pim-os/src/kernel/gemv.rs create mode 100644 pim-os/src/kernel/haxpy.rs create mode 100644 pim-os/src/kernel/legacy.rs create mode 100644 pim-os/src/kernel/legacy/matrix_matrix_add.rs create mode 100644 pim-os/src/kernel/legacy/matrix_matrix_mul.rs create mode 100644 pim-os/src/kernel/legacy/matrix_scalar_mul.rs create mode 100644 pim-os/src/kernel/legacy/matrix_vector_mul.rs create mode 100644 pim-os/src/kernel/vadd.rs create mode 100644 pim-os/src/kernel/vmul.rs create mode 100644 pim-os/src/lib.rs create mode 100644 pim-os/src/m5op.rs create mode 100644 pim-os/src/m5op.s create mode 100644 pim-os/src/memory_config.rs create mode 100644 pim-os/src/panic.rs create mode 100644 pim-os/src/pim.rs create mode 100644 pim-os/src/pim/config.rs create mode 100644 pim-os/src/pim/continuous_array.rs create mode 100644 pim-os/src/pim/interleaved_array.rs create mode 100644 pim-os/src/pim/legacy.rs create mode 100644 pim-os/src/pim/legacy/array.rs create mode 100644 pim-os/src/pim/operation.rs create mode 100644 pim-os/src/pim/state.rs create mode 100644 pim-os/src/pim/vector.rs create mode 100644 pim-os/src/start.s create mode 100644 pim-os/src/uart.rs create mode 100644 pim-vm/Cargo.lock create mode 100644 pim-vm/Cargo.toml create mode 100644 pim-vm/src/lib.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..eb5a316 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +target diff --git a/pim-isa/Cargo.lock b/pim-isa/Cargo.lock new file mode 100644 index 0000000..31c7f4a --- /dev/null +++ b/pim-isa/Cargo.lock @@ -0,0 +1,65 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "pim-isa" +version = "0.1.0" +dependencies = [ + "serde", +] + +[[package]] +name = "proc-macro2" +version = "1.0.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "serde" +version = "1.0.195" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.195" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "syn" +version = "2.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" diff --git a/pim-isa/Cargo.toml b/pim-isa/Cargo.toml new file mode 100644 index 0000000..86d4648 --- /dev/null +++ b/pim-isa/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "pim-isa" +version = "0.1.0" +edition = "2021" + +[dependencies] +serde = { version = "1.0", default-features = false, features = ["derive"] } diff --git a/pim-isa/src/lib.rs b/pim-isa/src/lib.rs new file mode 100644 index 0000000..3f728de --- /dev/null +++ b/pim-isa/src/lib.rs @@ -0,0 +1,76 @@ +#![no_std] + +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub enum Instruction { + NOP, + EXIT, + JUMP { + offset: i16, + count: u16, + }, + MOV { + src: File, + dst: File, + }, + FILL { + src: File, + dst: File, + }, + ADD { + src0: File, + src1: File, + dst: File, + aam: bool, + }, + MUL { + src0: File, + src1: File, + dst: File, + aam: bool, + }, + MAC { + src0: File, + src1: File, + src2: File, + dst: File, + aam: bool, + }, + MAD { + src0: File, + src1: File, + src2: File, + dst: File, + aam: bool, + }, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub enum File { + GrfA { index: u8 }, + GrfB { index: u8 }, + SrfM { index: u8 }, + SrfA { index: u8 }, + Bank, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Kernel(pub [Instruction; 32]); + +impl Kernel { + pub const NOP: Kernel = Kernel([Instruction::NOP; 32]); +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct PimConfig { + pub bank_mode: Option, + pub kernel: Option, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub enum BankMode { + SingleBank, + AllBank, + PimAllBank, +} diff --git a/pim-os/Cargo.lock b/pim-os/Cargo.lock new file mode 100644 index 0000000..e895a83 --- /dev/null +++ b/pim-os/Cargo.lock @@ -0,0 +1,331 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aarch64-cpu" +version = "9.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac42a04a61c19fc8196dd728022a784baecc5d63d7e256c01ad1b3fbfab26287" +dependencies = [ + "tock-registers", +] + +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + +[[package]] +name = "atomic-polyfill" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cf2bce30dfe09ef0bfaef228b9d414faaf7e563035494d7fe092dba54b300f4" +dependencies = [ + "critical-section", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "critical-section" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7059fff8937831a9ae6f0fe4d658ffabf58f2ca96aa9dec1c889f936f705f216" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "embedded-alloc" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddae17915accbac2cfbc64ea0ae6e3b330e6ea124ba108dada63646fd3c6f815" +dependencies = [ + "critical-section", + "linked_list_allocator", +] + +[[package]] +name = "half" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" +dependencies = [ + "cfg-if", + "crunchy", +] + +[[package]] +name = "hash32" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67" +dependencies = [ + "byteorder", +] + +[[package]] +name = "heapless" +version = "0.7.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdc6457c0eb62c71aac4bc17216026d8410337c4126773b9c5daba343f17964f" +dependencies = [ + "atomic-polyfill", + "hash32", + "rustc_version", + "spin", + "stable_deref_trait", +] + +[[package]] +name = "linked_list_allocator" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afa463f5405ee81cdb9cc2baf37e08ec7e4c8209442b5d72c04cfb2cd6e6286" + +[[package]] +name = "lock_api" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "nalgebra" +version = "0.32.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "307ed9b18cc2423f29e83f84fd23a8e73628727990181f18641a8b5dc2ab1caa" +dependencies = [ + "approx", + "num-complex", + "num-rational", + "num-traits", + "simba", + "typenum", +] + +[[package]] +name = "num-complex" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +dependencies = [ + "autocfg", +] + +[[package]] +name = "paste" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + +[[package]] +name = "pim-isa" +version = "0.1.0" +dependencies = [ + "serde", +] + +[[package]] +name = "pim-os" +version = "0.1.0" +dependencies = [ + "aarch64-cpu", + "critical-section", + "embedded-alloc", + "half", + "nalgebra", + "num-traits", + "pim-isa", + "serde", + "serde-json-core", +] + +[[package]] +name = "proc-macro2" +version = "1.0.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + +[[package]] +name = "ryu" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0" + +[[package]] +name = "serde" +version = "1.0.195" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde-json-core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c9e1ab533c0bc414c34920ec7e5f097101d126ed5eac1a1aac711222e0bbb33" +dependencies = [ + "heapless", + "ryu", + "serde", +] + +[[package]] +name = "serde_derive" +version = "1.0.195" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "simba" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "061507c94fc6ab4ba1c9a0305018408e312e17c041eb63bef8aa726fa33aceae" +dependencies = [ + "approx", + "num-complex", + "num-traits", + "paste", +] + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "syn" +version = "2.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tock-registers" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "696941a0aee7e276a165a978b37918fd5d22c55c3d6bda197813070ca9c0f21c" + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" diff --git a/pim-os/Cargo.toml b/pim-os/Cargo.toml new file mode 100644 index 0000000..57631d6 --- /dev/null +++ b/pim-os/Cargo.toml @@ -0,0 +1,31 @@ +cargo-features = ["per-package-target"] + +[package] +name = "pim-os" +version = "0.1.0" +edition = "2021" +forced-target = "aarch64-unknown-none" + +[features] +cacheless = [] +X1 = [] +X2 = [] +X3 = [] +X4 = [] + +[dependencies] +aarch64-cpu = "9.4.0" +half = { version = "2.3.1", default-features = false } +nalgebra = { version = "0.32.3", default-features = false } +pim-isa = { path = "../pim-isa", default-features = false } +serde-json-core = "0.5.1" +serde = { version = "1.0", default-features = false, features = ["derive"] } +num-traits = { version = "0.2.17", default-features = false } +embedded-alloc = "0.5.1" +critical-section = "1.1.2" + +[profile.dev] +panic = "abort" + +[profile.release] +panic = "abort" diff --git a/pim-os/aarch64-gem5.ld b/pim-os/aarch64-gem5.ld new file mode 100644 index 0000000..d41b35a --- /dev/null +++ b/pim-os/aarch64-gem5.ld @@ -0,0 +1,24 @@ +MEMORY +{ + bootmem : ORIGIN = 0x0, LENGTH = 0x100000 + dram : ORIGIN = 0x80000000, LENGTH = 0x40000000 + dram_pim_config : ORIGIN = 0xC0000000, LENGTH = 0x4000 + dram_pim_data : ORIGIN = 0xC0004000, LENGTH = 0x3FFFC000 +} + +ENTRY(_start) +SECTIONS +{ + .init : { *(.init) } > bootmem + .text : { KEEP(*(.text)) } > dram + .data : { *(.data) } > dram + .rodata : { *(.rodata) } > dram + .bss : { *(.bss) } > dram + + . = ALIGN(8); + . = . + 0x10000000; # 100 MiB Stack + LD_STACK_PTR = .; + + .pim_config : { KEEP(*(.pim_config)) } > dram_pim_config + .pim_data : { KEEP(*(.pim_data)) } > dram_pim_data +} diff --git a/pim-os/build.rs b/pim-os/build.rs new file mode 100644 index 0000000..e95e8d6 --- /dev/null +++ b/pim-os/build.rs @@ -0,0 +1,15 @@ +use std::env; +use std::fs; +use std::path::PathBuf; + +const LINKER_SCRIPT: &str = "aarch64-gem5.ld"; + +fn main() { + // Put `aarch64-gem5.ld` in our output directory and ensure it's + // on the linker search path. + let out = &PathBuf::from(env::var_os("OUT_DIR").unwrap()); + fs::copy(LINKER_SCRIPT, out.join(LINKER_SCRIPT)).unwrap(); + println!("cargo:rustc-link-search={}", out.display()); + println!("cargo:rerun-if-changed={LINKER_SCRIPT}"); + println!("cargo:rustc-link-arg=-T{LINKER_SCRIPT}"); +} diff --git a/pim-os/rust-toolchain.toml b/pim-os/rust-toolchain.toml new file mode 100644 index 0000000..5d56faf --- /dev/null +++ b/pim-os/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel = "nightly" diff --git a/pim-os/src/alloc.rs b/pim-os/src/alloc.rs new file mode 100644 index 0000000..08f94df --- /dev/null +++ b/pim-os/src/alloc.rs @@ -0,0 +1,18 @@ +extern crate alloc; + +use core::mem::MaybeUninit; +use embedded_alloc::Heap; + +#[global_allocator] +static PIM_ALLOC: Heap = Heap::empty(); + +const PIM_ARENA_SIZE: usize = 0x2000000; + +#[link_section = ".pim_data"] +static mut PIM_ARENA: [MaybeUninit; PIM_ARENA_SIZE] = [MaybeUninit::uninit(); PIM_ARENA_SIZE]; + +pub fn init() { + unsafe { + PIM_ALLOC.init(PIM_ARENA.as_ptr() as usize, PIM_ARENA_SIZE); + } +} diff --git a/pim-os/src/bin/gemv.rs b/pim-os/src/bin/gemv.rs new file mode 100644 index 0000000..99fc76d --- /dev/null +++ b/pim-os/src/bin/gemv.rs @@ -0,0 +1,67 @@ +#![no_std] +#![no_main] + +extern crate alloc; + +use aarch64_cpu::asm::barrier; +use alloc::boxed::Box; +use core::fmt::Write; +use nalgebra::{SMatrix, SVector}; +use num_traits::{One, Zero}; +use pim_isa::BankMode; +use pim_os::{ + kernel::gemv, + pim::{ + self, interleaved_array, + vector::{F16x1, F16x16}, + }, + uart::Uart0, +}; + +const ROWS: usize = 128; +const COLUMNS: usize = 128; +const X16_ROWS: usize = ROWS / 16; +const X16_COLUMNS: usize = COLUMNS / 16; + +#[no_mangle] +pub extern "C" fn main() { + pim::state::set_kernel(&gemv::KERNEL); + + let mut matrix = SMatrix::<_, ROWS, COLUMNS>::zeros(); + matrix.fill_lower_triangle(F16x1::one(), 0); + + let pim_matrix = Box::new(pim::continuous_array::Matrix::::from(matrix)); + + let input_vector = SVector::<_, X16_COLUMNS>::from_element(F16x16::one()); + let interleaved_input_vector = Box::new(interleaved_array::Vector::from(input_vector)); + + let mut output_partial_sum_vector = Box::new(SVector::::zeros()); + + let dummy = Box::new(0); + + // Verify everything is correctly initialized before PIM operation + barrier::dsb(barrier::SY); + + // Execute kernel + pim::state::set_bank_mode(BankMode::PimAllBank); + gemv::execute( + pim_matrix.as_ref(), + interleaved_input_vector.as_ref(), + output_partial_sum_vector.as_mut(), + dummy.as_ref(), + ); + pim::state::set_bank_mode(BankMode::SingleBank); + + writeln!(Uart0, "{output_partial_sum_vector}").unwrap(); + + let output_vector = SVector::::from_fn(|r, _| { + output_partial_sum_vector[r] + .0 + .iter() + .fold(F16x1::zero(), |acc, val| acc + *val) + }); + core::hint::black_box(output_vector); + + writeln!(Uart0, "{output_vector}").unwrap(); + writeln!(Uart0, "Done").unwrap(); +} diff --git a/pim-os/src/bin/haxpy.rs b/pim-os/src/bin/haxpy.rs new file mode 100644 index 0000000..77bacef --- /dev/null +++ b/pim-os/src/bin/haxpy.rs @@ -0,0 +1,64 @@ +#![no_std] +#![no_main] + +extern crate alloc; + +use aarch64_cpu::asm::barrier; +use alloc::boxed::Box; +use core::fmt::Write; +use half::f16; +use nalgebra::SVector; +use pim_isa::BankMode; +use pim_os::{ + kernel::haxpy, + pim::{ + self, interleaved_array, + vector::{F16x1, F16x16}, + }, + uart::Uart0, +}; + +const ROWS: usize = 256; +const ELEMENTS_PER_BANK: usize = 16; +const BANKS: usize = 16; +const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS); + +#[no_mangle] +pub extern "C" fn main() { + pim::state::set_kernel(&haxpy::KERNEL); + + let a = Box::new(pim::continuous_array::Vector( + SVector::::from_fn(|i, _| F16x1(f16::from_f32(i as _))), + )); + let b = Box::new(pim::continuous_array::Vector( + SVector::::from_fn(|i, _| F16x1(f16::from_f32((ROWS - i) as _))), + )); + + let scalar_vector = SVector::::from_element(F16x16([F16x1(f16::NEG_ONE); 16])); + let interleaved_scalar_vector = Box::new(interleaved_array::Vector::from(scalar_vector)); + + writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap(); + + let mut c = Box::new(pim::continuous_array::Vector( + SVector::::zeros(), + )); + + let dummy = Box::new(0); + + // Verify everything is correctly initialized before PIM operation + barrier::dsb(barrier::SY); + + // Execute kernel + pim::state::set_bank_mode(BankMode::PimAllBank); + haxpy::execute::( + &a.0, + &b.0, + &interleaved_scalar_vector, + &mut c.0, + dummy.as_ref(), + ); + pim::state::set_bank_mode(BankMode::SingleBank); + + writeln!(Uart0, "{}", c.0).unwrap(); + writeln!(Uart0, "Done").unwrap(); +} diff --git a/pim-os/src/bin/legacy/matrix_matrix_add.rs b/pim-os/src/bin/legacy/matrix_matrix_add.rs new file mode 100644 index 0000000..fd31fc1 --- /dev/null +++ b/pim-os/src/bin/legacy/matrix_matrix_add.rs @@ -0,0 +1,78 @@ +#![no_std] +#![no_main] + +extern crate alloc; + +use aarch64_cpu::asm::barrier; +use alloc::{boxed::Box, rc::Rc}; +use core::{cell::RefCell, fmt::Write}; +use half::f16; +use nalgebra::Matrix; +use pim_isa::BankMode; +use pim_os::{ + pim::{ + self, + array::{DummyArray, PimMatrixArena, PimStorage, NUMBER_OF_BANKS}, + kernel::matrix_matrix_add, + vector::{F16x1, F16x16}, + }, + uart::Uart0, +}; + +#[no_mangle] +pub extern "C" fn main() { + pim::state::set_kernel(&matrix_matrix_add::KERNEL); + + let pim_matrix_arena0 = Rc::new(RefCell::new(PimMatrixArena( + [[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8], + ))); + let pim_matrix_arena1 = Rc::new(RefCell::new(PimMatrixArena( + [[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8], + ))); + let pim_matrix_arena2 = Rc::new(RefCell::new(PimMatrixArena( + [[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8], + ))); + + let mut matrix0 = Matrix::from_data(PimStorage { + arena: &pim_matrix_arena0, + index: 0, + }); + matrix0.fill_lower_triangle(F16x1(f16::ONE), 0); + + let mut matrix1 = Matrix::from_data(PimStorage { + arena: &pim_matrix_arena1, + index: 0, + }); + matrix1.fill_lower_triangle(F16x1(f16::ONE), 0); + + let matrix2 = Matrix::from_data(PimStorage { + arena: &pim_matrix_arena2, + index: 0, + }); + + writeln!(Uart0, "{matrix0} + {matrix1}\n=").unwrap(); + + let dummy_array = Box::new(DummyArray([F16x16::default(); NUMBER_OF_BANKS])); + + // Verify everything is correctly initialized before PIM operation + barrier::dsb(barrier::SY); + + { + let pim_matrix_arena0 = &pim_matrix_arena0.borrow(); + let pim_matrix_arena1 = &pim_matrix_arena1.borrow(); + let pim_matrix_arena2 = &mut pim_matrix_arena2.borrow_mut(); + + pim::state::set_bank_mode(BankMode::PimAllBank); + + matrix_matrix_add::execute( + pim_matrix_arena0, + pim_matrix_arena1, + pim_matrix_arena2, + dummy_array.as_ref(), + ); + + pim::state::set_bank_mode(BankMode::SingleBank); + } + + writeln!(Uart0, "{matrix2}").unwrap(); +} diff --git a/pim-os/src/bin/legacy/matrix_matrix_multiply.rs b/pim-os/src/bin/legacy/matrix_matrix_multiply.rs new file mode 100644 index 0000000..9b79f59 --- /dev/null +++ b/pim-os/src/bin/legacy/matrix_matrix_multiply.rs @@ -0,0 +1,79 @@ +#![no_std] +#![no_main] + +extern crate alloc; + +use aarch64_cpu::asm::barrier; +use alloc::{boxed::Box, rc::Rc}; +use pim_isa::BankMode; +use core::{cell::RefCell, fmt::Write}; +use half::f16; +use nalgebra::Matrix; +use pim_os::{ + pim::{ + self, + array::{DummyArray, PimMatrixArena, PimStorage, NUMBER_OF_BANKS}, + kernel::matrix_matrix_mul, + vector::{F16x1, F16x16}, + }, + uart::Uart0, +}; + +#[no_mangle] +pub extern "C" fn main() { + pim::state::set_kernel(&matrix_matrix_mul::KERNEL); + + let pim_matrix_arena0 = Rc::new(RefCell::new(PimMatrixArena( + [[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8], + ))); + let pim_matrix_arena1 = Rc::new(RefCell::new(PimMatrixArena( + [[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8], + ))); + let pim_matrix_arena2 = Rc::new(RefCell::new(PimMatrixArena( + [[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8], + ))); + + let mut matrix0 = Matrix::from_data(PimStorage { + arena: &pim_matrix_arena0, + index: 0, + }); + matrix0.fill_lower_triangle(F16x1(f16::ONE), 0); + + let mut matrix1 = Matrix::from_data(PimStorage { + arena: &pim_matrix_arena1, + index: 0, + }); + matrix1.fill_lower_triangle(F16x1(f16::ONE), 0); + + let matrix2 = Matrix::from_data(PimStorage { + arena: &pim_matrix_arena2, + index: 0, + }); + + writeln!(Uart0, "{matrix0} * {matrix1}\n=").unwrap(); + + let dummy_array = Box::new(DummyArray([F16x16::default(); NUMBER_OF_BANKS])); + + // Verify everything is correctly initialized before PIM operation + barrier::dsb(barrier::SY); + + // Execute kernel + { + let pim_matrix_arena0 = &pim_matrix_arena0.borrow(); + let pim_matrix_arena1 = &pim_matrix_arena1.borrow(); + let pim_matrix_arena2 = &mut pim_matrix_arena2.borrow_mut(); + + pim::state::set_bank_mode(BankMode::PimAllBank); + + matrix_matrix_mul::execute( + pim_matrix_arena0, + pim_matrix_arena1, + pim_matrix_arena2, + dummy_array.as_ref(), + ); + + pim::state::set_bank_mode(BankMode::SingleBank); + } + + writeln!(Uart0, "{matrix2}").unwrap(); +} diff --git a/pim-os/src/bin/legacy/matrix_scalar_multiply.rs b/pim-os/src/bin/legacy/matrix_scalar_multiply.rs new file mode 100644 index 0000000..c47d079 --- /dev/null +++ b/pim-os/src/bin/legacy/matrix_scalar_multiply.rs @@ -0,0 +1,73 @@ +#![no_std] +#![no_main] + +extern crate alloc; + +use aarch64_cpu::asm::barrier; +use alloc::{boxed::Box, rc::Rc}; +use core::{cell::RefCell, fmt::Write}; +use half::f16; +use nalgebra::Matrix; +use pim_isa::BankMode; +use pim_os::{ + pim::{ + self, + array::{DummyArray, PimMatrixArena, PimScalarArena, PimStorage, NUMBER_OF_BANKS}, + kernel::matrix_scalar_mul, + vector::{F16x1, F16x16}, + }, + uart::Uart0, +}; + +#[no_mangle] +pub extern "C" fn main() { + pim::state::set_kernel(&matrix_scalar_mul::KERNEL); + + let pim_matrix_arena0 = Rc::new(RefCell::new(PimMatrixArena( + [[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8], + ))); + let pim_matrix_arena1 = Rc::new(RefCell::new(PimMatrixArena( + [[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8], + ))); + + let mut matrix0 = Matrix::from_data(PimStorage { + arena: &pim_matrix_arena0, + index: 0, + }); + matrix0.fill_lower_triangle(F16x1(f16::ONE), 0); + + let matrix1 = Matrix::from_data(PimStorage { + arena: &pim_matrix_arena1, + index: 0, + }); + + let pim_scalar_arena = Box::new(PimScalarArena( + [F16x16([F16x1(f16::from_f32(2.0)); 16]); 32], + )); + + writeln!(Uart0, "{} * {matrix0}\n=", pim_scalar_arena.0[0].0[0]).unwrap(); + + let dummy_array = Box::new(DummyArray([F16x16::default(); NUMBER_OF_BANKS])); + + // Verify everything is correctly initialized before PIM operation + barrier::dsb(barrier::SY); + + // Execute kernel + { + let pim_matrix_arena0 = &pim_matrix_arena0.borrow(); + let pim_matrix_arena1 = &mut pim_matrix_arena1.borrow_mut(); + + pim::state::set_bank_mode(BankMode::PimAllBank); + + matrix_scalar_mul::execute( + pim_scalar_arena.as_ref(), + pim_matrix_arena0, + pim_matrix_arena1, + dummy_array.as_ref(), + ); + + pim::state::set_bank_mode(BankMode::SingleBank); + } + + writeln!(Uart0, "{matrix1}").unwrap(); +} diff --git a/pim-os/src/bin/legacy/matrix_vector_multiply.rs b/pim-os/src/bin/legacy/matrix_vector_multiply.rs new file mode 100644 index 0000000..46a09e5 --- /dev/null +++ b/pim-os/src/bin/legacy/matrix_vector_multiply.rs @@ -0,0 +1,79 @@ +#![no_std] +#![no_main] + +extern crate alloc; + +use aarch64_cpu::asm::barrier; +use alloc::{boxed::Box, rc::Rc}; +use core::{cell::RefCell, fmt::Write}; +use half::f16; +use nalgebra::Matrix; +use pim_isa::BankMode; +use pim_os::{ + pim::{ + self, + array::{DummyArray, PimMatrixArena, PimStorage, NUMBER_OF_BANKS}, + kernel::matrix_vector_mul, + vector::{F16x1, F16x16}, + }, + uart::Uart0, +}; + +#[no_mangle] +pub extern "C" fn main() { + pim::state::set_kernel(&matrix_vector_mul::KERNEL); + + let pim_matrix_arena0 = Rc::new(RefCell::new(PimMatrixArena( + [[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 8], + ))); + let pim_matrix_arena1 = Rc::new(RefCell::new(PimMatrixArena( + [[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 1], + ))); + let pim_matrix_arena2 = Rc::new(RefCell::new(PimMatrixArena( + [[[F16x16::default(); NUMBER_OF_BANKS]; 8]; 1], + ))); + + let mut matrix0 = Matrix::from_data(PimStorage { + arena: &pim_matrix_arena0, + index: 0, + }); + matrix0.fill_lower_triangle(F16x1(f16::ONE), 0); + + let mut matrix1 = Matrix::from_data(PimStorage { + arena: &pim_matrix_arena1, + index: 0, + }); + matrix1.fill_lower_triangle(F16x1(f16::ONE), 0); + + let matrix2 = Matrix::from_data(PimStorage { + arena: &pim_matrix_arena2, + index: 0, + }); + + writeln!(Uart0, "{matrix0} * {matrix1}\n=").unwrap(); + + let dummy_array = Box::new(DummyArray([F16x16::default(); NUMBER_OF_BANKS])); + + // Verify everything is correctly initialized before PIM operation + barrier::dsb(barrier::SY); + + // Execute kernel + { + let pim_matrix_arena0 = &pim_matrix_arena0.borrow(); + let pim_matrix_arena1 = &pim_matrix_arena1.borrow(); + let pim_matrix_arena2 = &mut pim_matrix_arena2.borrow_mut(); + + pim::state::set_bank_mode(BankMode::PimAllBank); + + matrix_vector_mul::execute( + pim_matrix_arena0, + pim_matrix_arena1, + pim_matrix_arena2, + dummy_array.as_ref(), + ); + + pim::state::set_bank_mode(BankMode::SingleBank); + } + + writeln!(Uart0, "{matrix2}").unwrap(); +} diff --git a/pim-os/src/bin/vadd.rs b/pim-os/src/bin/vadd.rs new file mode 100644 index 0000000..cdb87be --- /dev/null +++ b/pim-os/src/bin/vadd.rs @@ -0,0 +1,52 @@ +#![no_std] +#![no_main] + +extern crate alloc; + +use aarch64_cpu::asm::barrier; +use alloc::boxed::Box; +use core::fmt::Write; +use half::f16; +use nalgebra::SVector; +use pim_isa::BankMode; +use pim_os::{ + kernel::vadd, + pim::{self, vector::F16x1}, + uart::Uart0, +}; + +const ROWS: usize = 256; +const ELEMENTS_PER_BANK: usize = 16; +const BANKS: usize = 16; +const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS); + +#[no_mangle] +pub extern "C" fn main() { + pim::state::set_kernel(&vadd::KERNEL); + + let a = Box::new(pim::continuous_array::Vector( + SVector::::from_fn(|i, _| F16x1(f16::from_f32(i as _))), + )); + let b = Box::new(pim::continuous_array::Vector( + SVector::::from_fn(|i, _| F16x1(f16::from_f32((ROWS - i) as _))), + )); + + writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap(); + + let mut c = Box::new(pim::continuous_array::Vector( + SVector::::zeros(), + )); + + let dummy = Box::new(0); + + // Verify everything is correctly initialized before PIM operation + barrier::dsb(barrier::SY); + + // Execute kernel + pim::state::set_bank_mode(BankMode::PimAllBank); + vadd::execute::(&a.0, &b.0, &mut c.0, dummy.as_ref()); + pim::state::set_bank_mode(BankMode::SingleBank); + + writeln!(Uart0, "{}", c.0).unwrap(); + writeln!(Uart0, "Done").unwrap(); +} diff --git a/pim-os/src/bin/vmul.rs b/pim-os/src/bin/vmul.rs new file mode 100644 index 0000000..b5ac022 --- /dev/null +++ b/pim-os/src/bin/vmul.rs @@ -0,0 +1,52 @@ +#![no_std] +#![no_main] + +extern crate alloc; + +use aarch64_cpu::asm::barrier; +use alloc::boxed::Box; +use core::fmt::Write; +use half::f16; +use nalgebra::SVector; +use pim_isa::BankMode; +use pim_os::{ + kernel::vmul, + pim::{self, vector::F16x1}, + uart::Uart0, +}; + +const ROWS: usize = 256; +const ELEMENTS_PER_BANK: usize = 16; +const BANKS: usize = 16; +const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS); + +#[no_mangle] +pub extern "C" fn main() { + pim::state::set_kernel(&vmul::KERNEL); + + let a = Box::new(pim::continuous_array::Vector( + SVector::::from_fn(|_, _| F16x1(f16::from_f32(2 as _))), + )); + let b = Box::new(pim::continuous_array::Vector( + SVector::::from_fn(|_, _| F16x1(f16::from_f32(3 as _))), + )); + + writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap(); + + let mut c = Box::new(pim::continuous_array::Vector( + SVector::::zeros(), + )); + + let dummy = Box::new(0); + + // Verify everything is correctly initialized before PIM operation + barrier::dsb(barrier::SY); + + // Execute kernel + pim::state::set_bank_mode(BankMode::PimAllBank); + vmul::execute::(&a.0, &b.0, &mut c.0, dummy.as_ref()); + pim::state::set_bank_mode(BankMode::SingleBank); + + writeln!(Uart0, "{}", c.0).unwrap(); + writeln!(Uart0, "Done").unwrap(); +} diff --git a/pim-os/src/boot.rs b/pim-os/src/boot.rs new file mode 100644 index 0000000..eccbde1 --- /dev/null +++ b/pim-os/src/boot.rs @@ -0,0 +1,3 @@ +use core::arch::global_asm; + +global_asm!(include_str!("start.s")); diff --git a/pim-os/src/critical_section.rs b/pim-os/src/critical_section.rs new file mode 100644 index 0000000..9931a77 --- /dev/null +++ b/pim-os/src/critical_section.rs @@ -0,0 +1,14 @@ +use critical_section::RawRestoreState; + +struct CriticalSection; +critical_section::set_impl!(CriticalSection); + +unsafe impl critical_section::Impl for CriticalSection { + unsafe fn acquire() -> RawRestoreState { + // no special implementation as interrupts are not used in the project + } + + unsafe fn release(_token: RawRestoreState) { + // no special implementation as interrupts are not used in the project + } +} diff --git a/pim-os/src/kernel.rs b/pim-os/src/kernel.rs new file mode 100644 index 0000000..fa664e3 --- /dev/null +++ b/pim-os/src/kernel.rs @@ -0,0 +1,5 @@ +pub mod gemv; +pub mod haxpy; +pub mod legacy; +pub mod vadd; +pub mod vmul; diff --git a/pim-os/src/kernel/gemv.rs b/pim-os/src/kernel/gemv.rs new file mode 100644 index 0000000..082b5d2 --- /dev/null +++ b/pim-os/src/kernel/gemv.rs @@ -0,0 +1,105 @@ +use crate::pim::{ + continuous_array::Matrix, interleaved_array, operation::PimOperand, vector::F16x16, +}; +use aarch64_cpu::asm::barrier; +use nalgebra::SVector; +use pim_isa::{File, Instruction, Kernel}; + +pub const KERNEL: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 4 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 5 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 6 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 7 }, + }, + Instruction::MAC { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + src2: File::GrfB { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: true, + }, + Instruction::JUMP { + offset: -1, + count: 7, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub fn execute( + matrix: &Matrix, + input_vector: &interleaved_array::Vector<8>, + output_partial_sum_vector: &mut SVector, + dummy: &impl PimOperand, +) { + for block in input_vector.0.iter() { + block.execute_read(); + } + + for sub_matrix in matrix.0.iter() { + for column_block in sub_matrix.fixed_rows::<1>(0).iter() { + column_block.execute_read_async(); + } + } + + barrier::dsb(barrier::SY); + + for chunk in output_partial_sum_vector + .fixed_rows_with_step_mut::(0, 16) + .iter_mut() + { + chunk.execute_write(); + } + + dummy.execute_read(); +} diff --git a/pim-os/src/kernel/haxpy.rs b/pim-os/src/kernel/haxpy.rs new file mode 100644 index 0000000..087ce85 --- /dev/null +++ b/pim-os/src/kernel/haxpy.rs @@ -0,0 +1,77 @@ +use crate::pim::{interleaved_array, operation::PimOperand, vector::F16x1}; +use nalgebra::SVector; +use pim_isa::{File, Instruction, Kernel}; + +pub const KERNEL: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::SrfM { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::SrfA { index: 0 }, + src2: File::GrfA { index: 0 }, + dst: File::GrfA { index: 0 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfA { index: 0 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub fn execute( + a: &SVector, + b: &SVector, + interleaved_scalar: &interleaved_array::Vector<1>, + c: &mut SVector, + dummy: &impl PimOperand, +) { + interleaved_scalar.execute_read(); + + a.fixed_rows_with_step::(0, 256) + .iter() + .for_each(|entry| entry.execute_read()); + + b.fixed_rows_with_step::(0, 256) + .iter() + .for_each(|entry| entry.execute_read()); + + c.fixed_rows_with_step_mut::(0, 256) + .iter_mut() + .for_each(|entry| entry.execute_write()); + + dummy.execute_read(); +} diff --git a/pim-os/src/kernel/legacy.rs b/pim-os/src/kernel/legacy.rs new file mode 100644 index 0000000..667d935 --- /dev/null +++ b/pim-os/src/kernel/legacy.rs @@ -0,0 +1,3 @@ +pub mod matrix_matrix_mul; +pub mod matrix_scalar_mul; +pub mod matrix_vector_mul; diff --git a/pim-os/src/kernel/legacy/matrix_matrix_add.rs b/pim-os/src/kernel/legacy/matrix_matrix_add.rs new file mode 100644 index 0000000..54300ae --- /dev/null +++ b/pim-os/src/kernel/legacy/matrix_matrix_add.rs @@ -0,0 +1,148 @@ +use crate::pim::legacy::array::{DummyArray, PimMatrixArena, PimRegion}; +use pim_isa::{File, Instruction, Kernel}; + +pub const KERNEL: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 4 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 5 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 6 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 7 }, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + dst: File::GrfA { index: 0 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 1 }, + dst: File::GrfA { index: 1 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 2 }, + dst: File::GrfA { index: 2 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 3 }, + dst: File::GrfA { index: 3 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 4 }, + dst: File::GrfA { index: 4 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 5 }, + dst: File::GrfA { index: 5 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 6 }, + dst: File::GrfA { index: 6 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 7 }, + dst: File::GrfA { index: 7 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfA { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 1 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 2 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 3 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 4 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 5 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 6 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 7 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub fn execute( + pim_matrix_arena0: &PimMatrixArena, + pim_matrix_arena1: &PimMatrixArena, + pim_matrix_arena2: &mut PimMatrixArena, + dummy_array: &DummyArray, +) { + for column in 0..C { + for row in 0..R { + pim_matrix_arena0.execute_instruction_read_single_bank(column * R + row); + } + + for row in 0..R { + pim_matrix_arena1.execute_instruction_read_single_bank(column * R + row); + } + + for row in 0..R { + pim_matrix_arena2.execute_instruction_write_single_bank(column * R + row); + } + + dummy_array.execute_instruction_read_single_bank(0); + } +} \ No newline at end of file diff --git a/pim-os/src/kernel/legacy/matrix_matrix_mul.rs b/pim-os/src/kernel/legacy/matrix_matrix_mul.rs new file mode 100644 index 0000000..513fede --- /dev/null +++ b/pim-os/src/kernel/legacy/matrix_matrix_mul.rs @@ -0,0 +1,154 @@ +use crate::pim::legacy::array::{DummyArray, PimMatrixArena, PimRegion}; +use pim_isa::{File, Instruction, Kernel}; + +pub const KERNEL: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 4 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 5 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 6 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 7 }, + }, + Instruction::MAC { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + src2: File::GrfB { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: true, + }, + Instruction::JUMP { + offset: -1, + count: 63, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 1 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 2 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 3 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 4 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 5 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 6 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 7 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +// Vlt in der Thesis kurz erwähnen und dann zu AAM überleiten +// pub fn execute_matrix_multiply_elementwise( +// pim_state: &mut PimState, +// pim_matrix_arena0: &mut PimMatrixArena, +// pim_matrix_arena1: &mut PimMatrixArena, +// pim_matrix_arena2: &mut PimMatrixArena, +// dummy_array: &mut DummyArray, +// ) { +// set_bank_mode(BankMode::PimAllBank); + +// for i in 0..(R * C) { +// let start_column = i % R; +// let start_row = (i / R) * R; + +// for j in 0..C { +// pim_matrix_arena0.execute_instruction_read_single_bank(start_column + R * j); +// } + +// for j in 0..R { +// pim_matrix_arena1.execute_instruction_read_single_bank(start_row + j); +// } + +// pim_matrix_arena2.execute_instruction_write_single_bank(i); + +// dummy_array.execute_instruction_read_single_bank(0); +// } + +// set_bank_mode(BankMode::SingleBank); +// } + +const MATRIX_DIMENSION: usize = 8; + +pub fn execute( + pim_matrix_arena0: &PimMatrixArena, + pim_matrix_arena1: &PimMatrixArena, + pim_matrix_arena2: &mut PimMatrixArena, + dummy_array: &DummyArray, +) { + for row in 0..MATRIX_DIMENSION { + for i in 0..MATRIX_DIMENSION { + pim_matrix_arena0.execute_instruction_read_single_bank(row + MATRIX_DIMENSION * i); + } + + for column in 0..MATRIX_DIMENSION { + for i in 0..MATRIX_DIMENSION { + pim_matrix_arena1.execute_instruction_read_single_bank_unsynchronized( + column * MATRIX_DIMENSION + i, + ); + } + } + + for column in 0..MATRIX_DIMENSION { + pim_matrix_arena2 + .execute_instruction_write_single_bank(column * MATRIX_DIMENSION + row); + } + + dummy_array.execute_instruction_read_single_bank(0); + } +} diff --git a/pim-os/src/kernel/legacy/matrix_scalar_mul.rs b/pim-os/src/kernel/legacy/matrix_scalar_mul.rs new file mode 100644 index 0000000..f93c433 --- /dev/null +++ b/pim-os/src/kernel/legacy/matrix_scalar_mul.rs @@ -0,0 +1,125 @@ +use crate::pim::legacy::array::{DummyArray, PimMatrixArena, PimRegion, PimScalarArena}; +use pim_isa::{File, Instruction, Kernel}; + +pub const KERNEL: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::SrfM { index: 0 }, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::SrfM { index: 0 }, + dst: File::GrfA { index: 0 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::SrfM { index: 0 }, + dst: File::GrfA { index: 1 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::SrfM { index: 0 }, + dst: File::GrfA { index: 2 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::SrfM { index: 0 }, + dst: File::GrfA { index: 3 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::SrfM { index: 0 }, + dst: File::GrfA { index: 4 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::SrfM { index: 0 }, + dst: File::GrfA { index: 5 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::SrfM { index: 0 }, + dst: File::GrfA { index: 6 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::SrfM { index: 0 }, + dst: File::GrfA { index: 7 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfA { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 1 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 2 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 3 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 4 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 5 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 6 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 7 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub fn execute( + pim_scalar_arena: &PimScalarArena, + pim_matrix_arena0: &PimMatrixArena, + pim_matrix_arena1: &mut PimMatrixArena, + dummy_array: &DummyArray, +) { + for column in 0..C { + pim_scalar_arena.execute_instruction_read_single_bank(0); + + for i in 0..R { + pim_matrix_arena0.execute_instruction_read_single_bank(column * R + i); + } + + for i in 0..R { + pim_matrix_arena1.execute_instruction_write_single_bank(column * R + i); + } + + dummy_array.execute_instruction_read_single_bank(0); + } +} diff --git a/pim-os/src/kernel/legacy/matrix_vector_mul.rs b/pim-os/src/kernel/legacy/matrix_vector_mul.rs new file mode 100644 index 0000000..ac2e807 --- /dev/null +++ b/pim-os/src/kernel/legacy/matrix_vector_mul.rs @@ -0,0 +1,94 @@ +use crate::pim::legacy::array::{DummyArray, PimMatrixArena, PimRegion}; +use pim_isa::{File, Instruction, Kernel}; + +pub const KERNEL: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 4 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 5 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 6 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 7 }, + }, + Instruction::MAC { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + src2: File::GrfB { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: true, + }, + Instruction::JUMP { + offset: -1, + count: 7, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub fn execute( + pim_matrix_arena0: &PimMatrixArena, + pim_matrix_arena1: &PimMatrixArena, + pim_matrix_arena2: &mut PimMatrixArena, + dummy_array: &DummyArray, +) { + for row in 0..R { + for i in 0..C { + pim_matrix_arena0.execute_instruction_read_single_bank(row + R * i); + } + + for i in 0..R { + pim_matrix_arena1.execute_instruction_read_single_bank(i); + } + + pim_matrix_arena2.execute_instruction_write_single_bank(row); + + dummy_array.execute_instruction_read_single_bank(0); + } +} diff --git a/pim-os/src/kernel/vadd.rs b/pim-os/src/kernel/vadd.rs new file mode 100644 index 0000000..f638f0d --- /dev/null +++ b/pim-os/src/kernel/vadd.rs @@ -0,0 +1,68 @@ +use crate::pim::{operation::PimOperand, vector::F16x1}; +use nalgebra::SVector; +use pim_isa::{File, Instruction, Kernel}; + +pub const KERNEL: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub fn execute( + a: &SVector, + b: &SVector, + c: &mut SVector, + dummy: &impl PimOperand, +) { + a.fixed_rows_with_step::(0, 256) + .iter() + .for_each(|entry| entry.execute_read()); + b.fixed_rows_with_step::(0, 256) + .iter() + .for_each(|entry| entry.execute_read()); + c.fixed_rows_with_step_mut::(0, 256) + .iter_mut() + .for_each(|entry| entry.execute_write()); + + dummy.execute_read(); +} diff --git a/pim-os/src/kernel/vmul.rs b/pim-os/src/kernel/vmul.rs new file mode 100644 index 0000000..d11dced --- /dev/null +++ b/pim-os/src/kernel/vmul.rs @@ -0,0 +1,68 @@ +use crate::pim::{operation::PimOperand, vector::F16x1}; +use nalgebra::SVector; +use pim_isa::{File, Instruction, Kernel}; + +pub const KERNEL: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub fn execute( + a: &SVector, + b: &SVector, + c: &mut SVector, + dummy: &impl PimOperand, +) { + a.fixed_rows_with_step::(0, 256) + .iter() + .for_each(|entry| entry.execute_read()); + b.fixed_rows_with_step::(0, 256) + .iter() + .for_each(|entry| entry.execute_read()); + c.fixed_rows_with_step_mut::(0, 256) + .iter_mut() + .for_each(|entry| entry.execute_write()); + + dummy.execute_read(); +} diff --git a/pim-os/src/lib.rs b/pim-os/src/lib.rs new file mode 100644 index 0000000..7b4447b --- /dev/null +++ b/pim-os/src/lib.rs @@ -0,0 +1,32 @@ +#![feature(iter_array_chunks)] +#![no_std] + +use core::sync::atomic::{compiler_fence, Ordering}; + +mod alloc; +mod panic; + +pub mod boot; +pub mod critical_section; +pub mod kernel; +pub mod m5op; +pub mod memory_config; +pub mod pim; +pub mod uart; + +extern "C" { + fn main(); +} + +#[no_mangle] +pub extern "C" fn entry() -> ! { + alloc::init(); + + unsafe { main() } + + m5op::exit(0); + + loop { + compiler_fence(Ordering::SeqCst); + } +} diff --git a/pim-os/src/m5op.rs b/pim-os/src/m5op.rs new file mode 100644 index 0000000..49e235f --- /dev/null +++ b/pim-os/src/m5op.rs @@ -0,0 +1,26 @@ +use core::arch::global_asm; + +global_asm!(include_str!("m5op.s")); + +extern "C" { + fn m5_exit(delay_ns: u64); + fn m5_reset_stats(delay_ns: u64, period_ns: u64); + fn m5_dump_stats(delay_ns: u64, period_ns: u64); + fn m5_dump_reset_stats(delay_ns: u64, period_ns: u64); +} + +pub fn exit(delay_ns: u64) { + unsafe { m5_exit(delay_ns) } +} + +pub fn reset_stats(delay_ns: u64, period_ns: u64) { + unsafe { m5_reset_stats(delay_ns, period_ns) } +} + +pub fn dump_stats(delay_ns: u64, period_ns: u64) { + unsafe { m5_dump_stats(delay_ns, period_ns) } +} + +pub fn dump_reset_stats(delay_ns: u64, period_ns: u64) { + unsafe { m5_dump_reset_stats(delay_ns, period_ns) } +} diff --git a/pim-os/src/m5op.s b/pim-os/src/m5op.s new file mode 100644 index 0000000..a970208 --- /dev/null +++ b/pim-os/src/m5op.s @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2010-2013, 2016-2017 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Copyright (c) 2003-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +.macro m5op_func, name, func + .globl \name + \name: + .long 0xff000110 | (\func << 16) + ret +.endm + +.text +m5op_func m5_exit, 0x21; +m5op_func m5_reset_stats, 0x40; +m5op_func m5_dump_stats, 0x41; +m5op_func m5_dump_reset_stats, 0x42; diff --git a/pim-os/src/memory_config.rs b/pim-os/src/memory_config.rs new file mode 100644 index 0000000..0799764 --- /dev/null +++ b/pim-os/src/memory_config.rs @@ -0,0 +1 @@ +pub const NUMBER_OF_BANKS: usize = 32; diff --git a/pim-os/src/panic.rs b/pim-os/src/panic.rs new file mode 100644 index 0000000..c3795c2 --- /dev/null +++ b/pim-os/src/panic.rs @@ -0,0 +1,15 @@ +use crate::uart::Uart0; +use core::{ + fmt::Write, + panic::PanicInfo, + sync::atomic::{compiler_fence, Ordering}, +}; + +#[panic_handler] +fn panic(info: &PanicInfo) -> ! { + writeln!(Uart0, "{info}").unwrap(); + + loop { + compiler_fence(Ordering::SeqCst); + } +} diff --git a/pim-os/src/pim.rs b/pim-os/src/pim.rs new file mode 100644 index 0000000..9fbea29 --- /dev/null +++ b/pim-os/src/pim.rs @@ -0,0 +1,7 @@ +pub mod config; +pub mod continuous_array; +pub mod interleaved_array; +pub mod legacy; +pub mod operation; +pub mod state; +pub mod vector; diff --git a/pim-os/src/pim/config.rs b/pim-os/src/pim/config.rs new file mode 100644 index 0000000..9de82c8 --- /dev/null +++ b/pim-os/src/pim/config.rs @@ -0,0 +1,41 @@ +use aarch64_cpu::asm::barrier; +use core::{ + arch::asm, + ptr::{addr_of_mut, write_volatile}, +}; + +#[link_section = ".pim_config"] +static mut PIM_CONFIG_REGION: [u8; 0x4000] = [0; 0x4000]; + +pub fn write(s: &str) { + unsafe { + let mut index = 0; + for &byte in s.as_bytes() { + write_volatile( + (addr_of_mut!(PIM_CONFIG_REGION) as *mut u8).offset(index), + byte as _, + ); + barrier::dsb(barrier::SY); + index += 1; + } + write_volatile( + (addr_of_mut!(PIM_CONFIG_REGION) as *mut u8).offset(index), + b'\0', + ); + + // PIM_CONFIG_REGION[..s.len()].copy_from_slice(s.as_bytes()); + // PIM_CONFIG_REGION[s.len()] = b'\0'; + + if cfg!(feature = "cacheless") { + // Be pessimistic so that config region is not optimized away + core::hint::black_box(PIM_CONFIG_REGION); + } else { + // Flush all cache lines that were affected by write operation + for element in PIM_CONFIG_REGION[..s.len()].iter() { + asm!("dc civac, {val}", val = in(reg) element); + } + + barrier::dsb(barrier::SY); + } + } +} diff --git a/pim-os/src/pim/continuous_array.rs b/pim-os/src/pim/continuous_array.rs new file mode 100644 index 0000000..2664df9 --- /dev/null +++ b/pim-os/src/pim/continuous_array.rs @@ -0,0 +1,38 @@ +use super::vector::{F16x1, F16x16}; +use core::fmt::Display; +use nalgebra::{SMatrix, SVector}; + +#[repr(C, align(65536))] +#[derive(Debug)] +pub struct Matrix(pub [SMatrix; X16R]); + +#[repr(C, align(1024))] +#[derive(Debug)] +pub struct Vector(pub SVector); + +impl Display for Matrix { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + for block in self.0.iter() { + block.fmt(f)? + } + Ok(()) + } +} + +impl + From> for Matrix +{ + fn from(matrix: SMatrix) -> Self { + Self(core::array::from_fn(|i| { + SMatrix::from_row_iterator( + matrix + .fixed_rows::<16>(i * 16) + .transpose() + .iter() + .map(|e| *e) + .array_chunks::<16>() + .map(|chunk| F16x16(chunk)), + ) + })) + } +} diff --git a/pim-os/src/pim/interleaved_array.rs b/pim-os/src/pim/interleaved_array.rs new file mode 100644 index 0000000..4442aa1 --- /dev/null +++ b/pim-os/src/pim/interleaved_array.rs @@ -0,0 +1,28 @@ +use super::vector::F16x16; +use crate::memory_config::NUMBER_OF_BANKS; +use nalgebra::SVector; + +#[repr(C, align(512))] +#[derive(Debug)] +pub struct Vector(pub [[F16x16; NUMBER_OF_BANKS]; X16R]); + +impl Default for Vector { + fn default() -> Self { + Self([[F16x16::default(); NUMBER_OF_BANKS]; X16R]) + } +} + +impl From> for Vector { + fn from(input_vector: SVector) -> Self { + let mut interleaved_vector = Self::default(); + + for block_index in 0..X16R { + let element = input_vector[block_index]; + for k in 0..NUMBER_OF_BANKS { + interleaved_vector.0[block_index][k] = element; + } + } + + interleaved_vector + } +} diff --git a/pim-os/src/pim/legacy.rs b/pim-os/src/pim/legacy.rs new file mode 100644 index 0000000..f5d68fc --- /dev/null +++ b/pim-os/src/pim/legacy.rs @@ -0,0 +1 @@ +pub mod array; diff --git a/pim-os/src/pim/legacy/array.rs b/pim-os/src/pim/legacy/array.rs new file mode 100644 index 0000000..d0d88a4 --- /dev/null +++ b/pim-os/src/pim/legacy/array.rs @@ -0,0 +1,227 @@ +use crate::{ + memory_config::NUMBER_OF_BANKS, + pim::vector::{F16x1, F16x16}, +}; +use aarch64_cpu::asm::barrier; +use core::{arch::asm, cell::RefCell}; +use half::f16; +use nalgebra::{Const, Dyn, RawStorage, RawStorageMut}; + +const EVEN_BANK_INDEX: usize = 0; +const ODD_BANK_INDEX: usize = 8; + +#[derive(Clone, Debug)] +#[repr(C, align(65536))] +pub struct PimMatrixArena(pub [[[F16x16; NUMBER_OF_BANKS]; R]; C]); + +impl PimRegion for PimMatrixArena { + const OCCUPIED_CACHE_LINES: usize = R * C * NUMBER_OF_BANKS; + + fn bank_ptr(&self, bank_index: usize) -> *const f16 { + unsafe { (self.0.as_ptr() as *const F16x16).add(bank_index) as *const f16 } + } + + fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16 { + unsafe { (self.0.as_mut_ptr() as *mut F16x16).add(bank_index) as *mut f16 } + } +} + +#[derive(Clone, Debug)] +#[repr(C, align(1024))] +pub struct PimScalarArena(pub [F16x16; NUMBER_OF_BANKS]); + +impl PimRegion for PimScalarArena { + const OCCUPIED_CACHE_LINES: usize = NUMBER_OF_BANKS; + + fn bank_ptr(&self, bank_index: usize) -> *const f16 { + unsafe { (self.0.as_ptr() as *const F16x16).add(bank_index) as *const f16 } + } + + fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16 { + unsafe { (self.0.as_mut_ptr() as *mut F16x16).add(bank_index) as *mut f16 } + } +} + +#[derive(Debug)] +pub struct PimStorage<'a, const R: usize, const C: usize> { + pub arena: &'a RefCell>, + pub index: usize, +} + +unsafe impl<'a, const R: usize, const C: usize> RawStorage, Const> + for PimStorage<'a, R, C> +{ + type RStride = Dyn; + type CStride = Dyn; + + fn ptr(&self) -> *const F16x1 { + unsafe { (&self.arena.borrow().0[0][0] as *const F16x16 as *const F16x1).add(self.index) } + } + + fn shape(&self) -> (Const, Const) { + (Const::, Const::) + } + + fn strides(&self) -> (Self::RStride, Self::CStride) { + (Dyn(16 * NUMBER_OF_BANKS), Dyn(16 * R * NUMBER_OF_BANKS)) + } + + fn is_contiguous(&self) -> bool { + false + } + + unsafe fn as_slice_unchecked(&self) -> &[F16x1] { + panic!("PimStorage is not contiguous!"); + } +} + +unsafe impl<'a, const R: usize, const C: usize> RawStorageMut, Const> + for PimStorage<'a, R, C> +{ + fn ptr_mut(&mut self) -> *mut F16x1 { + unsafe { + (&mut self.arena.borrow_mut().0[0][0] as *mut F16x16 as *mut F16x1).add(self.index) + } + } + + unsafe fn as_mut_slice_unchecked(&mut self) -> &mut [F16x1] { + panic!("PimStorage is not contiguous!"); + } +} + +pub trait PimRegion { + const OCCUPIED_CACHE_LINES: usize; + + fn bank_ptr(&self, bank_index: usize) -> *const f16; + fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16; + + fn execute_instruction_read_single_bank(&self, i: usize) { + if !cfg!(feature = "cacheless") { + self.invalidate_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS); + barrier::dsb(barrier::SY); + } + + // Read from first bank + self.read_data_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS); + + barrier::dsb(barrier::SY); + } + + fn execute_instruction_read_single_bank_unsynchronized(&self, i: usize) { + self.read_data_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS); + } + + fn execute_instruction_read_dual_bank(&self) { + let i = 0; + if !cfg!(feature = "cacheless") { + self.invalidate_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS); + self.invalidate_bank(ODD_BANK_INDEX + i * NUMBER_OF_BANKS); + + barrier::dsb(barrier::SY); + } + + // Read from first and second bank + self.read_data_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS); + self.read_data_bank(ODD_BANK_INDEX + i * NUMBER_OF_BANKS); + + barrier::dsb(barrier::SY); + } + + fn read_data_bank(&self, bank_index: usize) { + let bank = self.bank_ptr(bank_index); + // writeln!(&mut crate::uart::Uart0 {}, "Read data {:?}", bank).unwrap(); + unsafe { core::ptr::read_volatile(bank) }; + } + + fn execute_instruction_write_single_bank(&mut self, i: usize) { + if !cfg!(feature = "cacheless") { + self.preload_zero_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS); + barrier::dsb(barrier::SY); + } + + // Write to first bank + self.write_data_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS); + + if !cfg!(feature = "cacheless") { + self.invalidate_flush_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS); + } + + barrier::dsb(barrier::SY); + } + + fn execute_instruction_write_dual_bank(&mut self) { + let i = 0; + if !cfg!(feature = "cacheless") { + self.preload_zero_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS); + self.preload_zero_bank(ODD_BANK_INDEX + i * NUMBER_OF_BANKS); + barrier::dsb(barrier::SY); + } + + // Write to first and second bank + self.write_data_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS); + self.write_data_bank(ODD_BANK_INDEX + i * NUMBER_OF_BANKS); + + if !cfg!(feature = "cacheless") { + self.invalidate_flush_bank(EVEN_BANK_INDEX + i * NUMBER_OF_BANKS); + self.invalidate_flush_bank(ODD_BANK_INDEX + i * NUMBER_OF_BANKS); + } + + barrier::dsb(barrier::SY); + } + + fn write_data_bank(&mut self, bank_index: usize) { + let bank = self.bank_ptr_mut(bank_index); + unsafe { + core::ptr::write_volatile(bank, Default::default()); + } + } + + fn invalidate(&self) { + (0..Self::OCCUPIED_CACHE_LINES).for_each(|idx| self.invalidate_bank(idx)); + } + + fn invalidate_bank(&self, bank_index: usize) { + let bank = self.bank_ptr(bank_index); + unsafe { + asm!("dc ivac, {val}", val = in(reg) bank); + } + } + + fn invalidate_flush(&self) { + (0..Self::OCCUPIED_CACHE_LINES).for_each(|idx| self.invalidate_flush_bank(idx)); + } + + fn invalidate_flush_bank(&self, bank_index: usize) { + let bank = self.bank_ptr(bank_index); + unsafe { + asm!("dc civac, {val}", val = in(reg) bank); + } + } + + fn preload_zero(&self) { + (0..Self::OCCUPIED_CACHE_LINES).for_each(|idx| self.preload_zero_bank(idx)); + } + + fn preload_zero_bank(&self, bank_index: usize) { + let bank = self.bank_ptr(bank_index); + unsafe { + // Preload first bank + asm!("dc zva, {val}", val = in(reg) bank); + } + } +} + +#[repr(C, align(1024))] +pub struct DummyArray(pub [F16x16; NUMBER_OF_BANKS]); + +impl PimRegion for DummyArray { + const OCCUPIED_CACHE_LINES: usize = NUMBER_OF_BANKS; + + fn bank_ptr(&self, bank_index: usize) -> *const f16 { + &self.0[bank_index] as *const F16x16 as *const f16 + } + + fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16 { + &mut self.0[bank_index] as *mut F16x16 as *mut f16 + } +} diff --git a/pim-os/src/pim/operation.rs b/pim-os/src/pim/operation.rs new file mode 100644 index 0000000..a0c59b4 --- /dev/null +++ b/pim-os/src/pim/operation.rs @@ -0,0 +1,30 @@ +use aarch64_cpu::asm::barrier; + +pub trait PimOperand { + fn ptr(&self) -> *const u8; + fn ptr_mut(&mut self) -> *mut u8; + + fn execute_read(&self) { + unsafe { core::ptr::read_volatile(self.ptr()) }; + barrier::dsb(barrier::SY); + } + + fn execute_read_async(&self) { + unsafe { core::ptr::read_volatile(self.ptr()) }; + } + + fn execute_write(&mut self) { + unsafe { core::ptr::write_volatile(self.ptr_mut(), Default::default()) }; + barrier::dsb(barrier::SY); + } +} + +impl PimOperand for T { + fn ptr(&self) -> *const u8 { + core::ptr::addr_of!(*self) as *const _ + } + + fn ptr_mut(&mut self) -> *mut u8 { + core::ptr::addr_of_mut!(*self) as *mut _ + } +} diff --git a/pim-os/src/pim/state.rs b/pim-os/src/pim/state.rs new file mode 100644 index 0000000..d347b4b --- /dev/null +++ b/pim-os/src/pim/state.rs @@ -0,0 +1,25 @@ +use super::config; +use pim_isa::{BankMode, Kernel, PimConfig}; + +// TODO return token and return to singlebank when dropped +pub fn set_bank_mode(bank_mode: BankMode) { + config::write( + serde_json_core::to_string::(&PimConfig { + kernel: None, + bank_mode: Some(bank_mode), + }) + .unwrap() + .as_str(), + ); +} + +pub fn set_kernel(kernel: &Kernel) { + config::write( + serde_json_core::to_string::(&PimConfig { + kernel: Some(kernel.clone()), + bank_mode: None, + }) + .unwrap() + .as_str(), + ); +} diff --git a/pim-os/src/pim/vector.rs b/pim-os/src/pim/vector.rs new file mode 100644 index 0000000..d74ce3d --- /dev/null +++ b/pim-os/src/pim/vector.rs @@ -0,0 +1,125 @@ +use core::fmt::{Debug, Display}; + +use half::f16; + +pub const ELEMENT_COUNT: usize = 16; + +#[repr(C)] +#[derive(Default, Clone, Copy, PartialEq)] +pub struct F16x1(pub f16); + +impl core::fmt::Debug for F16x1 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + Debug::fmt(&self.0, f) + } +} + +impl core::fmt::Display for F16x1 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + Display::fmt(&self.0, f) + } +} + +impl num_traits::identities::Zero for F16x1 { + fn zero() -> Self { + Self(f16::ZERO) + } + + fn is_zero(&self) -> bool { + self.0 == f16::ZERO + } +} + +impl num_traits::identities::One for F16x1 { + fn one() -> Self { + Self(f16::ONE) + } +} + +impl core::ops::Add for F16x1 { + type Output = Self; + + fn add(self, rhs: F16x1) -> Self::Output { + Self(self.0 + rhs.0) + } +} + +impl core::ops::AddAssign for F16x1 { + fn add_assign(&mut self, rhs: F16x1) { + self.0 += rhs.0; + } +} + +impl core::ops::Mul for F16x1 { + type Output = Self; + + fn mul(self, rhs: F16x1) -> Self::Output { + Self(self.0 * rhs.0) + } +} + +impl core::ops::MulAssign for F16x1 { + fn mul_assign(&mut self, rhs: F16x1) { + self.0 *= rhs.0; + } +} + +#[repr(C)] +#[derive(Default, Debug, Clone, Copy, PartialEq)] +pub struct F16x16(pub [F16x1; ELEMENT_COUNT]); + +impl Display for F16x16 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "{:?}", self.0) + } +} + +impl num_traits::identities::Zero for F16x16 { + fn zero() -> Self { + Self([F16x1::zero(); ELEMENT_COUNT]) + } + + fn is_zero(&self) -> bool { + self.0 == [F16x1::zero(); ELEMENT_COUNT] + } +} + +impl num_traits::identities::One for F16x16 { + fn one() -> Self { + Self([F16x1::one(); ELEMENT_COUNT]) + } +} + +impl core::ops::Add for F16x16 { + type Output = Self; + + fn add(self, rhs: F16x16) -> Self::Output { + Self(core::array::from_fn(|i| self.0[i] + rhs.0[i])) + } +} + +impl core::ops::AddAssign for F16x16 { + fn add_assign(&mut self, rhs: F16x16) { + self.0 + .iter_mut() + .zip(&rhs.0) + .for_each(|(left, right)| *left += *right); + } +} + +impl core::ops::Mul for F16x16 { + type Output = Self; + + fn mul(self, rhs: F16x16) -> Self::Output { + Self(core::array::from_fn(|i| self.0[i] * rhs.0[i])) + } +} + +impl core::ops::MulAssign for F16x16 { + fn mul_assign(&mut self, rhs: F16x16) { + self.0 + .iter_mut() + .zip(&rhs.0) + .for_each(|(left, right)| *left *= *right); + } +} diff --git a/pim-os/src/start.s b/pim-os/src/start.s new file mode 100644 index 0000000..1c4c617 --- /dev/null +++ b/pim-os/src/start.s @@ -0,0 +1,80 @@ + +.extern LD_STACK_PTR + +// Put a 64-bit value with little endianness. +.macro PUT_64B high, low + .word \low + .word \high +.endm + +// Create an entry pointing to a next-level table. +.macro TABLE_ENTRY PA, ATTR + PUT_64B \ATTR, (\PA) + 0x3 +.endm + +// Create an entry for a 1GB block. +.macro BLOCK_1GB PA, ATTR_HI, ATTR_LO + PUT_64B \ATTR_HI | ((\PA) >> 32), ((\PA) & 0xC0000000) | \ATTR_LO | 0x1 +.endm + +// Create an entry for a 2MB block. +.macro BLOCK_2MB PA, ATTR_HI, ATTR_LO + PUT_64B \ATTR_HI, ((\PA) & 0xFFE00000) | \ATTR_LO | 0x1 +.endm + +.section .init + +.align 12 +ttb0_base: +.set ADDR, 0x000 +.rept 0x02 +BLOCK_1GB (ADDR << 29), 0, 0x740 +.set ADDR, ADDR+2 +.endr + +// Cached normal DRAM region +BLOCK_1GB (ADDR << 29), 0, 0x74C +.set ADDR, ADDR+2 + +// Non-cached PIM DRAM region +BLOCK_1GB (ADDR << 29), 0, 0x740 +.set ADDR, ADDR+2 + +// Map rest of Page Table to avoid undefined behavior +.rept 0x3C +BLOCK_1GB (ADDR << 29), 0, 0x74C +.set ADDR, ADDR+2 +.endr + +.globl _start +_start: + ldr x30, =LD_STACK_PTR + mov sp, x30 + + // Initialize translation table control registers + ldr x1, =0x13520 // 64GB space 4KB granularity Inner-shareable. Normal Inner and Outer Cacheable. + msr tcr_el3, x1 + + ldr x1, =0xFF440400 + msr mair_el3, x1 // ATTR0 Device-nGnRnE ATTR1 Device. ATTR2 Normal Non-Cacheable. ATTR3 Normal Cacheable. + + bl set_page_table + + // Enable MMU and caches + mrs x0, sctlr_el3 + orr x0, x0, #(0x1 << 2) // The C bit (data cache). + orr x0, x0, #(0x1 << 12) // The I bit (instruction cache). + orr x0, x0, #0x1 // The M bit (MMU). + msr sctlr_el3, x0 + dsb sy + isb + + bl entry + +.globl set_page_table +set_page_table: + adr x0, ttb0_base + msr ttbr0_el3, x0 + tlbi alle3 + isb + ret diff --git a/pim-os/src/uart.rs b/pim-os/src/uart.rs new file mode 100644 index 0000000..ecee08a --- /dev/null +++ b/pim-os/src/uart.rs @@ -0,0 +1,17 @@ +use core::{fmt::Write, ptr::write_volatile}; + +const UART0_ADDR: *mut u32 = 0x1c090000 as _; + +#[derive(Debug)] +pub struct Uart0; + +impl Write for Uart0 { + fn write_str(&mut self, s: &str) -> core::fmt::Result { + for &byte in s.as_bytes() { + unsafe { + write_volatile(UART0_ADDR, byte as _); + } + } + Ok(()) + } +} diff --git a/pim-vm/Cargo.lock b/pim-vm/Cargo.lock new file mode 100644 index 0000000..550e4b3 --- /dev/null +++ b/pim-vm/Cargo.lock @@ -0,0 +1,402 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + +[[package]] +name = "bitflags" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" + +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "cxx" +version = "1.0.115" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de00f15a6fa069c99b88c5c78c4541d0e7899a33b86f7480e23df2431fce0bc" +dependencies = [ + "cc", + "cxxbridge-flags", + "cxxbridge-macro", + "link-cplusplus", +] + +[[package]] +name = "cxxbridge-flags" +version = "1.0.115" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f3fed61d56ba497c4efef9144dfdbaa25aa58f2f6b3a7cf441d4591c583745c" + +[[package]] +name = "cxxbridge-macro" +version = "1.0.115" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8908e380a8efd42150c017b0cfa31509fc49b6d47f7cb6b33e93ffb8f4e3661e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "env_logger" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95b3f3e67048839cb0d0781f445682a35113da7121f7c949db0e2be96a4fbece" +dependencies = [ + "humantime", + "is-terminal", + "log", + "regex", + "termcolor", +] + +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "half" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" +dependencies = [ + "cfg-if", + "crunchy", +] + +[[package]] +name = "hermit-abi" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "is-terminal" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455" +dependencies = [ + "hermit-abi", + "rustix", + "windows-sys", +] + +[[package]] +name = "itoa" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" + +[[package]] +name = "libc" +version = "0.2.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" + +[[package]] +name = "link-cplusplus" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d240c6f7e1ba3a28b0249f774e6a9dd0175054b52dfbb61b16eb8505c3785c9" +dependencies = [ + "cc", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + +[[package]] +name = "memchr" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" + +[[package]] +name = "pim-isa" +version = "0.1.0" +dependencies = [ + "serde", +] + +[[package]] +name = "pim-vm" +version = "0.1.0" +dependencies = [ + "cxx", + "env_logger", + "half", + "log", + "pim-isa", + "serde_json", +] + +[[package]] +name = "proc-macro2" +version = "1.0.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + +[[package]] +name = "rustix" +version = "0.38.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "ryu" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" + +[[package]] +name = "serde" +version = "1.0.195" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.195" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "176e46fa42316f18edd598015a5166857fc835ec732f5215eac6b7bdbf0a84f4" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "2.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "termcolor" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff1bc3d3f05aff0403e8ac0d92ced918ec05b666a43f83297ccef5bea8a3d449" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" diff --git a/pim-vm/Cargo.toml b/pim-vm/Cargo.toml new file mode 100644 index 0000000..d42c110 --- /dev/null +++ b/pim-vm/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "pim-vm" +version = "0.1.0" +edition = "2021" + +[lib] +crate-type = ["staticlib"] + +[features] +shared_pim_units = [] + +[dependencies] +cxx = "1.0.110" +env_logger = "0.10.1" +half = "2.3.1" +log = "0.4.20" +pim-isa = { path = "../pim-isa" } +serde_json = "1.0.108" diff --git a/pim-vm/src/lib.rs b/pim-vm/src/lib.rs new file mode 100644 index 0000000..367718d --- /dev/null +++ b/pim-vm/src/lib.rs @@ -0,0 +1,451 @@ +use half::f16; +use pim_isa::{BankMode, File, Instruction, Kernel}; + +#[cxx::bridge(namespace = "pim_vm")] +mod ffi { + pub enum BankMode { + SingleBank, + AllBank, + PimAllBank, + } + + extern "Rust" { + type PimVM; + + fn new_pim_vm(num_banks: u32) -> Box; + fn reset(&mut self); + fn apply_config(&mut self, config: &str); + fn bank_mode(&self) -> BankMode; + fn execute_read( + &mut self, + bank_index: u32, + address: u32, + row: u32, + column: u32, + bank_data: &[u8], + ); + fn execute_write(&mut self, bank_index: u32) -> [u8; 32]; + + fn init_logger(); + } +} + +fn init_logger() { + env_logger::init(); +} + +const GRF_A_BIT_OFFSET: usize = 2; +const GRF_B_BIT_OFFSET: usize = 5; +const COLUMN_BITS : usize = 7; + +const BURST_LENGTH: usize = 32; + +const GRF_NUM_REGISTERS: usize = 8; +const SRF_A_NUM_REGISTERS: usize = 8; +const SRF_M_NUM_REGISTERS: usize = 8; + +const FP_UNITS: usize = 16; +type GrfRegister = [f16; FP_UNITS]; + +#[derive(Clone, Debug)] +struct PimUnit { + grf_a: [GrfRegister; GRF_NUM_REGISTERS], + grf_b: [GrfRegister; GRF_NUM_REGISTERS], + srf_a: [f16; SRF_A_NUM_REGISTERS], + srf_m: [f16; SRF_A_NUM_REGISTERS], + pc: u8, + jump_counter: Option, +} + +impl PimUnit { + fn reset(&mut self) { + *self = Self::default(); + } +} + +impl Default for PimUnit { + fn default() -> Self { + Self { + grf_a: [[f16::ZERO; FP_UNITS]; GRF_NUM_REGISTERS], + grf_b: [[f16::ZERO; FP_UNITS]; GRF_NUM_REGISTERS], + srf_a: [f16::ZERO; SRF_A_NUM_REGISTERS], + srf_m: [f16::ZERO; SRF_M_NUM_REGISTERS], + pc: 0, + jump_counter: None, + } + } +} + +#[derive(Debug)] +struct PimVM { + pim_units: Vec, + bank_mode: pim_isa::BankMode, + kernel: pim_isa::Kernel, +} + +impl PimVM { + fn reset(&mut self) { + for unit in self.pim_units.iter_mut() { + unit.reset(); + } + } + + fn apply_config(&mut self, config_str: &str) { + let config = serde_json::from_str::(config_str).unwrap(); + + if let Some(kernel) = config.kernel { + self.kernel = kernel; + } + + if let Some(bank_mode) = config.bank_mode { + self.bank_mode = bank_mode; + } + } + + fn bank_mode(&self) -> ffi::BankMode { + match self.bank_mode { + BankMode::SingleBank => ffi::BankMode::SingleBank, + BankMode::AllBank => ffi::BankMode::AllBank, + BankMode::PimAllBank => ffi::BankMode::PimAllBank, + } + } +} + +fn new_pim_vm(num_banks: u32) -> Box { + let num_pim_units = if cfg!(feature = "shared_pim_units") { + num_banks / 2 + } else { + num_banks + }; + + Box::new(PimVM { + pim_units: vec![PimUnit::default(); num_pim_units as _], + bank_mode: BankMode::SingleBank, + kernel: Kernel::NOP, + }) +} + +#[repr(C)] +struct BankData([f16; FP_UNITS]); + +impl PimVM { + pub fn execute_read( + &mut self, + bank_index: u32, + address: u32, + row: u32, + column: u32, + bank_data: &[u8], + ) { + assert_eq!(bank_data.len(), BURST_LENGTH); + + let pim_unit_index = if cfg!(feature = "shared_pim_units") { + bank_index / 2 + } else { + bank_index + }; + + let pim_unit = &mut self.pim_units[pim_unit_index as usize]; + + let inst = self.kernel.0[pim_unit.pc as usize]; + + let row_column_bits = (row << COLUMN_BITS) | column; + let aam_grf_a_index = (row_column_bits >> GRF_A_BIT_OFFSET) & 0b111; + let aam_grf_b_index = (row_column_bits >> GRF_B_BIT_OFFSET) & 0b111; + + if pim_unit_index == 0 { + log::debug!( + "PimUnit {pim_unit_index} at {address:#x} (B{aam_grf_b_index}, A{aam_grf_a_index}) Execute Read PC {}: {inst:?}", + pim_unit.pc + ); + } + + match inst { + Instruction::NOP => (), + Instruction::EXIT => { + pim_unit.reset(); + return; + } + Instruction::JUMP { .. } => unreachable!(), + Instruction::MOV { src, dst } | Instruction::FILL { src, dst } => { + let data = PimVM::load(src, pim_unit, &bank_data); + PimVM::store(dst, pim_unit, &data); + } + Instruction::ADD { + src0, + mut src1, + mut dst, + aam, + } => { + if aam { + src1 = if let File::GrfA { index: _ } = src1 { + File::GrfA { + index: aam_grf_a_index as _, + } + } else { + panic!("Invalid operand in address-aligned-mode"); + }; + + dst = if let File::GrfB { index: _ } = dst { + File::GrfB { + index: aam_grf_b_index as _, + } + } else { + panic!("Invalid operand in address-aligned-mode"); + }; + } + + let data0 = PimVM::load(src0, pim_unit, &bank_data); + let data1 = PimVM::load(src1, pim_unit, &bank_data); + + let sum: [f16; FP_UNITS] = data0 + .into_iter() + .zip(data1) + .map(|(src0, src1)| src0 + src1) + .collect::>() + .try_into() + .unwrap(); + + PimVM::store(dst, pim_unit, &sum); + } + Instruction::MUL { + src0, + mut src1, + mut dst, + aam, + } => { + if aam { + src1 = if let File::GrfA { index: _ } = src1 { + File::GrfA { + index: aam_grf_a_index as _, + } + } else { + panic!("Invalid operand in address-aligned-mode"); + }; + + dst = if let File::GrfB { index: _ } = dst { + File::GrfB { + index: aam_grf_b_index as _, + } + } else { + panic!("Invalid operand in address-aligned-mode"); + }; + } + + let data0 = PimVM::load(src0, pim_unit, &bank_data); + let data1 = PimVM::load(src1, pim_unit, &bank_data); + + let product: [f16; FP_UNITS] = data0 + .into_iter() + .zip(data1) + .map(|(src0, src1)| src0 * src1) + .collect::>() + .try_into() + .unwrap(); + + PimVM::store(dst, pim_unit, &product); + } + Instruction::MAC { + src0, + mut src1, + mut src2, + mut dst, + aam, + } + | Instruction::MAD { + src0, + mut src1, + mut src2, + mut dst, + aam, + } => { + if aam { + src1 = if let File::GrfA { index: _ } = src1 { + // if pim_unit_index == 0 { + // log::debug!("AAM index GrfA {aam_grf_a_index}"); + // } + File::GrfA { + index: aam_grf_a_index as _, + } + } else { + panic!("Invalid operand in address-aligned-mode"); + }; + + src2 = if let File::GrfB { index: _ } = src2 { + // if pim_unit_index == 0 { + // log::debug!("AAM index GrfB {aam_grf_a_index}"); + // } + File::GrfB { + index: aam_grf_b_index as _, + } + } else { + panic!("Invalid operand in address-aligned-mode"); + }; + + dst = if let File::GrfB { index: _ } = dst { + File::GrfB { + index: aam_grf_b_index as _, + } + } else { + panic!("Invalid operand in address-aligned-mode"); + }; + } + + assert_eq!(src2, dst); + + let data0 = PimVM::load(src0, pim_unit, &bank_data); + let data1 = PimVM::load(src1, pim_unit, &bank_data); + let data2 = PimVM::load(src2, pim_unit, &bank_data); + + let product: [f16; FP_UNITS] = data0 + .into_iter() + .zip(data1) + .map(|(src0, src1)| src0 * src1) + .collect::>() + .try_into() + .unwrap(); + + let sum: [f16; FP_UNITS] = product + .into_iter() + .zip(data2) + .map(|(product, src2)| product + src2) + .collect::>() + .try_into() + .unwrap(); + + // if pim_unit_index == 0 { + // log::debug!( + // "\n{:?}\n{:?}\n{:?}\n{:?}\n{:?}", + // data0[0], + // data1[0], + // data2[0], + // product[0], + // sum[0] + // ); + // } + PimVM::store(dst, pim_unit, &sum); + } + } + + pim_unit.pc += 1; + + // The JUMP instruction is zero-cycle and not actually executed + while let Instruction::JUMP { offset, count } = self.kernel.0[pim_unit.pc as usize] { + pim_unit.jump_counter = match pim_unit.jump_counter { + Some(jump_counter) => jump_counter.checked_sub(1), + None => count.checked_sub(1), + }; + + if pim_unit.jump_counter != None { + let new_pc = pim_unit.pc as i32 + offset as i32; + + if new_pc < 0 || new_pc >= 32 { + panic!("Invalid PC {new_pc} after JUMP: {inst:?}"); + } + + pim_unit.pc = new_pc as _; + } else { + pim_unit.pc += 1; + } + + // if pim_unit_index == 0 { + // log::debug!( + // "PimUnit {pim_unit_index} JUMP to PC {}: {:?}", + // pim_unit.pc, + // self.kernel.0[pim_unit.pc as usize] + // ); + // } + } + } + + pub fn execute_write(&mut self, bank_index: u32) -> [u8; BURST_LENGTH] { + let pim_unit_index = if cfg!(feature = "shared_pim_units") { + bank_index / 2 + } else { + bank_index + }; + + let pim_unit = &mut self.pim_units[pim_unit_index as usize]; + let inst = self.kernel.0[pim_unit.pc as usize]; + + if pim_unit_index == 0 { + log::debug!( + "PimUnit {pim_unit_index} Execute Write PC {}: {inst:?}", + pim_unit.pc + ); + } + + let data = match inst { + Instruction::FILL { src, dst } => { + let data: [f16; FP_UNITS] = match src { + File::GrfA { index } => pim_unit.grf_a[index as usize], + File::GrfB { index } => pim_unit.grf_b[index as usize], + _ => panic!("Unsupported src operand: {src:?}"), + }; + + if dst != File::Bank { + panic!("Unsupported dst operand: {dst:?}") + } + + // if pim_unit_index == 0 { + // log::debug!("Store {data:?}"); + // } + + data + } + _ => panic!("Unsupported instruction for write: {inst:?}"), + }; + + pim_unit.pc += 1; + + // The JUMP instruction is zero-cycle and not actually executed + while let Instruction::JUMP { offset, count } = self.kernel.0[pim_unit.pc as usize] { + pim_unit.jump_counter = match pim_unit.jump_counter { + Some(jump_counter) => jump_counter.checked_sub(1), + None => count.checked_sub(1), + }; + + if pim_unit.jump_counter != None { + let new_pc = pim_unit.pc as i32 + offset as i32; + + if new_pc < 0 || new_pc >= 32 { + panic!("Invalid PC {new_pc} after JUMP: {inst:?}"); + } + + pim_unit.pc = new_pc as _; + } else { + pim_unit.pc += 1; + } + + if pim_unit_index == 0 { + log::debug!( + "PimUnit {pim_unit_index} JUMP to PC {}: {:?}", + pim_unit.pc, + self.kernel.0[pim_unit.pc as usize] + ); + } + } + + unsafe { std::mem::transmute(data) } + } + + fn load(src: File, pim_unit: &PimUnit, bank_data: &[u8]) -> [f16; FP_UNITS] { + match src { + File::GrfA { index } => pim_unit.grf_a[index as usize], + File::GrfB { index } => pim_unit.grf_b[index as usize], + File::SrfM { index } => [pim_unit.srf_m[index as usize]; FP_UNITS], + File::SrfA { index } => [pim_unit.srf_a[index as usize]; FP_UNITS], + File::Bank => unsafe { std::ptr::read(bank_data.as_ptr() as *const BankData).0 }, + } + } + + fn store(dst: File, pim_unit: &mut PimUnit, data: &[f16; FP_UNITS]) { + match dst { + File::GrfA { index } => pim_unit.grf_a[index as usize] = data.clone(), + File::GrfB { index } => pim_unit.grf_b[index as usize] = data.clone(), + File::SrfM { index } => pim_unit.srf_m[index as usize] = data[0], + File::SrfA { index } => pim_unit.srf_a[index as usize] = data[0], + File::Bank => panic!("Unsupported dst operand: {dst:?}"), + } + } +}