Compare commits

...

10 Commits

24 changed files with 1876 additions and 102 deletions

View File

@@ -8,6 +8,10 @@ forced-target = "aarch64-unknown-none"
[features]
cacheless = []
X1 = []
X2 = []
X3 = []
X4 = []
[dependencies]
aarch64-cpu = "9.4.0"

View File

@@ -16,7 +16,7 @@ SECTIONS
.bss : { *(.bss) } > dram
. = ALIGN(8);
. = . + 0x100000; # 1 MiB Stack
. = . + 0x10000000; # 100 MiB Stack
LD_STACK_PTR = .;
.pim_config : { KEEP(*(.pim_config)) } > dram_pim_config

6
pim-os/build.sh Executable file
View File

@@ -0,0 +1,6 @@
#!/bin/bash
CARGO_TARGET_DIR=kernels/X1 cargo build --release --features X1
CARGO_TARGET_DIR=kernels/X2 cargo build --release --features X2
CARGO_TARGET_DIR=kernels/X3 cargo build --release --features X3
CARGO_TARGET_DIR=kernels/X4 cargo build --release --features X4

View File

@@ -1,23 +0,0 @@
#![no_std]
#![no_main]
extern crate alloc;
use core::fmt::Write;
use nalgebra::SMatrix;
use pim_os::{pim::vector::F16x1, uart::Uart0};
#[no_mangle]
pub extern "C" fn main() {
let matrices0 = [SMatrix::<F16x1, 8, 8>::zeros(); 512];
let matrices1 = [SMatrix::<F16x1, 8, 8>::zeros(); 512];
for _ in 0..100 {
for i in 0..512 {
let matrix2 = matrices0[i] * matrices1[i];
core::hint::black_box(matrix2);
}
}
writeln!(Uart0, "Done").unwrap();
}

View File

@@ -0,0 +1,45 @@
#![no_std]
#![no_main]
extern crate alloc;
use core::{arch::asm, fmt::Write};
use nalgebra::{SMatrix, SVector};
use pim_os::{pim::vector::F16x1, uart::Uart0};
#[cfg(feature = "X1")]
const ROWS: usize = 128;
#[cfg(feature = "X2")]
const ROWS: usize = 256;
#[cfg(feature = "X3")]
const ROWS: usize = 512;
#[cfg(feature = "X4")]
const ROWS: usize = 1024;
const COLUMNS: usize = 128;
#[no_mangle]
pub extern "C" fn main() {
let matrix = SMatrix::<F16x1, ROWS, COLUMNS>::zeros();
let input_vector = SVector::<F16x1, COLUMNS>::zeros();
// Flush cache
for element in matrix.iter() {
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
}
for element in input_vector.iter() {
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
}
pim_os::m5op::exit(0);
for _ in 0..10 {
let output_vector = matrix * input_vector;
core::hint::black_box(output_vector);
}
pim_os::m5op::exit(0);
writeln!(Uart0, "Done").unwrap();
}

View File

@@ -0,0 +1,46 @@
#![no_std]
#![no_main]
extern crate alloc;
use core::fmt::Write;
use half::f16;
use nalgebra::{SMatrix, SVector};
use pim_os::{pim::vector::F16x1, uart::Uart0};
#[cfg(feature = "X1")]
const DIMENSIONS: usize = 128;
#[cfg(feature = "X2")]
const DIMENSIONS: usize = 256;
#[cfg(feature = "X3")]
const DIMENSIONS: usize = 512;
#[cfg(feature = "X4")]
const DIMENSIONS: usize = 1024;
const ROWS: usize = DIMENSIONS;
const COLUMNS: usize = DIMENSIONS;
#[no_mangle]
pub extern "C" fn main() {
let matrix = SMatrix::<F16x1, ROWS, COLUMNS>::zeros();
let mut input_vector = SVector::<F16x1, COLUMNS>::zeros();
pim_os::m5op::exit(0);
for _ in 0..1 {
input_vector = matrix * input_vector;
input_vector = input_vector.map(|element| {
if element.0 < f16::ZERO {
F16x1(f16::ZERO)
} else {
element
}
});
core::hint::black_box(input_vector);
}
pim_os::m5op::exit(0);
writeln!(Uart0, "Done").unwrap();
}

View File

@@ -0,0 +1,46 @@
#![no_std]
#![no_main]
extern crate alloc;
use core::{arch::asm, fmt::Write};
use nalgebra::SVector;
use num_traits::identities::Zero;
use pim_os::{pim::vector::F16x1, uart::Uart0};
#[cfg(feature = "X1")]
const ROWS: usize = 256;
#[cfg(feature = "X2")]
const ROWS: usize = 512;
#[cfg(feature = "X3")]
const ROWS: usize = 1024;
#[cfg(feature = "X4")]
const ROWS: usize = 2048;
#[no_mangle]
pub extern "C" fn main() {
let a = SVector::<F16x1, ROWS>::zeros();
let b = SVector::<F16x1, ROWS>::zeros();
let s = F16x1::zero();
// Flush cache
for element in a.iter() {
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
}
for element in b.iter() {
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
}
pim_os::m5op::exit(0);
for _ in 0..10 {
let a_s = a * s;
let c = a_s.component_mul(&b);
core::hint::black_box(c);
}
pim_os::m5op::exit(0);
writeln!(Uart0, "Done").unwrap();
}

View File

@@ -0,0 +1,42 @@
#![no_std]
#![no_main]
extern crate alloc;
use core::{arch::asm, fmt::Write};
use nalgebra::SVector;
use pim_os::{pim::vector::F16x1, uart::Uart0};
#[cfg(feature = "X1")]
const ROWS: usize = 256;
#[cfg(feature = "X2")]
const ROWS: usize = 512;
#[cfg(feature = "X3")]
const ROWS: usize = 1024;
#[cfg(feature = "X4")]
const ROWS: usize = 2048;
#[no_mangle]
pub extern "C" fn main() {
let a = SVector::<F16x1, ROWS>::zeros();
let b = SVector::<F16x1, ROWS>::zeros();
for element in a.iter() {
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
}
for element in b.iter() {
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
}
pim_os::m5op::exit(0);
for _ in 0..10 {
let c = a + b;
core::hint::black_box(c);
}
pim_os::m5op::exit(0);
writeln!(Uart0, "Done").unwrap();
}

View File

@@ -0,0 +1,42 @@
#![no_std]
#![no_main]
extern crate alloc;
use core::{arch::asm, fmt::Write};
use nalgebra::SVector;
use pim_os::{pim::vector::F16x1, uart::Uart0};
#[cfg(feature = "X1")]
const ROWS: usize = 256;
#[cfg(feature = "X2")]
const ROWS: usize = 512;
#[cfg(feature = "X3")]
const ROWS: usize = 1024;
#[cfg(feature = "X4")]
const ROWS: usize = 2048;
#[no_mangle]
pub extern "C" fn main() {
let a = SVector::<F16x1, ROWS>::zeros();
let b = SVector::<F16x1, ROWS>::zeros();
for element in a.iter() {
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
}
for element in b.iter() {
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
}
pim_os::m5op::exit(0);
for _ in 0..10 {
let c = a.component_mul(&b);
core::hint::black_box(c);
}
pim_os::m5op::exit(0);
writeln!(Uart0, "Done").unwrap();
}

View File

@@ -18,14 +18,36 @@ use pim_os::{
uart::Uart0,
};
const ROWS: usize = 32;
const ROWS: usize = 128;
const COLUMNS: usize = 128;
const X16_ROWS: usize = ROWS / 16;
const X16_COLUMNS: usize = COLUMNS / 16;
#[cfg(feature = "X1")]
const REPETITIONS: usize = 1;
#[cfg(feature = "X2")]
const REPETITIONS: usize = 2;
#[cfg(feature = "X3")]
const REPETITIONS: usize = 4;
#[cfg(feature = "X4")]
const REPETITIONS: usize = 8;
#[no_mangle]
pub extern "C" fn main() {
pim::state::set_kernel(&gemv::KERNEL);
// #[cfg(feature = "X1")]
// pim::state::set_kernel(&gemv::KERNEL_X1);
// #[cfg(feature = "X2")]
// pim::state::set_kernel(&gemv::KERNEL_X2);
// #[cfg(feature = "X3")]
// pim::state::set_kernel(&gemv::KERNEL_X3);
// #[cfg(feature = "X4")]
pim::state::set_kernel(&gemv::KERNEL_X4);
let mut matrix = SMatrix::<_, ROWS, COLUMNS>::zeros();
matrix.fill_lower_triangle(F16x1::one(), 0);
@@ -42,28 +64,36 @@ pub extern "C" fn main() {
// Verify everything is correctly initialized before PIM operation
barrier::dsb(barrier::SY);
// Execute kernel
{
pim::state::set_bank_mode(BankMode::PimAllBank);
pim_os::m5op::exit(0);
for _ in 0..10 {
// Execute kernel
{
pim::state::set_bank_mode(BankMode::PimAllBank);
gemv::execute(
pim_matrix.as_ref(),
interleaved_input_vector.as_ref(),
output_partial_sum_vector.as_mut(),
dummy.as_ref(),
);
for _ in 0..REPETITIONS {
gemv::execute(
pim_matrix.as_ref(),
interleaved_input_vector.as_ref(),
output_partial_sum_vector.as_mut(),
dummy.as_ref(),
);
}
pim::state::set_bank_mode(BankMode::SingleBank);
pim::state::set_bank_mode(BankMode::SingleBank);
}
// writeln!(Uart0, "{output_partial_sum_vector}").unwrap();
let output_vector = SVector::<F16x1, ROWS>::from_fn(|r, _| {
output_partial_sum_vector[r]
.0
.iter()
.fold(F16x1::zero(), |acc, val| acc + *val)
});
core::hint::black_box(output_vector);
}
pim_os::m5op::exit(0);
writeln!(Uart0, "{output_partial_sum_vector}").unwrap();
let output_vector = SVector::<F16x1, ROWS>::from_fn(|r, _| {
output_partial_sum_vector[r]
.0
.iter()
.fold(F16x1::zero(), |acc, val| acc + *val)
});
writeln!(Uart0, "{output_vector}").unwrap();
// writeln!(Uart0, "{output_vector}").unwrap();
writeln!(Uart0, "Done").unwrap();
}

View File

@@ -0,0 +1,91 @@
#![no_std]
#![no_main]
extern crate alloc;
use aarch64_cpu::asm::barrier;
use alloc::boxed::Box;
use core::fmt::Write;
use nalgebra::{SMatrix, SVector};
use num_traits::{One, Zero};
use pim_isa::BankMode;
use pim_os::{
kernel::gemv,
pim::{
self, interleaved_array,
vector::{F16x1, F16x16},
},
uart::Uart0,
};
#[cfg(feature = "X1")]
const REPETITIONS: usize = 1;
#[cfg(feature = "X2")]
const REPETITIONS: usize = 2;
#[cfg(feature = "X3")]
const REPETITIONS: usize = 4;
#[cfg(feature = "X4")]
const REPETITIONS: usize = 8;
const ROWS: usize = 128;
const COLUMNS: usize = 128; // Has to be 128. Assume DIMENSIONS and fill rest with zeros.
const X16_ROWS: usize = ROWS / 16;
const X16_COLUMNS: usize = COLUMNS / 16;
#[no_mangle]
pub extern "C" fn main() {
pim::state::set_kernel(&gemv::KERNEL_X4);
let mut matrix = SMatrix::<_, ROWS, COLUMNS>::zeros();
matrix.fill_lower_triangle(F16x1::one(), 0);
let pim_matrix = Box::new(pim::continuous_array::Matrix::<X16_ROWS, X16_COLUMNS>::from(matrix));
let input_vector = SVector::<_, X16_COLUMNS>::from_element(F16x16::one());
let mut output_partial_sum_vector = Box::new(SVector::<F16x16, ROWS>::zeros());
let dummy = Box::new(0);
// Verify everything is correctly initialized before PIM operation
barrier::dsb(barrier::SY);
pim_os::m5op::exit(0);
for _ in 0..1 {
let interleaved_input_vector =
Box::new(interleaved_array::Vector::from(input_vector.clone()));
// Execute kernel
{
pim::state::set_bank_mode(BankMode::PimAllBank);
for _ in 0..(REPETITIONS * REPETITIONS) {
gemv::execute(
pim_matrix.as_ref(),
interleaved_input_vector.as_ref(),
output_partial_sum_vector.as_mut(),
dummy.as_ref(),
);
}
pim::state::set_bank_mode(BankMode::SingleBank);
}
// writeln!(Uart0, "{output_partial_sum_vector}").unwrap();
let output_vector = SVector::<F16x1, ROWS>::from_fn(|r, _| {
output_partial_sum_vector[r]
.0
.iter()
.fold(F16x1::zero(), |acc, val| acc + *val)
});
core::hint::black_box(output_vector);
}
pim_os::m5op::exit(0);
writeln!(Uart0, "Done").unwrap();
}

93
pim-os/src/bin/haxpy.rs Normal file
View File

@@ -0,0 +1,93 @@
#![no_std]
#![no_main]
extern crate alloc;
use aarch64_cpu::asm::barrier;
use alloc::boxed::Box;
use core::fmt::Write;
use half::f16;
use nalgebra::SVector;
use pim_isa::BankMode;
use pim_os::{
kernel::haxpy,
pim::{
self, interleaved_array,
vector::{F16x1, F16x16},
},
uart::Uart0,
};
#[cfg(feature = "X1")]
const ROWS: usize = 256;
#[cfg(feature = "X2")]
const ROWS: usize = 512;
#[cfg(feature = "X3")]
const ROWS: usize = 1024;
#[cfg(feature = "X4")]
const ROWS: usize = 2048;
const ELEMENTS_PER_BANK: usize = 16;
const BANKS: usize = 16;
const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS);
#[no_mangle]
pub extern "C" fn main() {
#[cfg(feature = "X1")]
pim::state::set_kernel(&haxpy::KERNEL_X1);
#[cfg(feature = "X2")]
pim::state::set_kernel(&haxpy::KERNEL_X2);
#[cfg(feature = "X3")]
pim::state::set_kernel(&haxpy::KERNEL_X3);
#[cfg(feature = "X4")]
pim::state::set_kernel(&haxpy::KERNEL_X4);
let a = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::from_fn(|i, _| F16x1(f16::from_f32(i as _))),
));
let b = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::from_fn(|i, _| F16x1(f16::from_f32((ROWS - i) as _))),
));
let scalar_vector = SVector::<F16x16, 1>::from_element(F16x16([F16x1(f16::NEG_ONE); 16]));
let interleaved_scalar_vector = Box::new(interleaved_array::Vector::from(scalar_vector));
// writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap();
let mut c = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::zeros(),
));
let dummy = Box::new(0);
// Verify everything is correctly initialized before PIM operation
barrier::dsb(barrier::SY);
// Execute kernel
{
pim::state::set_bank_mode(BankMode::PimAllBank);
pim_os::m5op::exit(0);
for _ in 0..10 {
haxpy::execute::<ROWS, BLOCKS>(
&a.0,
&b.0,
&interleaved_scalar_vector,
&mut c.0,
dummy.as_ref(),
);
}
pim_os::m5op::exit(0);
pim::state::set_bank_mode(BankMode::SingleBank);
}
// writeln!(Uart0, "{}", c.0).unwrap();
writeln!(Uart0, "Done").unwrap();
}

View File

@@ -6,36 +6,57 @@ extern crate alloc;
use aarch64_cpu::asm::barrier;
use alloc::boxed::Box;
use core::fmt::Write;
use nalgebra::{SMatrix, SVector};
use num_traits::{One, Zero};
use half::f16;
use nalgebra::SVector;
use pim_isa::BankMode;
use pim_os::{
kernel::gemv,
pim::{
self, interleaved_array,
vector::{F16x1, F16x16},
},
kernel::vadd,
pim::{self, vector::F16x1},
uart::Uart0,
};
const ROWS: usize = 32;
const COLUMNS: usize = 128;
const X16_ROWS: usize = ROWS / 16;
const X16_COLUMNS: usize = COLUMNS / 16;
#[cfg(feature = "X1")]
const ROWS: usize = 256;
#[cfg(feature = "X2")]
const ROWS: usize = 512;
#[cfg(feature = "X3")]
const ROWS: usize = 1024;
#[cfg(feature = "X4")]
const ROWS: usize = 2048;
const ELEMENTS_PER_BANK: usize = 16;
const BANKS: usize = 16;
const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS);
#[no_mangle]
pub extern "C" fn main() {
pim::state::set_kernel(&gemv::KERNEL);
#[cfg(feature = "X1")]
pim::state::set_kernel(&vadd::KERNEL_X1);
let mut matrix = SMatrix::<_, ROWS, COLUMNS>::zeros();
matrix.fill_lower_triangle(F16x1::one(), 0);
#[cfg(feature = "X2")]
pim::state::set_kernel(&vadd::KERNEL_X2);
let pim_matrix = Box::new(pim::continuous_array::Matrix::<X16_ROWS, X16_COLUMNS>::from(matrix));
#[cfg(feature = "X3")]
pim::state::set_kernel(&vadd::KERNEL_X3);
let input_vector = SVector::<_, X16_COLUMNS>::from_element(F16x16::one());
let interleaved_input_vector = Box::new(interleaved_array::Vector::from(input_vector));
#[cfg(feature = "X4")]
pim::state::set_kernel(&vadd::KERNEL_X4);
let mut output_partial_sum_vector = Box::new(SVector::<F16x16, ROWS>::zeros());
let a = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::from_fn(|i, _| F16x1(f16::from_f32(i as _))),
));
let b = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::from_fn(|i, _| F16x1(f16::from_f32((ROWS - i) as _))),
));
// writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap();
let mut c = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::zeros(),
));
let dummy = Box::new(0);
@@ -46,24 +67,15 @@ pub extern "C" fn main() {
{
pim::state::set_bank_mode(BankMode::PimAllBank);
gemv::execute(
pim_matrix.as_ref(),
interleaved_input_vector.as_ref(),
output_partial_sum_vector.as_mut(),
dummy.as_ref(),
);
pim_os::m5op::exit(0);
for _ in 0..10 {
vadd::execute::<ROWS, BLOCKS>(&a.0, &b.0, &mut c.0, dummy.as_ref());
}
pim_os::m5op::exit(0);
pim::state::set_bank_mode(BankMode::SingleBank);
}
writeln!(Uart0, "{output_partial_sum_vector}").unwrap();
let output_vector = SVector::<F16x1, ROWS>::from_fn(|r, _| {
output_partial_sum_vector[r]
.0
.iter()
.fold(F16x1::zero(), |acc, val| acc + *val)
});
writeln!(Uart0, "{output_vector}").unwrap();
// writeln!(Uart0, "{}", c.0).unwrap();
writeln!(Uart0, "Done").unwrap();
}

81
pim-os/src/bin/vmul.rs Normal file
View File

@@ -0,0 +1,81 @@
#![no_std]
#![no_main]
extern crate alloc;
use aarch64_cpu::asm::barrier;
use alloc::boxed::Box;
use core::fmt::Write;
use half::f16;
use nalgebra::SVector;
use pim_isa::BankMode;
use pim_os::{
kernel::vmul,
pim::{self, vector::F16x1},
uart::Uart0,
};
#[cfg(feature = "X1")]
const ROWS: usize = 256;
#[cfg(feature = "X2")]
const ROWS: usize = 512;
#[cfg(feature = "X3")]
const ROWS: usize = 1024;
#[cfg(feature = "X4")]
const ROWS: usize = 2048;
const ELEMENTS_PER_BANK: usize = 16;
const BANKS: usize = 16;
const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS);
#[no_mangle]
pub extern "C" fn main() {
#[cfg(feature = "X1")]
pim::state::set_kernel(&vmul::KERNEL_X1);
#[cfg(feature = "X2")]
pim::state::set_kernel(&vmul::KERNEL_X2);
#[cfg(feature = "X3")]
pim::state::set_kernel(&vmul::KERNEL_X3);
#[cfg(feature = "X4")]
pim::state::set_kernel(&vmul::KERNEL_X4);
let a = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::from_fn(|_, _| F16x1(f16::from_f32(2 as _))),
));
let b = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::from_fn(|_, _| F16x1(f16::from_f32(3 as _))),
));
// writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap();
let mut c = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::zeros(),
));
let dummy = Box::new(0);
// Verify everything is correctly initialized before PIM operation
barrier::dsb(barrier::SY);
// Execute kernel
{
pim::state::set_bank_mode(BankMode::PimAllBank);
pim_os::m5op::exit(0);
for _ in 0..10 {
vmul::execute::<ROWS, BLOCKS>(&a.0, &b.0, &mut c.0, dummy.as_ref());
}
pim_os::m5op::exit(0);
pim::state::set_bank_mode(BankMode::SingleBank);
}
// writeln!(Uart0, "{}", c.0).unwrap();
writeln!(Uart0, "Done").unwrap();
}

View File

@@ -1,2 +1,5 @@
pub mod legacy;
pub mod gemv;
pub mod haxpy;
pub mod legacy;
pub mod vadd;
pub mod vmul;

View File

@@ -5,7 +5,78 @@ use aarch64_cpu::asm::barrier;
use nalgebra::SVector;
use pim_isa::{File, Instruction, Kernel};
pub const KERNEL: Kernel = Kernel([
pub const KERNEL_X1: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 4 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 5 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 6 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 7 },
},
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 0 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: true,
},
Instruction::JUMP {
offset: -1,
count: 7,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub const KERNEL_X2: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
@@ -79,13 +150,185 @@ pub const KERNEL: Kernel = Kernel([
Instruction::NOP,
]);
pub fn execute<const X16R: usize, const R: usize, const X16C: usize>(
matrix: &Matrix<X16R, X16C>,
input_vector: &interleaved_array::Vector<X16C>,
pub const KERNEL_X3: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 4 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 5 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 6 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 7 },
},
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 0 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: true,
},
Instruction::JUMP {
offset: -1,
count: 31,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 2 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 3 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub const KERNEL_X4: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 4 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 5 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 6 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 7 },
},
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 0 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: true,
},
Instruction::JUMP {
offset: -1,
count: 63,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 2 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 3 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 4 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 5 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 6 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 7 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub fn execute<const X16R: usize, const R: usize>(
matrix: &Matrix<X16R, 8>,
input_vector: &interleaved_array::Vector<8>,
output_partial_sum_vector: &mut SVector<F16x16, R>,
dummy: &impl PimOperand,
) {
for block in input_vector.0.as_slice().iter() {
for block in input_vector.0.iter() {
block.execute_read();
}

359
pim-os/src/kernel/haxpy.rs Normal file
View File

@@ -0,0 +1,359 @@
use crate::pim::{interleaved_array, operation::PimOperand, vector::F16x1};
use nalgebra::SVector;
use pim_isa::{File, Instruction, Kernel};
pub const KERNEL_X1: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::SrfM { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MAD {
src0: File::Bank,
src1: File::SrfA { index: 0 },
src2: File::GrfA { index: 0 },
dst: File::GrfA { index: 0 },
aam: false,
},
Instruction::FILL {
src: File::GrfA { index: 0 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub const KERNEL_X2: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::SrfM { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MAD {
src0: File::Bank,
src1: File::SrfA { index: 0 },
src2: File::GrfA { index: 0 },
dst: File::GrfA { index: 0 },
aam: false,
},
Instruction::MAD {
src0: File::Bank,
src1: File::SrfA { index: 1 },
src2: File::GrfA { index: 1 },
dst: File::GrfA { index: 1 },
aam: false,
},
Instruction::FILL {
src: File::GrfA { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 1 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub const KERNEL_X3: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::SrfM { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MAD {
src0: File::Bank,
src1: File::SrfA { index: 0 },
src2: File::GrfA { index: 0 },
dst: File::GrfA { index: 0 },
aam: false,
},
Instruction::MAD {
src0: File::Bank,
src1: File::SrfA { index: 1 },
src2: File::GrfA { index: 1 },
dst: File::GrfA { index: 1 },
aam: false,
},
Instruction::MAD {
src0: File::Bank,
src1: File::SrfA { index: 2 },
src2: File::GrfA { index: 2 },
dst: File::GrfA { index: 2 },
aam: false,
},
Instruction::MAD {
src0: File::Bank,
src1: File::SrfA { index: 3 },
src2: File::GrfA { index: 3 },
dst: File::GrfA { index: 3 },
aam: false,
},
Instruction::FILL {
src: File::GrfA { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 1 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 2 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 3 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub const KERNEL_X4: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::SrfM { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 4 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 5 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 6 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 7 },
},
Instruction::MAD {
src0: File::Bank,
src1: File::SrfA { index: 0 },
src2: File::GrfA { index: 0 },
dst: File::GrfA { index: 0 },
aam: false,
},
Instruction::MAD {
src0: File::Bank,
src1: File::SrfA { index: 1 },
src2: File::GrfA { index: 1 },
dst: File::GrfA { index: 1 },
aam: false,
},
Instruction::MAD {
src0: File::Bank,
src1: File::SrfA { index: 2 },
src2: File::GrfA { index: 2 },
dst: File::GrfA { index: 2 },
aam: false,
},
Instruction::MAD {
src0: File::Bank,
src1: File::SrfA { index: 3 },
src2: File::GrfA { index: 3 },
dst: File::GrfA { index: 3 },
aam: false,
},
Instruction::MAD {
src0: File::Bank,
src1: File::SrfA { index: 4 },
src2: File::GrfA { index: 4 },
dst: File::GrfA { index: 4 },
aam: false,
},
Instruction::MAD {
src0: File::Bank,
src1: File::SrfA { index: 5 },
src2: File::GrfA { index: 5 },
dst: File::GrfA { index: 5 },
aam: false,
},
Instruction::MAD {
src0: File::Bank,
src1: File::SrfA { index: 6 },
src2: File::GrfA { index: 6 },
dst: File::GrfA { index: 6 },
aam: false,
},
Instruction::MAD {
src0: File::Bank,
src1: File::SrfA { index: 7 },
src2: File::GrfA { index: 7 },
dst: File::GrfA { index: 7 },
aam: false,
},
Instruction::FILL {
src: File::GrfA { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 1 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 2 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 3 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 4 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 5 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 6 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfA { index: 7 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub fn execute<const R: usize, const BLOCKS: usize>(
a: &SVector<F16x1, R>,
b: &SVector<F16x1, R>,
interleaved_scalar: &interleaved_array::Vector<1>,
c: &mut SVector<F16x1, R>,
dummy: &impl PimOperand,
) {
interleaved_scalar.execute_read();
a.fixed_rows_with_step::<BLOCKS>(0, 256)
.iter()
.for_each(|entry| entry.execute_read());
b.fixed_rows_with_step::<BLOCKS>(0, 256)
.iter()
.for_each(|entry| entry.execute_read());
c.fixed_rows_with_step_mut::<BLOCKS>(0, 256)
.iter_mut()
.for_each(|entry| entry.execute_write());
dummy.execute_read();
}

View File

@@ -1,3 +1,3 @@
pub mod matrix_matrix_mul;
pub mod matrix_scalar_mul;
pub mod matrix_vector_mul;
pub mod matrix_vector_mul;

View File

@@ -122,4 +122,4 @@ pub fn execute<const R: usize, const C: usize>(
dummy_array.execute_instruction_read_single_bank(0);
}
}
}

327
pim-os/src/kernel/vadd.rs Normal file
View File

@@ -0,0 +1,327 @@
use crate::pim::{operation::PimOperand, vector::F16x1};
use nalgebra::SVector;
use pim_isa::{File, Instruction, Kernel};
pub const KERNEL_X1: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub const KERNEL_X2: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 1 },
dst: File::GrfB { index: 1 },
aam: false,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub const KERNEL_X3: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 1 },
dst: File::GrfB { index: 1 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 2 },
dst: File::GrfB { index: 2 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 3 },
dst: File::GrfB { index: 3 },
aam: false,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 2 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 3 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub const KERNEL_X4: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 4 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 5 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 6 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 7 },
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 1 },
dst: File::GrfB { index: 1 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 2 },
dst: File::GrfB { index: 2 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 3 },
dst: File::GrfB { index: 3 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 4 },
dst: File::GrfB { index: 4 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 5 },
dst: File::GrfB { index: 5 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 6 },
dst: File::GrfB { index: 6 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 7 },
dst: File::GrfB { index: 7 },
aam: false,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 2 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 3 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 4 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 5 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 6 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 7 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub fn execute<const R: usize, const BLOCKS: usize>(
a: &SVector<F16x1, R>,
b: &SVector<F16x1, R>,
c: &mut SVector<F16x1, R>,
dummy: &impl PimOperand,
) {
a.fixed_rows_with_step::<BLOCKS>(0, 256)
.iter()
.for_each(|entry| entry.execute_read());
b.fixed_rows_with_step::<BLOCKS>(0, 256)
.iter()
.for_each(|entry| entry.execute_read());
c.fixed_rows_with_step_mut::<BLOCKS>(0, 256)
.iter_mut()
.for_each(|entry| entry.execute_write());
dummy.execute_read();
}

327
pim-os/src/kernel/vmul.rs Normal file
View File

@@ -0,0 +1,327 @@
use crate::pim::{operation::PimOperand, vector::F16x1};
use nalgebra::SVector;
use pim_isa::{File, Instruction, Kernel};
pub const KERNEL_X1: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub const KERNEL_X2: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 1 },
dst: File::GrfB { index: 1 },
aam: false,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub const KERNEL_X3: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 1 },
dst: File::GrfB { index: 1 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 2 },
dst: File::GrfB { index: 2 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 3 },
dst: File::GrfB { index: 3 },
aam: false,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 2 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 3 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub const KERNEL_X4: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 4 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 5 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 6 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 7 },
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 1 },
dst: File::GrfB { index: 1 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 2 },
dst: File::GrfB { index: 2 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 3 },
dst: File::GrfB { index: 3 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 4 },
dst: File::GrfB { index: 4 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 5 },
dst: File::GrfB { index: 5 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 6 },
dst: File::GrfB { index: 6 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 7 },
dst: File::GrfB { index: 7 },
aam: false,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 2 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 3 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 4 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 5 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 6 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 7 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub fn execute<const R: usize, const BLOCKS: usize>(
a: &SVector<F16x1, R>,
b: &SVector<F16x1, R>,
c: &mut SVector<F16x1, R>,
dummy: &impl PimOperand,
) {
a.fixed_rows_with_step::<BLOCKS>(0, 256)
.iter()
.for_each(|entry| entry.execute_read());
b.fixed_rows_with_step::<BLOCKS>(0, 256)
.iter()
.for_each(|entry| entry.execute_read());
c.fixed_rows_with_step_mut::<BLOCKS>(0, 256)
.iter_mut()
.for_each(|entry| entry.execute_write());
dummy.execute_read();
}

View File

@@ -24,7 +24,7 @@ pub extern "C" fn entry() -> ! {
unsafe { main() }
m5op::exit();
m5op::exit(0);
loop {
compiler_fence(Ordering::SeqCst);

View File

@@ -3,24 +3,24 @@ use core::arch::global_asm;
global_asm!(include_str!("m5op.s"));
extern "C" {
fn m5_exit();
fn m5_reset_stats();
fn m5_dump_stats();
fn m5_dump_reset_stats();
fn m5_exit(delay_ns: u64);
fn m5_reset_stats(delay_ns: u64, period_ns: u64);
fn m5_dump_stats(delay_ns: u64, period_ns: u64);
fn m5_dump_reset_stats(delay_ns: u64, period_ns: u64);
}
pub fn exit() {
unsafe { m5_exit() }
pub fn exit(delay_ns: u64) {
unsafe { m5_exit(delay_ns) }
}
pub fn reset_stats() {
unsafe { m5_reset_stats() }
pub fn reset_stats(delay_ns: u64, period_ns: u64) {
unsafe { m5_reset_stats(delay_ns, period_ns) }
}
pub fn dump_stats() {
unsafe { m5_dump_stats() }
pub fn dump_stats(delay_ns: u64, period_ns: u64) {
unsafe { m5_dump_stats(delay_ns, period_ns) }
}
pub fn dump_reset_stats() {
unsafe { m5_dump_reset_stats() }
pub fn dump_reset_stats(delay_ns: u64, period_ns: u64) {
unsafe { m5_dump_reset_stats(delay_ns, period_ns) }
}

View File

@@ -1 +1 @@
pub mod array;
pub mod array;