diff --git a/pim-os/src/bin/gemv.rs b/pim-os/src/bin/gemv.rs index 99fc76d..faf7ab6 100644 --- a/pim-os/src/bin/gemv.rs +++ b/pim-os/src/bin/gemv.rs @@ -23,9 +23,31 @@ const COLUMNS: usize = 128; const X16_ROWS: usize = ROWS / 16; const X16_COLUMNS: usize = COLUMNS / 16; +#[cfg(feature = "X1")] +const REPETITIONS: usize = 1; + +#[cfg(feature = "X2")] +const REPETITIONS: usize = 2; + +#[cfg(feature = "X3")] +const REPETITIONS: usize = 4; + +#[cfg(feature = "X4")] +const REPETITIONS: usize = 8; + #[no_mangle] pub extern "C" fn main() { - pim::state::set_kernel(&gemv::KERNEL); + // #[cfg(feature = "X1")] + // pim::state::set_kernel(&gemv::KERNEL_X1); + + // #[cfg(feature = "X2")] + // pim::state::set_kernel(&gemv::KERNEL_X2); + + // #[cfg(feature = "X3")] + // pim::state::set_kernel(&gemv::KERNEL_X3); + + // #[cfg(feature = "X4")] + pim::state::set_kernel(&gemv::KERNEL_X4); let mut matrix = SMatrix::<_, ROWS, COLUMNS>::zeros(); matrix.fill_lower_triangle(F16x1::one(), 0); @@ -42,26 +64,36 @@ pub extern "C" fn main() { // Verify everything is correctly initialized before PIM operation barrier::dsb(barrier::SY); - // Execute kernel - pim::state::set_bank_mode(BankMode::PimAllBank); - gemv::execute( - pim_matrix.as_ref(), - interleaved_input_vector.as_ref(), - output_partial_sum_vector.as_mut(), - dummy.as_ref(), - ); - pim::state::set_bank_mode(BankMode::SingleBank); + pim_os::m5op::exit(0); + for _ in 0..10 { + // Execute kernel + { + pim::state::set_bank_mode(BankMode::PimAllBank); - writeln!(Uart0, "{output_partial_sum_vector}").unwrap(); + for _ in 0..REPETITIONS { + gemv::execute( + pim_matrix.as_ref(), + interleaved_input_vector.as_ref(), + output_partial_sum_vector.as_mut(), + dummy.as_ref(), + ); + } - let output_vector = SVector::::from_fn(|r, _| { - output_partial_sum_vector[r] - .0 - .iter() - .fold(F16x1::zero(), |acc, val| acc + *val) - }); - core::hint::black_box(output_vector); + pim::state::set_bank_mode(BankMode::SingleBank); + } - writeln!(Uart0, "{output_vector}").unwrap(); + // writeln!(Uart0, "{output_partial_sum_vector}").unwrap(); + + let output_vector = SVector::::from_fn(|r, _| { + output_partial_sum_vector[r] + .0 + .iter() + .fold(F16x1::zero(), |acc, val| acc + *val) + }); + core::hint::black_box(output_vector); + } + pim_os::m5op::exit(0); + + // writeln!(Uart0, "{output_vector}").unwrap(); writeln!(Uart0, "Done").unwrap(); } diff --git a/pim-os/src/bin/gemv_layers.rs b/pim-os/src/bin/gemv_layers.rs new file mode 100644 index 0000000..cc0c251 --- /dev/null +++ b/pim-os/src/bin/gemv_layers.rs @@ -0,0 +1,91 @@ +#![no_std] +#![no_main] + +extern crate alloc; + +use aarch64_cpu::asm::barrier; +use alloc::boxed::Box; +use core::fmt::Write; +use nalgebra::{SMatrix, SVector}; +use num_traits::{One, Zero}; +use pim_isa::BankMode; +use pim_os::{ + kernel::gemv, + pim::{ + self, interleaved_array, + vector::{F16x1, F16x16}, + }, + uart::Uart0, +}; + +#[cfg(feature = "X1")] +const REPETITIONS: usize = 1; + +#[cfg(feature = "X2")] +const REPETITIONS: usize = 2; + +#[cfg(feature = "X3")] +const REPETITIONS: usize = 4; + +#[cfg(feature = "X4")] +const REPETITIONS: usize = 8; + +const ROWS: usize = 128; +const COLUMNS: usize = 128; // Has to be 128. Assume DIMENSIONS and fill rest with zeros. +const X16_ROWS: usize = ROWS / 16; +const X16_COLUMNS: usize = COLUMNS / 16; + +#[no_mangle] +pub extern "C" fn main() { + pim::state::set_kernel(&gemv::KERNEL_X4); + + let mut matrix = SMatrix::<_, ROWS, COLUMNS>::zeros(); + matrix.fill_lower_triangle(F16x1::one(), 0); + + let pim_matrix = Box::new(pim::continuous_array::Matrix::::from(matrix)); + + let input_vector = SVector::<_, X16_COLUMNS>::from_element(F16x16::one()); + + let mut output_partial_sum_vector = Box::new(SVector::::zeros()); + + let dummy = Box::new(0); + + // Verify everything is correctly initialized before PIM operation + barrier::dsb(barrier::SY); + + pim_os::m5op::exit(0); + for _ in 0..1 { + let interleaved_input_vector = + Box::new(interleaved_array::Vector::from(input_vector.clone())); + + // Execute kernel + { + pim::state::set_bank_mode(BankMode::PimAllBank); + + for _ in 0..(REPETITIONS * REPETITIONS) { + gemv::execute( + pim_matrix.as_ref(), + interleaved_input_vector.as_ref(), + output_partial_sum_vector.as_mut(), + dummy.as_ref(), + ); + } + + pim::state::set_bank_mode(BankMode::SingleBank); + } + + // writeln!(Uart0, "{output_partial_sum_vector}").unwrap(); + + let output_vector = SVector::::from_fn(|r, _| { + output_partial_sum_vector[r] + .0 + .iter() + .fold(F16x1::zero(), |acc, val| acc + *val) + }); + + core::hint::black_box(output_vector); + } + pim_os::m5op::exit(0); + + writeln!(Uart0, "Done").unwrap(); +} diff --git a/pim-os/src/bin/haxpy.rs b/pim-os/src/bin/haxpy.rs index 77bacef..c51df62 100644 --- a/pim-os/src/bin/haxpy.rs +++ b/pim-os/src/bin/haxpy.rs @@ -18,14 +18,35 @@ use pim_os::{ uart::Uart0, }; +#[cfg(feature = "X1")] const ROWS: usize = 256; + +#[cfg(feature = "X2")] +const ROWS: usize = 512; + +#[cfg(feature = "X3")] +const ROWS: usize = 1024; + +#[cfg(feature = "X4")] +const ROWS: usize = 2048; + const ELEMENTS_PER_BANK: usize = 16; const BANKS: usize = 16; const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS); #[no_mangle] pub extern "C" fn main() { - pim::state::set_kernel(&haxpy::KERNEL); + #[cfg(feature = "X1")] + pim::state::set_kernel(&haxpy::KERNEL_X1); + + #[cfg(feature = "X2")] + pim::state::set_kernel(&haxpy::KERNEL_X2); + + #[cfg(feature = "X3")] + pim::state::set_kernel(&haxpy::KERNEL_X3); + + #[cfg(feature = "X4")] + pim::state::set_kernel(&haxpy::KERNEL_X4); let a = Box::new(pim::continuous_array::Vector( SVector::::from_fn(|i, _| F16x1(f16::from_f32(i as _))), @@ -37,7 +58,7 @@ pub extern "C" fn main() { let scalar_vector = SVector::::from_element(F16x16([F16x1(f16::NEG_ONE); 16])); let interleaved_scalar_vector = Box::new(interleaved_array::Vector::from(scalar_vector)); - writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap(); + // writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap(); let mut c = Box::new(pim::continuous_array::Vector( SVector::::zeros(), @@ -49,16 +70,24 @@ pub extern "C" fn main() { barrier::dsb(barrier::SY); // Execute kernel - pim::state::set_bank_mode(BankMode::PimAllBank); - haxpy::execute::( - &a.0, - &b.0, - &interleaved_scalar_vector, - &mut c.0, - dummy.as_ref(), - ); - pim::state::set_bank_mode(BankMode::SingleBank); + { + pim::state::set_bank_mode(BankMode::PimAllBank); - writeln!(Uart0, "{}", c.0).unwrap(); + pim_os::m5op::exit(0); + for _ in 0..10 { + haxpy::execute::( + &a.0, + &b.0, + &interleaved_scalar_vector, + &mut c.0, + dummy.as_ref(), + ); + } + pim_os::m5op::exit(0); + + pim::state::set_bank_mode(BankMode::SingleBank); + } + + // writeln!(Uart0, "{}", c.0).unwrap(); writeln!(Uart0, "Done").unwrap(); } diff --git a/pim-os/src/bin/vadd.rs b/pim-os/src/bin/vadd.rs index cdb87be..5892fc9 100644 --- a/pim-os/src/bin/vadd.rs +++ b/pim-os/src/bin/vadd.rs @@ -15,14 +15,35 @@ use pim_os::{ uart::Uart0, }; +#[cfg(feature = "X1")] const ROWS: usize = 256; + +#[cfg(feature = "X2")] +const ROWS: usize = 512; + +#[cfg(feature = "X3")] +const ROWS: usize = 1024; + +#[cfg(feature = "X4")] +const ROWS: usize = 2048; + const ELEMENTS_PER_BANK: usize = 16; const BANKS: usize = 16; const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS); #[no_mangle] pub extern "C" fn main() { - pim::state::set_kernel(&vadd::KERNEL); + #[cfg(feature = "X1")] + pim::state::set_kernel(&vadd::KERNEL_X1); + + #[cfg(feature = "X2")] + pim::state::set_kernel(&vadd::KERNEL_X2); + + #[cfg(feature = "X3")] + pim::state::set_kernel(&vadd::KERNEL_X3); + + #[cfg(feature = "X4")] + pim::state::set_kernel(&vadd::KERNEL_X4); let a = Box::new(pim::continuous_array::Vector( SVector::::from_fn(|i, _| F16x1(f16::from_f32(i as _))), @@ -31,7 +52,7 @@ pub extern "C" fn main() { SVector::::from_fn(|i, _| F16x1(f16::from_f32((ROWS - i) as _))), )); - writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap(); + // writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap(); let mut c = Box::new(pim::continuous_array::Vector( SVector::::zeros(), @@ -43,10 +64,18 @@ pub extern "C" fn main() { barrier::dsb(barrier::SY); // Execute kernel - pim::state::set_bank_mode(BankMode::PimAllBank); - vadd::execute::(&a.0, &b.0, &mut c.0, dummy.as_ref()); - pim::state::set_bank_mode(BankMode::SingleBank); + { + pim::state::set_bank_mode(BankMode::PimAllBank); - writeln!(Uart0, "{}", c.0).unwrap(); + pim_os::m5op::exit(0); + for _ in 0..10 { + vadd::execute::(&a.0, &b.0, &mut c.0, dummy.as_ref()); + } + pim_os::m5op::exit(0); + + pim::state::set_bank_mode(BankMode::SingleBank); + } + + // writeln!(Uart0, "{}", c.0).unwrap(); writeln!(Uart0, "Done").unwrap(); } diff --git a/pim-os/src/bin/vmul.rs b/pim-os/src/bin/vmul.rs index b5ac022..c9ab53b 100644 --- a/pim-os/src/bin/vmul.rs +++ b/pim-os/src/bin/vmul.rs @@ -15,14 +15,35 @@ use pim_os::{ uart::Uart0, }; +#[cfg(feature = "X1")] const ROWS: usize = 256; + +#[cfg(feature = "X2")] +const ROWS: usize = 512; + +#[cfg(feature = "X3")] +const ROWS: usize = 1024; + +#[cfg(feature = "X4")] +const ROWS: usize = 2048; + const ELEMENTS_PER_BANK: usize = 16; const BANKS: usize = 16; const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS); #[no_mangle] pub extern "C" fn main() { - pim::state::set_kernel(&vmul::KERNEL); + #[cfg(feature = "X1")] + pim::state::set_kernel(&vmul::KERNEL_X1); + + #[cfg(feature = "X2")] + pim::state::set_kernel(&vmul::KERNEL_X2); + + #[cfg(feature = "X3")] + pim::state::set_kernel(&vmul::KERNEL_X3); + + #[cfg(feature = "X4")] + pim::state::set_kernel(&vmul::KERNEL_X4); let a = Box::new(pim::continuous_array::Vector( SVector::::from_fn(|_, _| F16x1(f16::from_f32(2 as _))), @@ -31,7 +52,7 @@ pub extern "C" fn main() { SVector::::from_fn(|_, _| F16x1(f16::from_f32(3 as _))), )); - writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap(); + // writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap(); let mut c = Box::new(pim::continuous_array::Vector( SVector::::zeros(), @@ -43,10 +64,18 @@ pub extern "C" fn main() { barrier::dsb(barrier::SY); // Execute kernel - pim::state::set_bank_mode(BankMode::PimAllBank); - vmul::execute::(&a.0, &b.0, &mut c.0, dummy.as_ref()); - pim::state::set_bank_mode(BankMode::SingleBank); + { + pim::state::set_bank_mode(BankMode::PimAllBank); - writeln!(Uart0, "{}", c.0).unwrap(); + pim_os::m5op::exit(0); + for _ in 0..10 { + vmul::execute::(&a.0, &b.0, &mut c.0, dummy.as_ref()); + } + pim_os::m5op::exit(0); + + pim::state::set_bank_mode(BankMode::SingleBank); + } + + // writeln!(Uart0, "{}", c.0).unwrap(); writeln!(Uart0, "Done").unwrap(); } diff --git a/pim-os/src/kernel/gemv.rs b/pim-os/src/kernel/gemv.rs index 082b5d2..806ea55 100644 --- a/pim-os/src/kernel/gemv.rs +++ b/pim-os/src/kernel/gemv.rs @@ -5,7 +5,7 @@ use aarch64_cpu::asm::barrier; use nalgebra::SVector; use pim_isa::{File, Instruction, Kernel}; -pub const KERNEL: Kernel = Kernel([ +pub const KERNEL_X1: Kernel = Kernel([ Instruction::MOV { src: File::Bank, dst: File::GrfA { index: 0 }, @@ -76,6 +76,252 @@ pub const KERNEL: Kernel = Kernel([ Instruction::NOP, ]); +pub const KERNEL_X2: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 4 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 5 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 6 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 7 }, + }, + Instruction::MAC { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + src2: File::GrfB { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: true, + }, + Instruction::JUMP { + offset: -1, + count: 15, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 1 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub const KERNEL_X3: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 4 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 5 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 6 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 7 }, + }, + Instruction::MAC { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + src2: File::GrfB { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: true, + }, + Instruction::JUMP { + offset: -1, + count: 31, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 1 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 2 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 3 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub const KERNEL_X4: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 4 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 5 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 6 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 7 }, + }, + Instruction::MAC { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + src2: File::GrfB { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: true, + }, + Instruction::JUMP { + offset: -1, + count: 63, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 1 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 2 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 3 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 4 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 5 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 6 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 7 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + pub fn execute( matrix: &Matrix, input_vector: &interleaved_array::Vector<8>, diff --git a/pim-os/src/kernel/haxpy.rs b/pim-os/src/kernel/haxpy.rs index 087ce85..6a756a8 100644 --- a/pim-os/src/kernel/haxpy.rs +++ b/pim-os/src/kernel/haxpy.rs @@ -2,7 +2,7 @@ use crate::pim::{interleaved_array, operation::PimOperand, vector::F16x1}; use nalgebra::SVector; use pim_isa::{File, Instruction, Kernel}; -pub const KERNEL: Kernel = Kernel([ +pub const KERNEL_X1: Kernel = Kernel([ Instruction::MOV { src: File::Bank, dst: File::SrfM { index: 0 }, @@ -52,6 +52,288 @@ pub const KERNEL: Kernel = Kernel([ Instruction::NOP, ]); +pub const KERNEL_X2: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::SrfM { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::SrfA { index: 0 }, + src2: File::GrfA { index: 0 }, + dst: File::GrfA { index: 0 }, + aam: false, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::SrfA { index: 1 }, + src2: File::GrfA { index: 1 }, + dst: File::GrfA { index: 1 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfA { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 1 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub const KERNEL_X3: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::SrfM { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::SrfA { index: 0 }, + src2: File::GrfA { index: 0 }, + dst: File::GrfA { index: 0 }, + aam: false, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::SrfA { index: 1 }, + src2: File::GrfA { index: 1 }, + dst: File::GrfA { index: 1 }, + aam: false, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::SrfA { index: 2 }, + src2: File::GrfA { index: 2 }, + dst: File::GrfA { index: 2 }, + aam: false, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::SrfA { index: 3 }, + src2: File::GrfA { index: 3 }, + dst: File::GrfA { index: 3 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfA { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 1 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 2 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 3 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub const KERNEL_X4: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::SrfM { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 4 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 5 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 6 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 7 }, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::SrfA { index: 0 }, + src2: File::GrfA { index: 0 }, + dst: File::GrfA { index: 0 }, + aam: false, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::SrfA { index: 1 }, + src2: File::GrfA { index: 1 }, + dst: File::GrfA { index: 1 }, + aam: false, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::SrfA { index: 2 }, + src2: File::GrfA { index: 2 }, + dst: File::GrfA { index: 2 }, + aam: false, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::SrfA { index: 3 }, + src2: File::GrfA { index: 3 }, + dst: File::GrfA { index: 3 }, + aam: false, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::SrfA { index: 4 }, + src2: File::GrfA { index: 4 }, + dst: File::GrfA { index: 4 }, + aam: false, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::SrfA { index: 5 }, + src2: File::GrfA { index: 5 }, + dst: File::GrfA { index: 5 }, + aam: false, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::SrfA { index: 6 }, + src2: File::GrfA { index: 6 }, + dst: File::GrfA { index: 6 }, + aam: false, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::SrfA { index: 7 }, + src2: File::GrfA { index: 7 }, + dst: File::GrfA { index: 7 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfA { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 1 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 2 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 3 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 4 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 5 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 6 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfA { index: 7 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + pub fn execute( a: &SVector, b: &SVector, diff --git a/pim-os/src/kernel/vadd.rs b/pim-os/src/kernel/vadd.rs index f638f0d..d352192 100644 --- a/pim-os/src/kernel/vadd.rs +++ b/pim-os/src/kernel/vadd.rs @@ -2,7 +2,7 @@ use crate::pim::{operation::PimOperand, vector::F16x1}; use nalgebra::SVector; use pim_isa::{File, Instruction, Kernel}; -pub const KERNEL: Kernel = Kernel([ +pub const KERNEL_X1: Kernel = Kernel([ Instruction::MOV { src: File::Bank, dst: File::GrfA { index: 0 }, @@ -48,6 +48,265 @@ pub const KERNEL: Kernel = Kernel([ Instruction::NOP, ]); +pub const KERNEL_X2: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 1 }, + dst: File::GrfB { index: 1 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 1 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub const KERNEL_X3: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 1 }, + dst: File::GrfB { index: 1 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 2 }, + dst: File::GrfB { index: 2 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 3 }, + dst: File::GrfB { index: 3 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 1 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 2 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 3 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub const KERNEL_X4: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 4 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 5 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 6 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 7 }, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 1 }, + dst: File::GrfB { index: 1 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 2 }, + dst: File::GrfB { index: 2 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 3 }, + dst: File::GrfB { index: 3 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 4 }, + dst: File::GrfB { index: 4 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 5 }, + dst: File::GrfB { index: 5 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 6 }, + dst: File::GrfB { index: 6 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 7 }, + dst: File::GrfB { index: 7 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 1 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 2 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 3 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 4 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 5 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 6 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 7 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + pub fn execute( a: &SVector, b: &SVector, diff --git a/pim-os/src/kernel/vmul.rs b/pim-os/src/kernel/vmul.rs index d11dced..0da7cf0 100644 --- a/pim-os/src/kernel/vmul.rs +++ b/pim-os/src/kernel/vmul.rs @@ -2,7 +2,7 @@ use crate::pim::{operation::PimOperand, vector::F16x1}; use nalgebra::SVector; use pim_isa::{File, Instruction, Kernel}; -pub const KERNEL: Kernel = Kernel([ +pub const KERNEL_X1: Kernel = Kernel([ Instruction::MOV { src: File::Bank, dst: File::GrfA { index: 0 }, @@ -48,6 +48,265 @@ pub const KERNEL: Kernel = Kernel([ Instruction::NOP, ]); +pub const KERNEL_X2: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 1 }, + dst: File::GrfB { index: 1 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 1 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub const KERNEL_X3: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 1 }, + dst: File::GrfB { index: 1 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 2 }, + dst: File::GrfB { index: 2 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 3 }, + dst: File::GrfB { index: 3 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 1 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 2 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 3 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub const KERNEL_X4: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 4 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 5 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 6 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 7 }, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 1 }, + dst: File::GrfB { index: 1 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 2 }, + dst: File::GrfB { index: 2 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 3 }, + dst: File::GrfB { index: 3 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 4 }, + dst: File::GrfB { index: 4 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 5 }, + dst: File::GrfB { index: 5 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 6 }, + dst: File::GrfB { index: 6 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 7 }, + dst: File::GrfB { index: 7 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 1 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 2 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 3 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 4 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 5 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 6 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 7 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + pub fn execute( a: &SVector, b: &SVector,