diff --git a/pim-os/src/bin/classic_gemv.rs b/pim-os/src/bin/classic_gemv.rs index 23f7b90..8d04bce 100644 --- a/pim-os/src/bin/classic_gemv.rs +++ b/pim-os/src/bin/classic_gemv.rs @@ -7,7 +7,18 @@ use core::fmt::Write; use nalgebra::{SMatrix, SVector}; use pim_os::{pim::vector::F16x1, uart::Uart0}; +#[cfg(feature = "X1")] +const ROWS: usize = 16; + +#[cfg(feature = "X2")] const ROWS: usize = 32; + +#[cfg(feature = "X3")] +const ROWS: usize = 64; + +#[cfg(feature = "X4")] +const ROWS: usize = 128; + const COLUMNS: usize = 128; #[no_mangle] diff --git a/pim-os/src/bin/classic_haxpy.rs b/pim-os/src/bin/classic_haxpy.rs index d3dc4cc..68fb990 100644 --- a/pim-os/src/bin/classic_haxpy.rs +++ b/pim-os/src/bin/classic_haxpy.rs @@ -8,6 +8,16 @@ use nalgebra::SVector; use num_traits::identities::Zero; use pim_os::{pim::vector::F16x1, uart::Uart0}; +#[cfg(feature = "X1")] +const ROWS: usize = 256; + +#[cfg(feature = "X2")] +const ROWS: usize = 512; + +#[cfg(feature = "X3")] +const ROWS: usize = 1024; + +#[cfg(feature = "X4")] const ROWS: usize = 2048; #[no_mangle] diff --git a/pim-os/src/bin/classic_vadd.rs b/pim-os/src/bin/classic_vadd.rs index b78100b..07ed84a 100644 --- a/pim-os/src/bin/classic_vadd.rs +++ b/pim-os/src/bin/classic_vadd.rs @@ -7,6 +7,16 @@ use core::fmt::Write; use nalgebra::SVector; use pim_os::{pim::vector::F16x1, uart::Uart0}; +#[cfg(feature = "X1")] +const ROWS: usize = 256; + +#[cfg(feature = "X2")] +const ROWS: usize = 512; + +#[cfg(feature = "X3")] +const ROWS: usize = 1024; + +#[cfg(feature = "X4")] const ROWS: usize = 2048; #[no_mangle] diff --git a/pim-os/src/bin/classic_vmul.rs b/pim-os/src/bin/classic_vmul.rs index 6e0fbc4..4bd7def 100644 --- a/pim-os/src/bin/classic_vmul.rs +++ b/pim-os/src/bin/classic_vmul.rs @@ -7,6 +7,16 @@ use core::fmt::Write; use nalgebra::SVector; use pim_os::{pim::vector::F16x1, uart::Uart0}; +#[cfg(feature = "X1")] +const ROWS: usize = 256; + +#[cfg(feature = "X2")] +const ROWS: usize = 512; + +#[cfg(feature = "X3")] +const ROWS: usize = 1024; + +#[cfg(feature = "X4")] const ROWS: usize = 2048; #[no_mangle] diff --git a/pim-os/src/bin/haxpy.rs b/pim-os/src/bin/haxpy.rs index 8c1e77e..0e21ba2 100644 --- a/pim-os/src/bin/haxpy.rs +++ b/pim-os/src/bin/haxpy.rs @@ -18,11 +18,35 @@ use pim_os::{ uart::Uart0, }; +#[cfg(feature = "X1")] +const ROWS: usize = 128; + +#[cfg(feature = "X2")] +const ROWS: usize = 256; + +#[cfg(feature = "X3")] +const ROWS: usize = 512; + +#[cfg(feature = "X4")] const ROWS: usize = 1024; +const ELEMENTS_PER_BANK: usize = 16; +const BANKS: usize = 16; +const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS); + #[no_mangle] pub extern "C" fn main() { - pim::state::set_kernel(&haxpy::KERNEL); + #[cfg(feature = "X1")] + pim::state::set_kernel(&haxpy::KERNEL_X1); + + #[cfg(feature = "X2")] + pim::state::set_kernel(&haxpy::KERNEL_X2); + + #[cfg(feature = "X3")] + pim::state::set_kernel(&haxpy::KERNEL_X3); + + #[cfg(feature = "X4")] + pim::state::set_kernel(&haxpy::KERNEL_X4); let a = Box::new(pim::continuous_array::Vector( SVector::::from_fn(|i, _| F16x1(f16::from_f32(i as _))), @@ -49,7 +73,7 @@ pub extern "C" fn main() { { pim::state::set_bank_mode(BankMode::PimAllBank); - haxpy::execute( + haxpy::execute::( &a.0, &b.0, &interleaved_scalar_vector, diff --git a/pim-os/src/bin/vadd.rs b/pim-os/src/bin/vadd.rs index 53b20dc..5e15dd0 100644 --- a/pim-os/src/bin/vadd.rs +++ b/pim-os/src/bin/vadd.rs @@ -15,11 +15,35 @@ use pim_os::{ uart::Uart0, }; +#[cfg(feature = "X1")] +const ROWS: usize = 256; + +#[cfg(feature = "X2")] +const ROWS: usize = 512; + +#[cfg(feature = "X3")] +const ROWS: usize = 1024; + +#[cfg(feature = "X4")] const ROWS: usize = 2048; +const ELEMENTS_PER_BANK: usize = 16; +const BANKS: usize = 16; +const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS); + #[no_mangle] pub extern "C" fn main() { - pim::state::set_kernel(&vadd::KERNEL); + #[cfg(feature = "X1")] + pim::state::set_kernel(&vadd::KERNEL_X1); + + #[cfg(feature = "X2")] + pim::state::set_kernel(&vadd::KERNEL_X2); + + #[cfg(feature = "X3")] + pim::state::set_kernel(&vadd::KERNEL_X3); + + #[cfg(feature = "X4")] + pim::state::set_kernel(&vadd::KERNEL_X4); let a = Box::new(pim::continuous_array::Vector( SVector::::from_fn(|i, _| F16x1(f16::from_f32(i as _))), @@ -43,7 +67,7 @@ pub extern "C" fn main() { { pim::state::set_bank_mode(BankMode::PimAllBank); - vadd::execute(&a.0, &b.0, &mut c.0, dummy.as_ref()); + vadd::execute::(&a.0, &b.0, &mut c.0, dummy.as_ref()); pim::state::set_bank_mode(BankMode::SingleBank); } diff --git a/pim-os/src/bin/vmul.rs b/pim-os/src/bin/vmul.rs index ef59a81..9d3ab84 100644 --- a/pim-os/src/bin/vmul.rs +++ b/pim-os/src/bin/vmul.rs @@ -15,11 +15,35 @@ use pim_os::{ uart::Uart0, }; +#[cfg(feature = "X1")] +const ROWS: usize = 256; + +#[cfg(feature = "X2")] +const ROWS: usize = 512; + +#[cfg(feature = "X3")] +const ROWS: usize = 1024; + +#[cfg(feature = "X4")] const ROWS: usize = 2048; +const ELEMENTS_PER_BANK: usize = 16; +const BANKS: usize = 16; +const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS); + #[no_mangle] pub extern "C" fn main() { - pim::state::set_kernel(&vmul::KERNEL); + #[cfg(feature = "X1")] + pim::state::set_kernel(&vmul::KERNEL_X1); + + #[cfg(feature = "X2")] + pim::state::set_kernel(&vmul::KERNEL_X2); + + #[cfg(feature = "X3")] + pim::state::set_kernel(&vmul::KERNEL_X3); + + #[cfg(feature = "X4")] + pim::state::set_kernel(&vmul::KERNEL_X4); let a = Box::new(pim::continuous_array::Vector( SVector::::from_fn(|_, _| F16x1(f16::from_f32(2 as _))), @@ -43,7 +67,7 @@ pub extern "C" fn main() { { pim::state::set_bank_mode(BankMode::PimAllBank); - vmul::execute(&a.0, &b.0, &mut c.0, dummy.as_ref()); + vmul::execute::(&a.0, &b.0, &mut c.0, dummy.as_ref()); pim::state::set_bank_mode(BankMode::SingleBank); } diff --git a/pim-os/src/kernel/gemv.rs b/pim-os/src/kernel/gemv.rs index e5bef30..196bfc5 100644 --- a/pim-os/src/kernel/gemv.rs +++ b/pim-os/src/kernel/gemv.rs @@ -47,7 +47,7 @@ pub const KERNEL_X1: Kernel = Kernel([ }, Instruction::JUMP { offset: -1, - count: 15, + count: 7, }, Instruction::FILL { src: File::GrfB { index: 0 }, diff --git a/pim-os/src/kernel/haxpy.rs b/pim-os/src/kernel/haxpy.rs index 7587af0..158c9f3 100644 --- a/pim-os/src/kernel/haxpy.rs +++ b/pim-os/src/kernel/haxpy.rs @@ -2,7 +2,119 @@ use crate::pim::{interleaved_array, operation::PimOperand, vector::F16x1}; use nalgebra::SVector; use pim_isa::{File, Instruction, Kernel}; -pub const KERNEL: Kernel = Kernel([ +pub const KERNEL_X1: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::SrfM { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + src2: File::SrfA { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub const KERNEL_X2: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::SrfM { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + src2: File::SrfA { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: false, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::GrfA { index: 1 }, + src2: File::SrfA { index: 1 }, + dst: File::GrfB { index: 1 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 1 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub const KERNEL_X3: Kernel = Kernel([ Instruction::MOV { src: File::Bank, dst: File::SrfM { index: 0 }, @@ -23,51 +135,31 @@ pub const KERNEL: Kernel = Kernel([ src: File::Bank, dst: File::GrfA { index: 3 }, }, - Instruction::MUL { - src0: File::SrfA { index: 0 }, - src1: File::GrfA { index: 0 }, - dst: File::GrfA { index: 0 }, - aam: false, - }, - Instruction::ADD { + Instruction::MAD { src0: File::Bank, src1: File::GrfA { index: 0 }, + src2: File::SrfA { index: 0 }, dst: File::GrfB { index: 0 }, aam: false, }, - Instruction::MUL { - src0: File::SrfA { index: 1 }, - src1: File::GrfA { index: 1 }, - dst: File::GrfA { index: 1 }, - aam: false, - }, - Instruction::ADD { + Instruction::MAD { src0: File::Bank, src1: File::GrfA { index: 1 }, + src2: File::SrfA { index: 1 }, dst: File::GrfB { index: 1 }, aam: false, }, - Instruction::MUL { - src0: File::SrfA { index: 2 }, - src1: File::GrfA { index: 2 }, - dst: File::GrfA { index: 2 }, - aam: false, - }, - Instruction::ADD { + Instruction::MAD { src0: File::Bank, src1: File::GrfA { index: 2 }, + src2: File::SrfA { index: 2 }, dst: File::GrfB { index: 2 }, aam: false, }, - Instruction::MUL { - src0: File::SrfA { index: 3 }, - src1: File::GrfA { index: 3 }, - dst: File::GrfA { index: 3 }, - aam: false, - }, - Instruction::ADD { + Instruction::MAD { src0: File::Bank, src1: File::GrfA { index: 3 }, + src2: File::SrfA { index: 3 }, dst: File::GrfB { index: 3 }, aam: false, }, @@ -102,9 +194,147 @@ pub const KERNEL: Kernel = Kernel([ Instruction::NOP, Instruction::NOP, Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, ]); -pub fn execute( +pub const KERNEL_X4: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::SrfM { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 4 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 5 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 6 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 7 }, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + src2: File::SrfA { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: false, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::GrfA { index: 1 }, + src2: File::SrfA { index: 1 }, + dst: File::GrfB { index: 1 }, + aam: false, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::GrfA { index: 2 }, + src2: File::SrfA { index: 2 }, + dst: File::GrfB { index: 2 }, + aam: false, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::GrfA { index: 3 }, + src2: File::SrfA { index: 3 }, + dst: File::GrfB { index: 3 }, + aam: false, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::GrfA { index: 4 }, + src2: File::SrfA { index: 4 }, + dst: File::GrfB { index: 4 }, + aam: false, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::GrfA { index: 5 }, + src2: File::SrfA { index: 5 }, + dst: File::GrfB { index: 5 }, + aam: false, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::GrfA { index: 6 }, + src2: File::SrfA { index: 6 }, + dst: File::GrfB { index: 6 }, + aam: false, + }, + Instruction::MAD { + src0: File::Bank, + src1: File::GrfA { index: 7 }, + src2: File::SrfA { index: 7 }, + dst: File::GrfB { index: 7 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 1 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 2 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 3 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 4 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 5 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 6 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 7 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub fn execute( a: &SVector, b: &SVector, interleaved_scalar: &interleaved_array::Vector<1>, @@ -113,18 +343,15 @@ pub fn execute( ) { interleaved_scalar.execute_read(); - a.fixed_rows_with_step::<4>(0, 16 * 16) + a.fixed_rows_with_step::(0, 256) .iter() .for_each(|entry| entry.execute_read()); - b.fixed_rows_with_step::<4>(0, 16 * 16) + b.fixed_rows_with_step::(0, 256) .iter() - .for_each(|entry| { - dummy.execute_read(); - entry.execute_read(); - }); + .for_each(|entry| entry.execute_read()); - c.fixed_rows_with_step_mut::<4>(0, 16 * 16) + c.fixed_rows_with_step_mut::(0, 256) .iter_mut() .for_each(|entry| entry.execute_write()); diff --git a/pim-os/src/kernel/vadd.rs b/pim-os/src/kernel/vadd.rs index 1461f99..d352192 100644 --- a/pim-os/src/kernel/vadd.rs +++ b/pim-os/src/kernel/vadd.rs @@ -2,7 +2,189 @@ use crate::pim::{operation::PimOperand, vector::F16x1}; use nalgebra::SVector; use pim_isa::{File, Instruction, Kernel}; -pub const KERNEL: Kernel = Kernel([ +pub const KERNEL_X1: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub const KERNEL_X2: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 1 }, + dst: File::GrfB { index: 1 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 1 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub const KERNEL_X3: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 1 }, + dst: File::GrfB { index: 1 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 2 }, + dst: File::GrfB { index: 2 }, + aam: false, + }, + Instruction::ADD { + src0: File::Bank, + src1: File::GrfA { index: 3 }, + dst: File::GrfB { index: 3 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 1 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 2 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 3 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub const KERNEL_X4: Kernel = Kernel([ Instruction::MOV { src: File::Bank, dst: File::GrfA { index: 0 }, @@ -125,19 +307,19 @@ pub const KERNEL: Kernel = Kernel([ Instruction::NOP, ]); -pub fn execute( +pub fn execute( a: &SVector, b: &SVector, c: &mut SVector, dummy: &impl PimOperand, ) { - a.fixed_rows_with_step::<8>(0, 16 * 16) + a.fixed_rows_with_step::(0, 256) .iter() .for_each(|entry| entry.execute_read()); - b.fixed_rows_with_step::<8>(0, 16 * 16) + b.fixed_rows_with_step::(0, 256) .iter() .for_each(|entry| entry.execute_read()); - c.fixed_rows_with_step_mut::<8>(0, 16 * 16) + c.fixed_rows_with_step_mut::(0, 256) .iter_mut() .for_each(|entry| entry.execute_write()); diff --git a/pim-os/src/kernel/vmul.rs b/pim-os/src/kernel/vmul.rs index 835878e..0da7cf0 100644 --- a/pim-os/src/kernel/vmul.rs +++ b/pim-os/src/kernel/vmul.rs @@ -2,7 +2,189 @@ use crate::pim::{operation::PimOperand, vector::F16x1}; use nalgebra::SVector; use pim_isa::{File, Instruction, Kernel}; -pub const KERNEL: Kernel = Kernel([ +pub const KERNEL_X1: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub const KERNEL_X2: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 1 }, + dst: File::GrfB { index: 1 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 1 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub const KERNEL_X3: Kernel = Kernel([ + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 0 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 1 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 2 }, + }, + Instruction::MOV { + src: File::Bank, + dst: File::GrfA { index: 3 }, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 0 }, + dst: File::GrfB { index: 0 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 1 }, + dst: File::GrfB { index: 1 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 2 }, + dst: File::GrfB { index: 2 }, + aam: false, + }, + Instruction::MUL { + src0: File::Bank, + src1: File::GrfA { index: 3 }, + dst: File::GrfB { index: 3 }, + aam: false, + }, + Instruction::FILL { + src: File::GrfB { index: 0 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 1 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 2 }, + dst: File::Bank, + }, + Instruction::FILL { + src: File::GrfB { index: 3 }, + dst: File::Bank, + }, + Instruction::EXIT, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, + Instruction::NOP, +]); + +pub const KERNEL_X4: Kernel = Kernel([ Instruction::MOV { src: File::Bank, dst: File::GrfA { index: 0 }, @@ -125,19 +307,19 @@ pub const KERNEL: Kernel = Kernel([ Instruction::NOP, ]); -pub fn execute( +pub fn execute( a: &SVector, b: &SVector, c: &mut SVector, dummy: &impl PimOperand, ) { - a.fixed_rows_with_step::<8>(0, 16 * 16) + a.fixed_rows_with_step::(0, 256) .iter() .for_each(|entry| entry.execute_read()); - b.fixed_rows_with_step::<8>(0, 16 * 16) + b.fixed_rows_with_step::(0, 256) .iter() .for_each(|entry| entry.execute_read()); - c.fixed_rows_with_step_mut::<8>(0, 16 * 16) + c.fixed_rows_with_step_mut::(0, 256) .iter_mut() .for_each(|entry| entry.execute_write());