Introduce X1, X2, X3, X4 complexity levels for all workloads

This commit is contained in:
2024-02-22 21:57:24 +01:00
parent ece54d5833
commit ba74572989
11 changed files with 758 additions and 54 deletions

View File

@@ -7,7 +7,18 @@ use core::fmt::Write;
use nalgebra::{SMatrix, SVector};
use pim_os::{pim::vector::F16x1, uart::Uart0};
#[cfg(feature = "X1")]
const ROWS: usize = 16;
#[cfg(feature = "X2")]
const ROWS: usize = 32;
#[cfg(feature = "X3")]
const ROWS: usize = 64;
#[cfg(feature = "X4")]
const ROWS: usize = 128;
const COLUMNS: usize = 128;
#[no_mangle]

View File

@@ -8,6 +8,16 @@ use nalgebra::SVector;
use num_traits::identities::Zero;
use pim_os::{pim::vector::F16x1, uart::Uart0};
#[cfg(feature = "X1")]
const ROWS: usize = 256;
#[cfg(feature = "X2")]
const ROWS: usize = 512;
#[cfg(feature = "X3")]
const ROWS: usize = 1024;
#[cfg(feature = "X4")]
const ROWS: usize = 2048;
#[no_mangle]

View File

@@ -7,6 +7,16 @@ use core::fmt::Write;
use nalgebra::SVector;
use pim_os::{pim::vector::F16x1, uart::Uart0};
#[cfg(feature = "X1")]
const ROWS: usize = 256;
#[cfg(feature = "X2")]
const ROWS: usize = 512;
#[cfg(feature = "X3")]
const ROWS: usize = 1024;
#[cfg(feature = "X4")]
const ROWS: usize = 2048;
#[no_mangle]

View File

@@ -7,6 +7,16 @@ use core::fmt::Write;
use nalgebra::SVector;
use pim_os::{pim::vector::F16x1, uart::Uart0};
#[cfg(feature = "X1")]
const ROWS: usize = 256;
#[cfg(feature = "X2")]
const ROWS: usize = 512;
#[cfg(feature = "X3")]
const ROWS: usize = 1024;
#[cfg(feature = "X4")]
const ROWS: usize = 2048;
#[no_mangle]

View File

@@ -18,11 +18,35 @@ use pim_os::{
uart::Uart0,
};
#[cfg(feature = "X1")]
const ROWS: usize = 128;
#[cfg(feature = "X2")]
const ROWS: usize = 256;
#[cfg(feature = "X3")]
const ROWS: usize = 512;
#[cfg(feature = "X4")]
const ROWS: usize = 1024;
const ELEMENTS_PER_BANK: usize = 16;
const BANKS: usize = 16;
const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS);
#[no_mangle]
pub extern "C" fn main() {
pim::state::set_kernel(&haxpy::KERNEL);
#[cfg(feature = "X1")]
pim::state::set_kernel(&haxpy::KERNEL_X1);
#[cfg(feature = "X2")]
pim::state::set_kernel(&haxpy::KERNEL_X2);
#[cfg(feature = "X3")]
pim::state::set_kernel(&haxpy::KERNEL_X3);
#[cfg(feature = "X4")]
pim::state::set_kernel(&haxpy::KERNEL_X4);
let a = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::from_fn(|i, _| F16x1(f16::from_f32(i as _))),
@@ -49,7 +73,7 @@ pub extern "C" fn main() {
{
pim::state::set_bank_mode(BankMode::PimAllBank);
haxpy::execute(
haxpy::execute::<ROWS, BLOCKS>(
&a.0,
&b.0,
&interleaved_scalar_vector,

View File

@@ -15,11 +15,35 @@ use pim_os::{
uart::Uart0,
};
#[cfg(feature = "X1")]
const ROWS: usize = 256;
#[cfg(feature = "X2")]
const ROWS: usize = 512;
#[cfg(feature = "X3")]
const ROWS: usize = 1024;
#[cfg(feature = "X4")]
const ROWS: usize = 2048;
const ELEMENTS_PER_BANK: usize = 16;
const BANKS: usize = 16;
const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS);
#[no_mangle]
pub extern "C" fn main() {
pim::state::set_kernel(&vadd::KERNEL);
#[cfg(feature = "X1")]
pim::state::set_kernel(&vadd::KERNEL_X1);
#[cfg(feature = "X2")]
pim::state::set_kernel(&vadd::KERNEL_X2);
#[cfg(feature = "X3")]
pim::state::set_kernel(&vadd::KERNEL_X3);
#[cfg(feature = "X4")]
pim::state::set_kernel(&vadd::KERNEL_X4);
let a = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::from_fn(|i, _| F16x1(f16::from_f32(i as _))),
@@ -43,7 +67,7 @@ pub extern "C" fn main() {
{
pim::state::set_bank_mode(BankMode::PimAllBank);
vadd::execute(&a.0, &b.0, &mut c.0, dummy.as_ref());
vadd::execute::<ROWS, BLOCKS>(&a.0, &b.0, &mut c.0, dummy.as_ref());
pim::state::set_bank_mode(BankMode::SingleBank);
}

View File

@@ -15,11 +15,35 @@ use pim_os::{
uart::Uart0,
};
#[cfg(feature = "X1")]
const ROWS: usize = 256;
#[cfg(feature = "X2")]
const ROWS: usize = 512;
#[cfg(feature = "X3")]
const ROWS: usize = 1024;
#[cfg(feature = "X4")]
const ROWS: usize = 2048;
const ELEMENTS_PER_BANK: usize = 16;
const BANKS: usize = 16;
const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS);
#[no_mangle]
pub extern "C" fn main() {
pim::state::set_kernel(&vmul::KERNEL);
#[cfg(feature = "X1")]
pim::state::set_kernel(&vmul::KERNEL_X1);
#[cfg(feature = "X2")]
pim::state::set_kernel(&vmul::KERNEL_X2);
#[cfg(feature = "X3")]
pim::state::set_kernel(&vmul::KERNEL_X3);
#[cfg(feature = "X4")]
pim::state::set_kernel(&vmul::KERNEL_X4);
let a = Box::new(pim::continuous_array::Vector(
SVector::<F16x1, ROWS>::from_fn(|_, _| F16x1(f16::from_f32(2 as _))),
@@ -43,7 +67,7 @@ pub extern "C" fn main() {
{
pim::state::set_bank_mode(BankMode::PimAllBank);
vmul::execute(&a.0, &b.0, &mut c.0, dummy.as_ref());
vmul::execute::<ROWS, BLOCKS>(&a.0, &b.0, &mut c.0, dummy.as_ref());
pim::state::set_bank_mode(BankMode::SingleBank);
}

View File

@@ -47,7 +47,7 @@ pub const KERNEL_X1: Kernel = Kernel([
},
Instruction::JUMP {
offset: -1,
count: 15,
count: 7,
},
Instruction::FILL {
src: File::GrfB { index: 0 },

View File

@@ -2,7 +2,119 @@ use crate::pim::{interleaved_array, operation::PimOperand, vector::F16x1};
use nalgebra::SVector;
use pim_isa::{File, Instruction, Kernel};
pub const KERNEL: Kernel = Kernel([
pub const KERNEL_X1: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::SrfM { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MAD {
src0: File::Bank,
src1: File::GrfA { index: 0 },
src2: File::SrfA { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub const KERNEL_X2: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::SrfM { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MAD {
src0: File::Bank,
src1: File::GrfA { index: 0 },
src2: File::SrfA { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::MAD {
src0: File::Bank,
src1: File::GrfA { index: 1 },
src2: File::SrfA { index: 1 },
dst: File::GrfB { index: 1 },
aam: false,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub const KERNEL_X3: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::SrfM { index: 0 },
@@ -23,51 +135,31 @@ pub const KERNEL: Kernel = Kernel([
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MUL {
src0: File::SrfA { index: 0 },
src1: File::GrfA { index: 0 },
dst: File::GrfA { index: 0 },
aam: false,
},
Instruction::ADD {
Instruction::MAD {
src0: File::Bank,
src1: File::GrfA { index: 0 },
src2: File::SrfA { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::MUL {
src0: File::SrfA { index: 1 },
src1: File::GrfA { index: 1 },
dst: File::GrfA { index: 1 },
aam: false,
},
Instruction::ADD {
Instruction::MAD {
src0: File::Bank,
src1: File::GrfA { index: 1 },
src2: File::SrfA { index: 1 },
dst: File::GrfB { index: 1 },
aam: false,
},
Instruction::MUL {
src0: File::SrfA { index: 2 },
src1: File::GrfA { index: 2 },
dst: File::GrfA { index: 2 },
aam: false,
},
Instruction::ADD {
Instruction::MAD {
src0: File::Bank,
src1: File::GrfA { index: 2 },
src2: File::SrfA { index: 2 },
dst: File::GrfB { index: 2 },
aam: false,
},
Instruction::MUL {
src0: File::SrfA { index: 3 },
src1: File::GrfA { index: 3 },
dst: File::GrfA { index: 3 },
aam: false,
},
Instruction::ADD {
Instruction::MAD {
src0: File::Bank,
src1: File::GrfA { index: 3 },
src2: File::SrfA { index: 3 },
dst: File::GrfB { index: 3 },
aam: false,
},
@@ -102,9 +194,147 @@ pub const KERNEL: Kernel = Kernel([
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub fn execute<const R: usize>(
pub const KERNEL_X4: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::SrfM { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 4 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 5 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 6 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 7 },
},
Instruction::MAD {
src0: File::Bank,
src1: File::GrfA { index: 0 },
src2: File::SrfA { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::MAD {
src0: File::Bank,
src1: File::GrfA { index: 1 },
src2: File::SrfA { index: 1 },
dst: File::GrfB { index: 1 },
aam: false,
},
Instruction::MAD {
src0: File::Bank,
src1: File::GrfA { index: 2 },
src2: File::SrfA { index: 2 },
dst: File::GrfB { index: 2 },
aam: false,
},
Instruction::MAD {
src0: File::Bank,
src1: File::GrfA { index: 3 },
src2: File::SrfA { index: 3 },
dst: File::GrfB { index: 3 },
aam: false,
},
Instruction::MAD {
src0: File::Bank,
src1: File::GrfA { index: 4 },
src2: File::SrfA { index: 4 },
dst: File::GrfB { index: 4 },
aam: false,
},
Instruction::MAD {
src0: File::Bank,
src1: File::GrfA { index: 5 },
src2: File::SrfA { index: 5 },
dst: File::GrfB { index: 5 },
aam: false,
},
Instruction::MAD {
src0: File::Bank,
src1: File::GrfA { index: 6 },
src2: File::SrfA { index: 6 },
dst: File::GrfB { index: 6 },
aam: false,
},
Instruction::MAD {
src0: File::Bank,
src1: File::GrfA { index: 7 },
src2: File::SrfA { index: 7 },
dst: File::GrfB { index: 7 },
aam: false,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 2 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 3 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 4 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 5 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 6 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 7 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub fn execute<const R: usize, const BLOCKS: usize>(
a: &SVector<F16x1, R>,
b: &SVector<F16x1, R>,
interleaved_scalar: &interleaved_array::Vector<1>,
@@ -113,18 +343,15 @@ pub fn execute<const R: usize>(
) {
interleaved_scalar.execute_read();
a.fixed_rows_with_step::<4>(0, 16 * 16)
a.fixed_rows_with_step::<BLOCKS>(0, 256)
.iter()
.for_each(|entry| entry.execute_read());
b.fixed_rows_with_step::<4>(0, 16 * 16)
b.fixed_rows_with_step::<BLOCKS>(0, 256)
.iter()
.for_each(|entry| {
dummy.execute_read();
entry.execute_read();
});
.for_each(|entry| entry.execute_read());
c.fixed_rows_with_step_mut::<4>(0, 16 * 16)
c.fixed_rows_with_step_mut::<BLOCKS>(0, 256)
.iter_mut()
.for_each(|entry| entry.execute_write());

View File

@@ -2,7 +2,189 @@ use crate::pim::{operation::PimOperand, vector::F16x1};
use nalgebra::SVector;
use pim_isa::{File, Instruction, Kernel};
pub const KERNEL: Kernel = Kernel([
pub const KERNEL_X1: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub const KERNEL_X2: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 1 },
dst: File::GrfB { index: 1 },
aam: false,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub const KERNEL_X3: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 1 },
dst: File::GrfB { index: 1 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 2 },
dst: File::GrfB { index: 2 },
aam: false,
},
Instruction::ADD {
src0: File::Bank,
src1: File::GrfA { index: 3 },
dst: File::GrfB { index: 3 },
aam: false,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 2 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 3 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub const KERNEL_X4: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
@@ -125,19 +307,19 @@ pub const KERNEL: Kernel = Kernel([
Instruction::NOP,
]);
pub fn execute<const R: usize>(
pub fn execute<const R: usize, const BLOCKS: usize>(
a: &SVector<F16x1, R>,
b: &SVector<F16x1, R>,
c: &mut SVector<F16x1, R>,
dummy: &impl PimOperand,
) {
a.fixed_rows_with_step::<8>(0, 16 * 16)
a.fixed_rows_with_step::<BLOCKS>(0, 256)
.iter()
.for_each(|entry| entry.execute_read());
b.fixed_rows_with_step::<8>(0, 16 * 16)
b.fixed_rows_with_step::<BLOCKS>(0, 256)
.iter()
.for_each(|entry| entry.execute_read());
c.fixed_rows_with_step_mut::<8>(0, 16 * 16)
c.fixed_rows_with_step_mut::<BLOCKS>(0, 256)
.iter_mut()
.for_each(|entry| entry.execute_write());

View File

@@ -2,7 +2,189 @@ use crate::pim::{operation::PimOperand, vector::F16x1};
use nalgebra::SVector;
use pim_isa::{File, Instruction, Kernel};
pub const KERNEL: Kernel = Kernel([
pub const KERNEL_X1: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub const KERNEL_X2: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 1 },
dst: File::GrfB { index: 1 },
aam: false,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub const KERNEL_X3: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 2 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 3 },
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 0 },
dst: File::GrfB { index: 0 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 1 },
dst: File::GrfB { index: 1 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 2 },
dst: File::GrfB { index: 2 },
aam: false,
},
Instruction::MUL {
src0: File::Bank,
src1: File::GrfA { index: 3 },
dst: File::GrfB { index: 3 },
aam: false,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 2 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 3 },
dst: File::Bank,
},
Instruction::EXIT,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);
pub const KERNEL_X4: Kernel = Kernel([
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 0 },
@@ -125,19 +307,19 @@ pub const KERNEL: Kernel = Kernel([
Instruction::NOP,
]);
pub fn execute<const R: usize>(
pub fn execute<const R: usize, const BLOCKS: usize>(
a: &SVector<F16x1, R>,
b: &SVector<F16x1, R>,
c: &mut SVector<F16x1, R>,
dummy: &impl PimOperand,
) {
a.fixed_rows_with_step::<8>(0, 16 * 16)
a.fixed_rows_with_step::<BLOCKS>(0, 256)
.iter()
.for_each(|entry| entry.execute_read());
b.fixed_rows_with_step::<8>(0, 16 * 16)
b.fixed_rows_with_step::<BLOCKS>(0, 256)
.iter()
.for_each(|entry| entry.execute_read());
c.fixed_rows_with_step_mut::<8>(0, 16 * 16)
c.fixed_rows_with_step_mut::<BLOCKS>(0, 256)
.iter_mut()
.for_each(|entry| entry.execute_write());