Compare commits
2 Commits
ac60c8285c
...
93a81e4997
| Author | SHA1 | Date | |
|---|---|---|---|
| 93a81e4997 | |||
| b20f4033c9 |
6
pim-os/build.sh
Executable file
6
pim-os/build.sh
Executable file
@@ -0,0 +1,6 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
CARGO_TARGET_DIR=kernels/X1 cargo build --release --features X1
|
||||||
|
CARGO_TARGET_DIR=kernels/X2 cargo build --release --features X2
|
||||||
|
CARGO_TARGET_DIR=kernels/X3 cargo build --release --features X3
|
||||||
|
CARGO_TARGET_DIR=kernels/X4 cargo build --release --features X4
|
||||||
45
pim-os/src/bin/classic_gemv.rs
Normal file
45
pim-os/src/bin/classic_gemv.rs
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
#![no_std]
|
||||||
|
#![no_main]
|
||||||
|
|
||||||
|
extern crate alloc;
|
||||||
|
|
||||||
|
use core::{arch::asm, fmt::Write};
|
||||||
|
use nalgebra::{SMatrix, SVector};
|
||||||
|
use pim_os::{pim::vector::F16x1, uart::Uart0};
|
||||||
|
|
||||||
|
#[cfg(feature = "X1")]
|
||||||
|
const ROWS: usize = 128;
|
||||||
|
|
||||||
|
#[cfg(feature = "X2")]
|
||||||
|
const ROWS: usize = 256;
|
||||||
|
|
||||||
|
#[cfg(feature = "X3")]
|
||||||
|
const ROWS: usize = 512;
|
||||||
|
|
||||||
|
#[cfg(feature = "X4")]
|
||||||
|
const ROWS: usize = 1024;
|
||||||
|
|
||||||
|
const COLUMNS: usize = 128;
|
||||||
|
|
||||||
|
#[no_mangle]
|
||||||
|
pub extern "C" fn main() {
|
||||||
|
let matrix = SMatrix::<F16x1, ROWS, COLUMNS>::zeros();
|
||||||
|
let input_vector = SVector::<F16x1, COLUMNS>::zeros();
|
||||||
|
|
||||||
|
// Flush cache
|
||||||
|
for element in matrix.iter() {
|
||||||
|
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
|
||||||
|
}
|
||||||
|
for element in input_vector.iter() {
|
||||||
|
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
|
||||||
|
}
|
||||||
|
|
||||||
|
pim_os::m5op::exit(0);
|
||||||
|
for _ in 0..10 {
|
||||||
|
let output_vector = matrix * input_vector;
|
||||||
|
core::hint::black_box(output_vector);
|
||||||
|
}
|
||||||
|
pim_os::m5op::exit(0);
|
||||||
|
|
||||||
|
writeln!(Uart0, "Done").unwrap();
|
||||||
|
}
|
||||||
46
pim-os/src/bin/classic_gemv_layers.rs
Normal file
46
pim-os/src/bin/classic_gemv_layers.rs
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
#![no_std]
|
||||||
|
#![no_main]
|
||||||
|
|
||||||
|
extern crate alloc;
|
||||||
|
|
||||||
|
use core::fmt::Write;
|
||||||
|
use half::f16;
|
||||||
|
use nalgebra::{SMatrix, SVector};
|
||||||
|
use pim_os::{pim::vector::F16x1, uart::Uart0};
|
||||||
|
|
||||||
|
#[cfg(feature = "X1")]
|
||||||
|
const DIMENSIONS: usize = 128;
|
||||||
|
|
||||||
|
#[cfg(feature = "X2")]
|
||||||
|
const DIMENSIONS: usize = 256;
|
||||||
|
|
||||||
|
#[cfg(feature = "X3")]
|
||||||
|
const DIMENSIONS: usize = 512;
|
||||||
|
|
||||||
|
#[cfg(feature = "X4")]
|
||||||
|
const DIMENSIONS: usize = 1024;
|
||||||
|
|
||||||
|
const ROWS: usize = DIMENSIONS;
|
||||||
|
const COLUMNS: usize = DIMENSIONS;
|
||||||
|
|
||||||
|
#[no_mangle]
|
||||||
|
pub extern "C" fn main() {
|
||||||
|
let matrix = SMatrix::<F16x1, ROWS, COLUMNS>::zeros();
|
||||||
|
let mut input_vector = SVector::<F16x1, COLUMNS>::zeros();
|
||||||
|
|
||||||
|
pim_os::m5op::exit(0);
|
||||||
|
for _ in 0..1 {
|
||||||
|
input_vector = matrix * input_vector;
|
||||||
|
input_vector = input_vector.map(|element| {
|
||||||
|
if element.0 < f16::ZERO {
|
||||||
|
F16x1(f16::ZERO)
|
||||||
|
} else {
|
||||||
|
element
|
||||||
|
}
|
||||||
|
});
|
||||||
|
core::hint::black_box(input_vector);
|
||||||
|
}
|
||||||
|
pim_os::m5op::exit(0);
|
||||||
|
|
||||||
|
writeln!(Uart0, "Done").unwrap();
|
||||||
|
}
|
||||||
46
pim-os/src/bin/classic_haxpy.rs
Normal file
46
pim-os/src/bin/classic_haxpy.rs
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
#![no_std]
|
||||||
|
#![no_main]
|
||||||
|
|
||||||
|
extern crate alloc;
|
||||||
|
|
||||||
|
use core::{arch::asm, fmt::Write};
|
||||||
|
use nalgebra::SVector;
|
||||||
|
use num_traits::identities::Zero;
|
||||||
|
use pim_os::{pim::vector::F16x1, uart::Uart0};
|
||||||
|
|
||||||
|
#[cfg(feature = "X1")]
|
||||||
|
const ROWS: usize = 256;
|
||||||
|
|
||||||
|
#[cfg(feature = "X2")]
|
||||||
|
const ROWS: usize = 512;
|
||||||
|
|
||||||
|
#[cfg(feature = "X3")]
|
||||||
|
const ROWS: usize = 1024;
|
||||||
|
|
||||||
|
#[cfg(feature = "X4")]
|
||||||
|
const ROWS: usize = 2048;
|
||||||
|
|
||||||
|
#[no_mangle]
|
||||||
|
pub extern "C" fn main() {
|
||||||
|
let a = SVector::<F16x1, ROWS>::zeros();
|
||||||
|
let b = SVector::<F16x1, ROWS>::zeros();
|
||||||
|
let s = F16x1::zero();
|
||||||
|
|
||||||
|
// Flush cache
|
||||||
|
for element in a.iter() {
|
||||||
|
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
|
||||||
|
}
|
||||||
|
for element in b.iter() {
|
||||||
|
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
|
||||||
|
}
|
||||||
|
|
||||||
|
pim_os::m5op::exit(0);
|
||||||
|
for _ in 0..10 {
|
||||||
|
let a_s = a * s;
|
||||||
|
let c = a_s.component_mul(&b);
|
||||||
|
core::hint::black_box(c);
|
||||||
|
}
|
||||||
|
pim_os::m5op::exit(0);
|
||||||
|
|
||||||
|
writeln!(Uart0, "Done").unwrap();
|
||||||
|
}
|
||||||
42
pim-os/src/bin/classic_vadd.rs
Normal file
42
pim-os/src/bin/classic_vadd.rs
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
#![no_std]
|
||||||
|
#![no_main]
|
||||||
|
|
||||||
|
extern crate alloc;
|
||||||
|
|
||||||
|
use core::{arch::asm, fmt::Write};
|
||||||
|
use nalgebra::SVector;
|
||||||
|
use pim_os::{pim::vector::F16x1, uart::Uart0};
|
||||||
|
|
||||||
|
#[cfg(feature = "X1")]
|
||||||
|
const ROWS: usize = 256;
|
||||||
|
|
||||||
|
#[cfg(feature = "X2")]
|
||||||
|
const ROWS: usize = 512;
|
||||||
|
|
||||||
|
#[cfg(feature = "X3")]
|
||||||
|
const ROWS: usize = 1024;
|
||||||
|
|
||||||
|
#[cfg(feature = "X4")]
|
||||||
|
const ROWS: usize = 2048;
|
||||||
|
|
||||||
|
#[no_mangle]
|
||||||
|
pub extern "C" fn main() {
|
||||||
|
let a = SVector::<F16x1, ROWS>::zeros();
|
||||||
|
let b = SVector::<F16x1, ROWS>::zeros();
|
||||||
|
|
||||||
|
for element in a.iter() {
|
||||||
|
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
|
||||||
|
}
|
||||||
|
for element in b.iter() {
|
||||||
|
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
|
||||||
|
}
|
||||||
|
|
||||||
|
pim_os::m5op::exit(0);
|
||||||
|
for _ in 0..10 {
|
||||||
|
let c = a + b;
|
||||||
|
core::hint::black_box(c);
|
||||||
|
}
|
||||||
|
pim_os::m5op::exit(0);
|
||||||
|
|
||||||
|
writeln!(Uart0, "Done").unwrap();
|
||||||
|
}
|
||||||
42
pim-os/src/bin/classic_vmul.rs
Normal file
42
pim-os/src/bin/classic_vmul.rs
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
#![no_std]
|
||||||
|
#![no_main]
|
||||||
|
|
||||||
|
extern crate alloc;
|
||||||
|
|
||||||
|
use core::{arch::asm, fmt::Write};
|
||||||
|
use nalgebra::SVector;
|
||||||
|
use pim_os::{pim::vector::F16x1, uart::Uart0};
|
||||||
|
|
||||||
|
#[cfg(feature = "X1")]
|
||||||
|
const ROWS: usize = 256;
|
||||||
|
|
||||||
|
#[cfg(feature = "X2")]
|
||||||
|
const ROWS: usize = 512;
|
||||||
|
|
||||||
|
#[cfg(feature = "X3")]
|
||||||
|
const ROWS: usize = 1024;
|
||||||
|
|
||||||
|
#[cfg(feature = "X4")]
|
||||||
|
const ROWS: usize = 2048;
|
||||||
|
|
||||||
|
#[no_mangle]
|
||||||
|
pub extern "C" fn main() {
|
||||||
|
let a = SVector::<F16x1, ROWS>::zeros();
|
||||||
|
let b = SVector::<F16x1, ROWS>::zeros();
|
||||||
|
|
||||||
|
for element in a.iter() {
|
||||||
|
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
|
||||||
|
}
|
||||||
|
for element in b.iter() {
|
||||||
|
unsafe { asm!("dc civac, {val}", val = in(reg) element) }
|
||||||
|
}
|
||||||
|
|
||||||
|
pim_os::m5op::exit(0);
|
||||||
|
for _ in 0..10 {
|
||||||
|
let c = a.component_mul(&b);
|
||||||
|
core::hint::black_box(c);
|
||||||
|
}
|
||||||
|
pim_os::m5op::exit(0);
|
||||||
|
|
||||||
|
writeln!(Uart0, "Done").unwrap();
|
||||||
|
}
|
||||||
@@ -23,9 +23,31 @@ const COLUMNS: usize = 128;
|
|||||||
const X16_ROWS: usize = ROWS / 16;
|
const X16_ROWS: usize = ROWS / 16;
|
||||||
const X16_COLUMNS: usize = COLUMNS / 16;
|
const X16_COLUMNS: usize = COLUMNS / 16;
|
||||||
|
|
||||||
|
#[cfg(feature = "X1")]
|
||||||
|
const REPETITIONS: usize = 1;
|
||||||
|
|
||||||
|
#[cfg(feature = "X2")]
|
||||||
|
const REPETITIONS: usize = 2;
|
||||||
|
|
||||||
|
#[cfg(feature = "X3")]
|
||||||
|
const REPETITIONS: usize = 4;
|
||||||
|
|
||||||
|
#[cfg(feature = "X4")]
|
||||||
|
const REPETITIONS: usize = 8;
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub extern "C" fn main() {
|
pub extern "C" fn main() {
|
||||||
pim::state::set_kernel(&gemv::KERNEL);
|
// #[cfg(feature = "X1")]
|
||||||
|
// pim::state::set_kernel(&gemv::KERNEL_X1);
|
||||||
|
|
||||||
|
// #[cfg(feature = "X2")]
|
||||||
|
// pim::state::set_kernel(&gemv::KERNEL_X2);
|
||||||
|
|
||||||
|
// #[cfg(feature = "X3")]
|
||||||
|
// pim::state::set_kernel(&gemv::KERNEL_X3);
|
||||||
|
|
||||||
|
// #[cfg(feature = "X4")]
|
||||||
|
pim::state::set_kernel(&gemv::KERNEL_X4);
|
||||||
|
|
||||||
let mut matrix = SMatrix::<_, ROWS, COLUMNS>::zeros();
|
let mut matrix = SMatrix::<_, ROWS, COLUMNS>::zeros();
|
||||||
matrix.fill_lower_triangle(F16x1::one(), 0);
|
matrix.fill_lower_triangle(F16x1::one(), 0);
|
||||||
@@ -42,17 +64,25 @@ pub extern "C" fn main() {
|
|||||||
// Verify everything is correctly initialized before PIM operation
|
// Verify everything is correctly initialized before PIM operation
|
||||||
barrier::dsb(barrier::SY);
|
barrier::dsb(barrier::SY);
|
||||||
|
|
||||||
|
pim_os::m5op::exit(0);
|
||||||
|
for _ in 0..10 {
|
||||||
// Execute kernel
|
// Execute kernel
|
||||||
|
{
|
||||||
pim::state::set_bank_mode(BankMode::PimAllBank);
|
pim::state::set_bank_mode(BankMode::PimAllBank);
|
||||||
|
|
||||||
|
for _ in 0..REPETITIONS {
|
||||||
gemv::execute(
|
gemv::execute(
|
||||||
pim_matrix.as_ref(),
|
pim_matrix.as_ref(),
|
||||||
interleaved_input_vector.as_ref(),
|
interleaved_input_vector.as_ref(),
|
||||||
output_partial_sum_vector.as_mut(),
|
output_partial_sum_vector.as_mut(),
|
||||||
dummy.as_ref(),
|
dummy.as_ref(),
|
||||||
);
|
);
|
||||||
pim::state::set_bank_mode(BankMode::SingleBank);
|
}
|
||||||
|
|
||||||
writeln!(Uart0, "{output_partial_sum_vector}").unwrap();
|
pim::state::set_bank_mode(BankMode::SingleBank);
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeln!(Uart0, "{output_partial_sum_vector}").unwrap();
|
||||||
|
|
||||||
let output_vector = SVector::<F16x1, ROWS>::from_fn(|r, _| {
|
let output_vector = SVector::<F16x1, ROWS>::from_fn(|r, _| {
|
||||||
output_partial_sum_vector[r]
|
output_partial_sum_vector[r]
|
||||||
@@ -61,7 +91,9 @@ pub extern "C" fn main() {
|
|||||||
.fold(F16x1::zero(), |acc, val| acc + *val)
|
.fold(F16x1::zero(), |acc, val| acc + *val)
|
||||||
});
|
});
|
||||||
core::hint::black_box(output_vector);
|
core::hint::black_box(output_vector);
|
||||||
|
}
|
||||||
|
pim_os::m5op::exit(0);
|
||||||
|
|
||||||
writeln!(Uart0, "{output_vector}").unwrap();
|
// writeln!(Uart0, "{output_vector}").unwrap();
|
||||||
writeln!(Uart0, "Done").unwrap();
|
writeln!(Uart0, "Done").unwrap();
|
||||||
}
|
}
|
||||||
|
|||||||
91
pim-os/src/bin/gemv_layers.rs
Normal file
91
pim-os/src/bin/gemv_layers.rs
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
#![no_std]
|
||||||
|
#![no_main]
|
||||||
|
|
||||||
|
extern crate alloc;
|
||||||
|
|
||||||
|
use aarch64_cpu::asm::barrier;
|
||||||
|
use alloc::boxed::Box;
|
||||||
|
use core::fmt::Write;
|
||||||
|
use nalgebra::{SMatrix, SVector};
|
||||||
|
use num_traits::{One, Zero};
|
||||||
|
use pim_isa::BankMode;
|
||||||
|
use pim_os::{
|
||||||
|
kernel::gemv,
|
||||||
|
pim::{
|
||||||
|
self, interleaved_array,
|
||||||
|
vector::{F16x1, F16x16},
|
||||||
|
},
|
||||||
|
uart::Uart0,
|
||||||
|
};
|
||||||
|
|
||||||
|
#[cfg(feature = "X1")]
|
||||||
|
const REPETITIONS: usize = 1;
|
||||||
|
|
||||||
|
#[cfg(feature = "X2")]
|
||||||
|
const REPETITIONS: usize = 2;
|
||||||
|
|
||||||
|
#[cfg(feature = "X3")]
|
||||||
|
const REPETITIONS: usize = 4;
|
||||||
|
|
||||||
|
#[cfg(feature = "X4")]
|
||||||
|
const REPETITIONS: usize = 8;
|
||||||
|
|
||||||
|
const ROWS: usize = 128;
|
||||||
|
const COLUMNS: usize = 128; // Has to be 128. Assume DIMENSIONS and fill rest with zeros.
|
||||||
|
const X16_ROWS: usize = ROWS / 16;
|
||||||
|
const X16_COLUMNS: usize = COLUMNS / 16;
|
||||||
|
|
||||||
|
#[no_mangle]
|
||||||
|
pub extern "C" fn main() {
|
||||||
|
pim::state::set_kernel(&gemv::KERNEL_X4);
|
||||||
|
|
||||||
|
let mut matrix = SMatrix::<_, ROWS, COLUMNS>::zeros();
|
||||||
|
matrix.fill_lower_triangle(F16x1::one(), 0);
|
||||||
|
|
||||||
|
let pim_matrix = Box::new(pim::continuous_array::Matrix::<X16_ROWS, X16_COLUMNS>::from(matrix));
|
||||||
|
|
||||||
|
let input_vector = SVector::<_, X16_COLUMNS>::from_element(F16x16::one());
|
||||||
|
|
||||||
|
let mut output_partial_sum_vector = Box::new(SVector::<F16x16, ROWS>::zeros());
|
||||||
|
|
||||||
|
let dummy = Box::new(0);
|
||||||
|
|
||||||
|
// Verify everything is correctly initialized before PIM operation
|
||||||
|
barrier::dsb(barrier::SY);
|
||||||
|
|
||||||
|
pim_os::m5op::exit(0);
|
||||||
|
for _ in 0..1 {
|
||||||
|
let interleaved_input_vector =
|
||||||
|
Box::new(interleaved_array::Vector::from(input_vector.clone()));
|
||||||
|
|
||||||
|
// Execute kernel
|
||||||
|
{
|
||||||
|
pim::state::set_bank_mode(BankMode::PimAllBank);
|
||||||
|
|
||||||
|
for _ in 0..(REPETITIONS * REPETITIONS) {
|
||||||
|
gemv::execute(
|
||||||
|
pim_matrix.as_ref(),
|
||||||
|
interleaved_input_vector.as_ref(),
|
||||||
|
output_partial_sum_vector.as_mut(),
|
||||||
|
dummy.as_ref(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
pim::state::set_bank_mode(BankMode::SingleBank);
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeln!(Uart0, "{output_partial_sum_vector}").unwrap();
|
||||||
|
|
||||||
|
let output_vector = SVector::<F16x1, ROWS>::from_fn(|r, _| {
|
||||||
|
output_partial_sum_vector[r]
|
||||||
|
.0
|
||||||
|
.iter()
|
||||||
|
.fold(F16x1::zero(), |acc, val| acc + *val)
|
||||||
|
});
|
||||||
|
|
||||||
|
core::hint::black_box(output_vector);
|
||||||
|
}
|
||||||
|
pim_os::m5op::exit(0);
|
||||||
|
|
||||||
|
writeln!(Uart0, "Done").unwrap();
|
||||||
|
}
|
||||||
@@ -18,14 +18,35 @@ use pim_os::{
|
|||||||
uart::Uart0,
|
uart::Uart0,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#[cfg(feature = "X1")]
|
||||||
const ROWS: usize = 256;
|
const ROWS: usize = 256;
|
||||||
|
|
||||||
|
#[cfg(feature = "X2")]
|
||||||
|
const ROWS: usize = 512;
|
||||||
|
|
||||||
|
#[cfg(feature = "X3")]
|
||||||
|
const ROWS: usize = 1024;
|
||||||
|
|
||||||
|
#[cfg(feature = "X4")]
|
||||||
|
const ROWS: usize = 2048;
|
||||||
|
|
||||||
const ELEMENTS_PER_BANK: usize = 16;
|
const ELEMENTS_PER_BANK: usize = 16;
|
||||||
const BANKS: usize = 16;
|
const BANKS: usize = 16;
|
||||||
const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS);
|
const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS);
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub extern "C" fn main() {
|
pub extern "C" fn main() {
|
||||||
pim::state::set_kernel(&haxpy::KERNEL);
|
#[cfg(feature = "X1")]
|
||||||
|
pim::state::set_kernel(&haxpy::KERNEL_X1);
|
||||||
|
|
||||||
|
#[cfg(feature = "X2")]
|
||||||
|
pim::state::set_kernel(&haxpy::KERNEL_X2);
|
||||||
|
|
||||||
|
#[cfg(feature = "X3")]
|
||||||
|
pim::state::set_kernel(&haxpy::KERNEL_X3);
|
||||||
|
|
||||||
|
#[cfg(feature = "X4")]
|
||||||
|
pim::state::set_kernel(&haxpy::KERNEL_X4);
|
||||||
|
|
||||||
let a = Box::new(pim::continuous_array::Vector(
|
let a = Box::new(pim::continuous_array::Vector(
|
||||||
SVector::<F16x1, ROWS>::from_fn(|i, _| F16x1(f16::from_f32(i as _))),
|
SVector::<F16x1, ROWS>::from_fn(|i, _| F16x1(f16::from_f32(i as _))),
|
||||||
@@ -37,7 +58,7 @@ pub extern "C" fn main() {
|
|||||||
let scalar_vector = SVector::<F16x16, 1>::from_element(F16x16([F16x1(f16::NEG_ONE); 16]));
|
let scalar_vector = SVector::<F16x16, 1>::from_element(F16x16([F16x1(f16::NEG_ONE); 16]));
|
||||||
let interleaved_scalar_vector = Box::new(interleaved_array::Vector::from(scalar_vector));
|
let interleaved_scalar_vector = Box::new(interleaved_array::Vector::from(scalar_vector));
|
||||||
|
|
||||||
writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap();
|
// writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap();
|
||||||
|
|
||||||
let mut c = Box::new(pim::continuous_array::Vector(
|
let mut c = Box::new(pim::continuous_array::Vector(
|
||||||
SVector::<F16x1, ROWS>::zeros(),
|
SVector::<F16x1, ROWS>::zeros(),
|
||||||
@@ -49,7 +70,11 @@ pub extern "C" fn main() {
|
|||||||
barrier::dsb(barrier::SY);
|
barrier::dsb(barrier::SY);
|
||||||
|
|
||||||
// Execute kernel
|
// Execute kernel
|
||||||
|
{
|
||||||
pim::state::set_bank_mode(BankMode::PimAllBank);
|
pim::state::set_bank_mode(BankMode::PimAllBank);
|
||||||
|
|
||||||
|
pim_os::m5op::exit(0);
|
||||||
|
for _ in 0..10 {
|
||||||
haxpy::execute::<ROWS, BLOCKS>(
|
haxpy::execute::<ROWS, BLOCKS>(
|
||||||
&a.0,
|
&a.0,
|
||||||
&b.0,
|
&b.0,
|
||||||
@@ -57,8 +82,12 @@ pub extern "C" fn main() {
|
|||||||
&mut c.0,
|
&mut c.0,
|
||||||
dummy.as_ref(),
|
dummy.as_ref(),
|
||||||
);
|
);
|
||||||
pim::state::set_bank_mode(BankMode::SingleBank);
|
}
|
||||||
|
pim_os::m5op::exit(0);
|
||||||
|
|
||||||
writeln!(Uart0, "{}", c.0).unwrap();
|
pim::state::set_bank_mode(BankMode::SingleBank);
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeln!(Uart0, "{}", c.0).unwrap();
|
||||||
writeln!(Uart0, "Done").unwrap();
|
writeln!(Uart0, "Done").unwrap();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,14 +15,35 @@ use pim_os::{
|
|||||||
uart::Uart0,
|
uart::Uart0,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#[cfg(feature = "X1")]
|
||||||
const ROWS: usize = 256;
|
const ROWS: usize = 256;
|
||||||
|
|
||||||
|
#[cfg(feature = "X2")]
|
||||||
|
const ROWS: usize = 512;
|
||||||
|
|
||||||
|
#[cfg(feature = "X3")]
|
||||||
|
const ROWS: usize = 1024;
|
||||||
|
|
||||||
|
#[cfg(feature = "X4")]
|
||||||
|
const ROWS: usize = 2048;
|
||||||
|
|
||||||
const ELEMENTS_PER_BANK: usize = 16;
|
const ELEMENTS_PER_BANK: usize = 16;
|
||||||
const BANKS: usize = 16;
|
const BANKS: usize = 16;
|
||||||
const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS);
|
const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS);
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub extern "C" fn main() {
|
pub extern "C" fn main() {
|
||||||
pim::state::set_kernel(&vadd::KERNEL);
|
#[cfg(feature = "X1")]
|
||||||
|
pim::state::set_kernel(&vadd::KERNEL_X1);
|
||||||
|
|
||||||
|
#[cfg(feature = "X2")]
|
||||||
|
pim::state::set_kernel(&vadd::KERNEL_X2);
|
||||||
|
|
||||||
|
#[cfg(feature = "X3")]
|
||||||
|
pim::state::set_kernel(&vadd::KERNEL_X3);
|
||||||
|
|
||||||
|
#[cfg(feature = "X4")]
|
||||||
|
pim::state::set_kernel(&vadd::KERNEL_X4);
|
||||||
|
|
||||||
let a = Box::new(pim::continuous_array::Vector(
|
let a = Box::new(pim::continuous_array::Vector(
|
||||||
SVector::<F16x1, ROWS>::from_fn(|i, _| F16x1(f16::from_f32(i as _))),
|
SVector::<F16x1, ROWS>::from_fn(|i, _| F16x1(f16::from_f32(i as _))),
|
||||||
@@ -31,7 +52,7 @@ pub extern "C" fn main() {
|
|||||||
SVector::<F16x1, ROWS>::from_fn(|i, _| F16x1(f16::from_f32((ROWS - i) as _))),
|
SVector::<F16x1, ROWS>::from_fn(|i, _| F16x1(f16::from_f32((ROWS - i) as _))),
|
||||||
));
|
));
|
||||||
|
|
||||||
writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap();
|
// writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap();
|
||||||
|
|
||||||
let mut c = Box::new(pim::continuous_array::Vector(
|
let mut c = Box::new(pim::continuous_array::Vector(
|
||||||
SVector::<F16x1, ROWS>::zeros(),
|
SVector::<F16x1, ROWS>::zeros(),
|
||||||
@@ -43,10 +64,18 @@ pub extern "C" fn main() {
|
|||||||
barrier::dsb(barrier::SY);
|
barrier::dsb(barrier::SY);
|
||||||
|
|
||||||
// Execute kernel
|
// Execute kernel
|
||||||
|
{
|
||||||
pim::state::set_bank_mode(BankMode::PimAllBank);
|
pim::state::set_bank_mode(BankMode::PimAllBank);
|
||||||
vadd::execute::<ROWS, BLOCKS>(&a.0, &b.0, &mut c.0, dummy.as_ref());
|
|
||||||
pim::state::set_bank_mode(BankMode::SingleBank);
|
|
||||||
|
|
||||||
writeln!(Uart0, "{}", c.0).unwrap();
|
pim_os::m5op::exit(0);
|
||||||
|
for _ in 0..10 {
|
||||||
|
vadd::execute::<ROWS, BLOCKS>(&a.0, &b.0, &mut c.0, dummy.as_ref());
|
||||||
|
}
|
||||||
|
pim_os::m5op::exit(0);
|
||||||
|
|
||||||
|
pim::state::set_bank_mode(BankMode::SingleBank);
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeln!(Uart0, "{}", c.0).unwrap();
|
||||||
writeln!(Uart0, "Done").unwrap();
|
writeln!(Uart0, "Done").unwrap();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,14 +15,35 @@ use pim_os::{
|
|||||||
uart::Uart0,
|
uart::Uart0,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#[cfg(feature = "X1")]
|
||||||
const ROWS: usize = 256;
|
const ROWS: usize = 256;
|
||||||
|
|
||||||
|
#[cfg(feature = "X2")]
|
||||||
|
const ROWS: usize = 512;
|
||||||
|
|
||||||
|
#[cfg(feature = "X3")]
|
||||||
|
const ROWS: usize = 1024;
|
||||||
|
|
||||||
|
#[cfg(feature = "X4")]
|
||||||
|
const ROWS: usize = 2048;
|
||||||
|
|
||||||
const ELEMENTS_PER_BANK: usize = 16;
|
const ELEMENTS_PER_BANK: usize = 16;
|
||||||
const BANKS: usize = 16;
|
const BANKS: usize = 16;
|
||||||
const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS);
|
const BLOCKS: usize = ROWS / (ELEMENTS_PER_BANK * BANKS);
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub extern "C" fn main() {
|
pub extern "C" fn main() {
|
||||||
pim::state::set_kernel(&vmul::KERNEL);
|
#[cfg(feature = "X1")]
|
||||||
|
pim::state::set_kernel(&vmul::KERNEL_X1);
|
||||||
|
|
||||||
|
#[cfg(feature = "X2")]
|
||||||
|
pim::state::set_kernel(&vmul::KERNEL_X2);
|
||||||
|
|
||||||
|
#[cfg(feature = "X3")]
|
||||||
|
pim::state::set_kernel(&vmul::KERNEL_X3);
|
||||||
|
|
||||||
|
#[cfg(feature = "X4")]
|
||||||
|
pim::state::set_kernel(&vmul::KERNEL_X4);
|
||||||
|
|
||||||
let a = Box::new(pim::continuous_array::Vector(
|
let a = Box::new(pim::continuous_array::Vector(
|
||||||
SVector::<F16x1, ROWS>::from_fn(|_, _| F16x1(f16::from_f32(2 as _))),
|
SVector::<F16x1, ROWS>::from_fn(|_, _| F16x1(f16::from_f32(2 as _))),
|
||||||
@@ -31,7 +52,7 @@ pub extern "C" fn main() {
|
|||||||
SVector::<F16x1, ROWS>::from_fn(|_, _| F16x1(f16::from_f32(3 as _))),
|
SVector::<F16x1, ROWS>::from_fn(|_, _| F16x1(f16::from_f32(3 as _))),
|
||||||
));
|
));
|
||||||
|
|
||||||
writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap();
|
// writeln!(Uart0, "{}+{}=", a.0, b.0).unwrap();
|
||||||
|
|
||||||
let mut c = Box::new(pim::continuous_array::Vector(
|
let mut c = Box::new(pim::continuous_array::Vector(
|
||||||
SVector::<F16x1, ROWS>::zeros(),
|
SVector::<F16x1, ROWS>::zeros(),
|
||||||
@@ -43,10 +64,18 @@ pub extern "C" fn main() {
|
|||||||
barrier::dsb(barrier::SY);
|
barrier::dsb(barrier::SY);
|
||||||
|
|
||||||
// Execute kernel
|
// Execute kernel
|
||||||
|
{
|
||||||
pim::state::set_bank_mode(BankMode::PimAllBank);
|
pim::state::set_bank_mode(BankMode::PimAllBank);
|
||||||
vmul::execute::<ROWS, BLOCKS>(&a.0, &b.0, &mut c.0, dummy.as_ref());
|
|
||||||
pim::state::set_bank_mode(BankMode::SingleBank);
|
|
||||||
|
|
||||||
writeln!(Uart0, "{}", c.0).unwrap();
|
pim_os::m5op::exit(0);
|
||||||
|
for _ in 0..10 {
|
||||||
|
vmul::execute::<ROWS, BLOCKS>(&a.0, &b.0, &mut c.0, dummy.as_ref());
|
||||||
|
}
|
||||||
|
pim_os::m5op::exit(0);
|
||||||
|
|
||||||
|
pim::state::set_bank_mode(BankMode::SingleBank);
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeln!(Uart0, "{}", c.0).unwrap();
|
||||||
writeln!(Uart0, "Done").unwrap();
|
writeln!(Uart0, "Done").unwrap();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ use aarch64_cpu::asm::barrier;
|
|||||||
use nalgebra::SVector;
|
use nalgebra::SVector;
|
||||||
use pim_isa::{File, Instruction, Kernel};
|
use pim_isa::{File, Instruction, Kernel};
|
||||||
|
|
||||||
pub const KERNEL: Kernel = Kernel([
|
pub const KERNEL_X1: Kernel = Kernel([
|
||||||
Instruction::MOV {
|
Instruction::MOV {
|
||||||
src: File::Bank,
|
src: File::Bank,
|
||||||
dst: File::GrfA { index: 0 },
|
dst: File::GrfA { index: 0 },
|
||||||
@@ -76,6 +76,252 @@ pub const KERNEL: Kernel = Kernel([
|
|||||||
Instruction::NOP,
|
Instruction::NOP,
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
pub const KERNEL_X2: Kernel = Kernel([
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 0 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 1 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 2 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 3 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 4 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 5 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 6 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 7 },
|
||||||
|
},
|
||||||
|
Instruction::MAC {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 0 },
|
||||||
|
src2: File::GrfB { index: 0 },
|
||||||
|
dst: File::GrfB { index: 0 },
|
||||||
|
aam: true,
|
||||||
|
},
|
||||||
|
Instruction::JUMP {
|
||||||
|
offset: -1,
|
||||||
|
count: 15,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 0 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 1 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::EXIT,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
]);
|
||||||
|
|
||||||
|
pub const KERNEL_X3: Kernel = Kernel([
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 0 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 1 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 2 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 3 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 4 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 5 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 6 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 7 },
|
||||||
|
},
|
||||||
|
Instruction::MAC {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 0 },
|
||||||
|
src2: File::GrfB { index: 0 },
|
||||||
|
dst: File::GrfB { index: 0 },
|
||||||
|
aam: true,
|
||||||
|
},
|
||||||
|
Instruction::JUMP {
|
||||||
|
offset: -1,
|
||||||
|
count: 31,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 0 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 1 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 2 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 3 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::EXIT,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
]);
|
||||||
|
|
||||||
|
pub const KERNEL_X4: Kernel = Kernel([
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 0 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 1 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 2 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 3 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 4 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 5 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 6 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 7 },
|
||||||
|
},
|
||||||
|
Instruction::MAC {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 0 },
|
||||||
|
src2: File::GrfB { index: 0 },
|
||||||
|
dst: File::GrfB { index: 0 },
|
||||||
|
aam: true,
|
||||||
|
},
|
||||||
|
Instruction::JUMP {
|
||||||
|
offset: -1,
|
||||||
|
count: 63,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 0 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 1 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 2 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 3 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 4 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 5 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 6 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 7 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::EXIT,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
]);
|
||||||
|
|
||||||
pub fn execute<const X16R: usize, const R: usize>(
|
pub fn execute<const X16R: usize, const R: usize>(
|
||||||
matrix: &Matrix<X16R, 8>,
|
matrix: &Matrix<X16R, 8>,
|
||||||
input_vector: &interleaved_array::Vector<8>,
|
input_vector: &interleaved_array::Vector<8>,
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ use crate::pim::{interleaved_array, operation::PimOperand, vector::F16x1};
|
|||||||
use nalgebra::SVector;
|
use nalgebra::SVector;
|
||||||
use pim_isa::{File, Instruction, Kernel};
|
use pim_isa::{File, Instruction, Kernel};
|
||||||
|
|
||||||
pub const KERNEL: Kernel = Kernel([
|
pub const KERNEL_X1: Kernel = Kernel([
|
||||||
Instruction::MOV {
|
Instruction::MOV {
|
||||||
src: File::Bank,
|
src: File::Bank,
|
||||||
dst: File::SrfM { index: 0 },
|
dst: File::SrfM { index: 0 },
|
||||||
@@ -52,6 +52,288 @@ pub const KERNEL: Kernel = Kernel([
|
|||||||
Instruction::NOP,
|
Instruction::NOP,
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
pub const KERNEL_X2: Kernel = Kernel([
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::SrfM { index: 0 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 0 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 1 },
|
||||||
|
},
|
||||||
|
Instruction::MAD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::SrfA { index: 0 },
|
||||||
|
src2: File::GrfA { index: 0 },
|
||||||
|
dst: File::GrfA { index: 0 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MAD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::SrfA { index: 1 },
|
||||||
|
src2: File::GrfA { index: 1 },
|
||||||
|
dst: File::GrfA { index: 1 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfA { index: 0 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfA { index: 1 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::EXIT,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
]);
|
||||||
|
|
||||||
|
pub const KERNEL_X3: Kernel = Kernel([
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::SrfM { index: 0 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 0 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 1 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 2 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 3 },
|
||||||
|
},
|
||||||
|
Instruction::MAD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::SrfA { index: 0 },
|
||||||
|
src2: File::GrfA { index: 0 },
|
||||||
|
dst: File::GrfA { index: 0 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MAD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::SrfA { index: 1 },
|
||||||
|
src2: File::GrfA { index: 1 },
|
||||||
|
dst: File::GrfA { index: 1 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MAD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::SrfA { index: 2 },
|
||||||
|
src2: File::GrfA { index: 2 },
|
||||||
|
dst: File::GrfA { index: 2 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MAD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::SrfA { index: 3 },
|
||||||
|
src2: File::GrfA { index: 3 },
|
||||||
|
dst: File::GrfA { index: 3 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfA { index: 0 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfA { index: 1 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfA { index: 2 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfA { index: 3 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::EXIT,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
]);
|
||||||
|
|
||||||
|
pub const KERNEL_X4: Kernel = Kernel([
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::SrfM { index: 0 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 0 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 1 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 2 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 3 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 4 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 5 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 6 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 7 },
|
||||||
|
},
|
||||||
|
Instruction::MAD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::SrfA { index: 0 },
|
||||||
|
src2: File::GrfA { index: 0 },
|
||||||
|
dst: File::GrfA { index: 0 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MAD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::SrfA { index: 1 },
|
||||||
|
src2: File::GrfA { index: 1 },
|
||||||
|
dst: File::GrfA { index: 1 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MAD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::SrfA { index: 2 },
|
||||||
|
src2: File::GrfA { index: 2 },
|
||||||
|
dst: File::GrfA { index: 2 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MAD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::SrfA { index: 3 },
|
||||||
|
src2: File::GrfA { index: 3 },
|
||||||
|
dst: File::GrfA { index: 3 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MAD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::SrfA { index: 4 },
|
||||||
|
src2: File::GrfA { index: 4 },
|
||||||
|
dst: File::GrfA { index: 4 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MAD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::SrfA { index: 5 },
|
||||||
|
src2: File::GrfA { index: 5 },
|
||||||
|
dst: File::GrfA { index: 5 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MAD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::SrfA { index: 6 },
|
||||||
|
src2: File::GrfA { index: 6 },
|
||||||
|
dst: File::GrfA { index: 6 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MAD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::SrfA { index: 7 },
|
||||||
|
src2: File::GrfA { index: 7 },
|
||||||
|
dst: File::GrfA { index: 7 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfA { index: 0 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfA { index: 1 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfA { index: 2 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfA { index: 3 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfA { index: 4 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfA { index: 5 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfA { index: 6 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfA { index: 7 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::EXIT,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
]);
|
||||||
|
|
||||||
pub fn execute<const R: usize, const BLOCKS: usize>(
|
pub fn execute<const R: usize, const BLOCKS: usize>(
|
||||||
a: &SVector<F16x1, R>,
|
a: &SVector<F16x1, R>,
|
||||||
b: &SVector<F16x1, R>,
|
b: &SVector<F16x1, R>,
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ use crate::pim::{operation::PimOperand, vector::F16x1};
|
|||||||
use nalgebra::SVector;
|
use nalgebra::SVector;
|
||||||
use pim_isa::{File, Instruction, Kernel};
|
use pim_isa::{File, Instruction, Kernel};
|
||||||
|
|
||||||
pub const KERNEL: Kernel = Kernel([
|
pub const KERNEL_X1: Kernel = Kernel([
|
||||||
Instruction::MOV {
|
Instruction::MOV {
|
||||||
src: File::Bank,
|
src: File::Bank,
|
||||||
dst: File::GrfA { index: 0 },
|
dst: File::GrfA { index: 0 },
|
||||||
@@ -48,6 +48,265 @@ pub const KERNEL: Kernel = Kernel([
|
|||||||
Instruction::NOP,
|
Instruction::NOP,
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
pub const KERNEL_X2: Kernel = Kernel([
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 0 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 1 },
|
||||||
|
},
|
||||||
|
Instruction::ADD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 0 },
|
||||||
|
dst: File::GrfB { index: 0 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::ADD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 1 },
|
||||||
|
dst: File::GrfB { index: 1 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 0 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 1 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::EXIT,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
]);
|
||||||
|
|
||||||
|
pub const KERNEL_X3: Kernel = Kernel([
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 0 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 1 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 2 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 3 },
|
||||||
|
},
|
||||||
|
Instruction::ADD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 0 },
|
||||||
|
dst: File::GrfB { index: 0 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::ADD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 1 },
|
||||||
|
dst: File::GrfB { index: 1 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::ADD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 2 },
|
||||||
|
dst: File::GrfB { index: 2 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::ADD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 3 },
|
||||||
|
dst: File::GrfB { index: 3 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 0 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 1 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 2 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 3 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::EXIT,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
]);
|
||||||
|
|
||||||
|
pub const KERNEL_X4: Kernel = Kernel([
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 0 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 1 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 2 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 3 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 4 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 5 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 6 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 7 },
|
||||||
|
},
|
||||||
|
Instruction::ADD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 0 },
|
||||||
|
dst: File::GrfB { index: 0 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::ADD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 1 },
|
||||||
|
dst: File::GrfB { index: 1 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::ADD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 2 },
|
||||||
|
dst: File::GrfB { index: 2 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::ADD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 3 },
|
||||||
|
dst: File::GrfB { index: 3 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::ADD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 4 },
|
||||||
|
dst: File::GrfB { index: 4 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::ADD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 5 },
|
||||||
|
dst: File::GrfB { index: 5 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::ADD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 6 },
|
||||||
|
dst: File::GrfB { index: 6 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::ADD {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 7 },
|
||||||
|
dst: File::GrfB { index: 7 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 0 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 1 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 2 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 3 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 4 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 5 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 6 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 7 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::EXIT,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
]);
|
||||||
|
|
||||||
pub fn execute<const R: usize, const BLOCKS: usize>(
|
pub fn execute<const R: usize, const BLOCKS: usize>(
|
||||||
a: &SVector<F16x1, R>,
|
a: &SVector<F16x1, R>,
|
||||||
b: &SVector<F16x1, R>,
|
b: &SVector<F16x1, R>,
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ use crate::pim::{operation::PimOperand, vector::F16x1};
|
|||||||
use nalgebra::SVector;
|
use nalgebra::SVector;
|
||||||
use pim_isa::{File, Instruction, Kernel};
|
use pim_isa::{File, Instruction, Kernel};
|
||||||
|
|
||||||
pub const KERNEL: Kernel = Kernel([
|
pub const KERNEL_X1: Kernel = Kernel([
|
||||||
Instruction::MOV {
|
Instruction::MOV {
|
||||||
src: File::Bank,
|
src: File::Bank,
|
||||||
dst: File::GrfA { index: 0 },
|
dst: File::GrfA { index: 0 },
|
||||||
@@ -48,6 +48,265 @@ pub const KERNEL: Kernel = Kernel([
|
|||||||
Instruction::NOP,
|
Instruction::NOP,
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
pub const KERNEL_X2: Kernel = Kernel([
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 0 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 1 },
|
||||||
|
},
|
||||||
|
Instruction::MUL {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 0 },
|
||||||
|
dst: File::GrfB { index: 0 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MUL {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 1 },
|
||||||
|
dst: File::GrfB { index: 1 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 0 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 1 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::EXIT,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
]);
|
||||||
|
|
||||||
|
pub const KERNEL_X3: Kernel = Kernel([
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 0 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 1 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 2 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 3 },
|
||||||
|
},
|
||||||
|
Instruction::MUL {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 0 },
|
||||||
|
dst: File::GrfB { index: 0 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MUL {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 1 },
|
||||||
|
dst: File::GrfB { index: 1 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MUL {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 2 },
|
||||||
|
dst: File::GrfB { index: 2 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MUL {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 3 },
|
||||||
|
dst: File::GrfB { index: 3 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 0 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 1 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 2 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 3 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::EXIT,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
]);
|
||||||
|
|
||||||
|
pub const KERNEL_X4: Kernel = Kernel([
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 0 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 1 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 2 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 3 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 4 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 5 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 6 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 7 },
|
||||||
|
},
|
||||||
|
Instruction::MUL {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 0 },
|
||||||
|
dst: File::GrfB { index: 0 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MUL {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 1 },
|
||||||
|
dst: File::GrfB { index: 1 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MUL {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 2 },
|
||||||
|
dst: File::GrfB { index: 2 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MUL {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 3 },
|
||||||
|
dst: File::GrfB { index: 3 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MUL {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 4 },
|
||||||
|
dst: File::GrfB { index: 4 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MUL {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 5 },
|
||||||
|
dst: File::GrfB { index: 5 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MUL {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 6 },
|
||||||
|
dst: File::GrfB { index: 6 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::MUL {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 7 },
|
||||||
|
dst: File::GrfB { index: 7 },
|
||||||
|
aam: false,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 0 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 1 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 2 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 3 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 4 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 5 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 6 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 7 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::EXIT,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
]);
|
||||||
|
|
||||||
pub fn execute<const R: usize, const BLOCKS: usize>(
|
pub fn execute<const R: usize, const BLOCKS: usize>(
|
||||||
a: &SVector<F16x1, R>,
|
a: &SVector<F16x1, R>,
|
||||||
b: &SVector<F16x1, R>,
|
b: &SVector<F16x1, R>,
|
||||||
|
|||||||
Reference in New Issue
Block a user