Introduce X1, X2, X3, X4 complexity levels
This commit is contained in:
@@ -8,6 +8,10 @@ forced-target = "aarch64-unknown-none"
|
|||||||
|
|
||||||
[features]
|
[features]
|
||||||
cacheless = []
|
cacheless = []
|
||||||
|
X1 = []
|
||||||
|
X2 = []
|
||||||
|
X3 = []
|
||||||
|
X4 = []
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
aarch64-cpu = "9.4.0"
|
aarch64-cpu = "9.4.0"
|
||||||
|
|||||||
@@ -18,14 +18,35 @@ use pim_os::{
|
|||||||
uart::Uart0,
|
uart::Uart0,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#[cfg(feature = "X1")]
|
||||||
|
const ROWS: usize = 16;
|
||||||
|
|
||||||
|
#[cfg(feature = "X2")]
|
||||||
const ROWS: usize = 32;
|
const ROWS: usize = 32;
|
||||||
|
|
||||||
|
#[cfg(feature = "X3")]
|
||||||
|
const ROWS: usize = 64;
|
||||||
|
|
||||||
|
#[cfg(feature = "X4")]
|
||||||
|
const ROWS: usize = 128;
|
||||||
|
|
||||||
const COLUMNS: usize = 128;
|
const COLUMNS: usize = 128;
|
||||||
const X16_ROWS: usize = ROWS / 16;
|
const X16_ROWS: usize = ROWS / 16;
|
||||||
const X16_COLUMNS: usize = COLUMNS / 16;
|
const X16_COLUMNS: usize = COLUMNS / 16;
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub extern "C" fn main() {
|
pub extern "C" fn main() {
|
||||||
pim::state::set_kernel(&gemv::KERNEL);
|
#[cfg(feature = "X1")]
|
||||||
|
pim::state::set_kernel(&gemv::KERNEL_X1);
|
||||||
|
|
||||||
|
#[cfg(feature = "X2")]
|
||||||
|
pim::state::set_kernel(&gemv::KERNEL_X2);
|
||||||
|
|
||||||
|
#[cfg(feature = "X3")]
|
||||||
|
pim::state::set_kernel(&gemv::KERNEL_X3);
|
||||||
|
|
||||||
|
#[cfg(feature = "X4")]
|
||||||
|
pim::state::set_kernel(&gemv::KERNEL_X4);
|
||||||
|
|
||||||
let mut matrix = SMatrix::<_, ROWS, COLUMNS>::zeros();
|
let mut matrix = SMatrix::<_, ROWS, COLUMNS>::zeros();
|
||||||
matrix.fill_lower_triangle(F16x1::one(), 0);
|
matrix.fill_lower_triangle(F16x1::one(), 0);
|
||||||
|
|||||||
@@ -5,7 +5,78 @@ use aarch64_cpu::asm::barrier;
|
|||||||
use nalgebra::SVector;
|
use nalgebra::SVector;
|
||||||
use pim_isa::{File, Instruction, Kernel};
|
use pim_isa::{File, Instruction, Kernel};
|
||||||
|
|
||||||
pub const KERNEL: Kernel = Kernel([
|
pub const KERNEL_X1: Kernel = Kernel([
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 0 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 1 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 2 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 3 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 4 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 5 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 6 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 7 },
|
||||||
|
},
|
||||||
|
Instruction::MAC {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 0 },
|
||||||
|
src2: File::GrfB { index: 0 },
|
||||||
|
dst: File::GrfB { index: 0 },
|
||||||
|
aam: true,
|
||||||
|
},
|
||||||
|
Instruction::JUMP {
|
||||||
|
offset: -1,
|
||||||
|
count: 15,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 0 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::EXIT,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
]);
|
||||||
|
|
||||||
|
pub const KERNEL_X2: Kernel = Kernel([
|
||||||
Instruction::MOV {
|
Instruction::MOV {
|
||||||
src: File::Bank,
|
src: File::Bank,
|
||||||
dst: File::GrfA { index: 0 },
|
dst: File::GrfA { index: 0 },
|
||||||
@@ -79,8 +150,180 @@ pub const KERNEL: Kernel = Kernel([
|
|||||||
Instruction::NOP,
|
Instruction::NOP,
|
||||||
]);
|
]);
|
||||||
|
|
||||||
pub fn execute<const R: usize, const X16C: usize>(
|
pub const KERNEL_X3: Kernel = Kernel([
|
||||||
matrix: &Matrix<2, X16C>,
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 0 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 1 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 2 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 3 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 4 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 5 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 6 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 7 },
|
||||||
|
},
|
||||||
|
Instruction::MAC {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 0 },
|
||||||
|
src2: File::GrfB { index: 0 },
|
||||||
|
dst: File::GrfB { index: 0 },
|
||||||
|
aam: true,
|
||||||
|
},
|
||||||
|
Instruction::JUMP {
|
||||||
|
offset: -1,
|
||||||
|
count: 31,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 0 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 1 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 2 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 3 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::EXIT,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
]);
|
||||||
|
|
||||||
|
pub const KERNEL_X4: Kernel = Kernel([
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 0 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 1 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 2 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 3 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 4 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 5 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 6 },
|
||||||
|
},
|
||||||
|
Instruction::MOV {
|
||||||
|
src: File::Bank,
|
||||||
|
dst: File::GrfA { index: 7 },
|
||||||
|
},
|
||||||
|
Instruction::MAC {
|
||||||
|
src0: File::Bank,
|
||||||
|
src1: File::GrfA { index: 0 },
|
||||||
|
src2: File::GrfB { index: 0 },
|
||||||
|
dst: File::GrfB { index: 0 },
|
||||||
|
aam: true,
|
||||||
|
},
|
||||||
|
Instruction::JUMP {
|
||||||
|
offset: -1,
|
||||||
|
count: 63,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 0 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 1 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 2 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 3 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 4 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 5 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 6 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::FILL {
|
||||||
|
src: File::GrfB { index: 7 },
|
||||||
|
dst: File::Bank,
|
||||||
|
},
|
||||||
|
Instruction::EXIT,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
Instruction::NOP,
|
||||||
|
]);
|
||||||
|
|
||||||
|
pub fn execute<const X16R: usize, const R: usize, const X16C: usize>(
|
||||||
|
matrix: &Matrix<X16R, X16C>,
|
||||||
input_vector: &interleaved_array::Vector<X16C>,
|
input_vector: &interleaved_array::Vector<X16C>,
|
||||||
output_partial_sum_vector: &mut SVector<F16x16, R>,
|
output_partial_sum_vector: &mut SVector<F16x16, R>,
|
||||||
dummy: &impl PimOperand,
|
dummy: &impl PimOperand,
|
||||||
@@ -98,7 +341,7 @@ pub fn execute<const R: usize, const X16C: usize>(
|
|||||||
barrier::dsb(barrier::SY);
|
barrier::dsb(barrier::SY);
|
||||||
|
|
||||||
for chunk in output_partial_sum_vector
|
for chunk in output_partial_sum_vector
|
||||||
.fixed_rows_with_step_mut::<2>(0, 16)
|
.fixed_rows_with_step_mut::<X16R>(0, 16)
|
||||||
.iter_mut()
|
.iter_mut()
|
||||||
{
|
{
|
||||||
chunk.execute_write();
|
chunk.execute_write();
|
||||||
|
|||||||
Reference in New Issue
Block a user