Add support for EVEN/ODD PIM configuration

This commit is contained in:
2023-12-16 22:34:11 +01:00
parent aecb19b4f4
commit 2e44890c53
6 changed files with 133 additions and 58 deletions

View File

@@ -53,11 +53,11 @@ pub extern "C" fn entry() -> ! {
barrier::dsb(barrier::SY);
pim_state.set_bank_mode(BankMode::PimAllBank);
compute_array.0[1].execute_instruction_read();
compute_array.0[2].execute_instruction_read();
compute_array.0[0].execute_instruction_read();
compute_array.0[2].execute_instruction_write();
dummy_array.execute_instruction_read();
compute_array.0[1].execute_instruction_read_dual_bank();
compute_array.0[2].execute_instruction_read_dual_bank();
compute_array.0[0].execute_instruction_read_dual_bank();
compute_array.0[2].execute_instruction_write_dual_bank();
dummy_array.execute_instruction_read_single_bank();
pim_state.set_bank_mode(BankMode::SingleBank);
compute_array.invalidate();
@@ -70,7 +70,7 @@ pub extern "C" fn entry() -> ! {
)
.unwrap();
writeln!(&mut uart, "ComputeArray:\n{:?}", compute_array).unwrap();
// writeln!(&mut uart, "ComputeArray:\n{:#?}", compute_array.0[2]).unwrap();
m5ops::exit();

View File

@@ -5,6 +5,8 @@ use half::f16;
const NUMBER_OF_BANKS: usize = 32;
const ELEMENTS_PER_CACHE_LINE: usize = 16;
const ELEMENTS_PER_BANK_ARRAY: usize = NUMBER_OF_BANKS * ELEMENTS_PER_CACHE_LINE;
const EVEN_BANK_INDEX: usize = 0;
const ODD_BANK_INDEX: usize = 8;
#[derive(Clone, Debug)]
#[repr(C, align(1024))]
@@ -17,88 +19,119 @@ impl Default for BankArray {
}
impl BankArray {
pub fn execute_instruction_read(&self) {
if !cfg!(cacheless) {
self.invalidate_single_bank(0);
pub fn execute_instruction_read_single_bank(&self) {
if !cfg!(feature = "cacheless") {
self.invalidate_bank(EVEN_BANK_INDEX);
barrier::dsb(barrier::SY);
}
self.read_data();
barrier::dsb(barrier::SY);
}
pub fn read_data(&self) {
unsafe {
// Read from first bank
let first_bank = &self.0[0];
core::ptr::read_volatile(first_bank);
self.read_data_bank(EVEN_BANK_INDEX);
barrier::dsb(barrier::SY);
}
/// Touches the even and the odd bank of this array with one volatile read each.
///
/// Unless the crate is built with the `cacheless` feature, `invalidate_bank` is
/// called for both banks first, followed by a `dsb(SY)` barrier. After the two
/// reads a second `dsb(SY)` barrier is issued.
// NOTE(review): presumably the invalidation is there so the reads are not served
// from the data cache, and each read that reaches memory makes the PIM units
// execute their next instruction -- confirm against the memory-side model.
pub fn execute_instruction_read_dual_bank(&self) {
// Cache maintenance is only needed when the build is not `cacheless`.
if !cfg!(feature = "cacheless") {
self.invalidate_bank(EVEN_BANK_INDEX);
self.invalidate_bank(ODD_BANK_INDEX);
// Barrier between the cache maintenance above and the reads below.
barrier::dsb(barrier::SY);
}
// Read from first and second bank
self.read_data_bank(EVEN_BANK_INDEX);
self.read_data_bank(ODD_BANK_INDEX);
// Barrier after both reads have been issued.
barrier::dsb(barrier::SY);
}
/// Issues a single volatile read of the first element that belongs to `bank_index`.
///
/// The element offset is `bank_index * ELEMENTS_PER_CACHE_LINE`; the value that
/// is read is discarded.
// NOTE(review): indexing `self.0` presumably panics for an out-of-range bank
// (assuming `self.0` is an array or slice) -- confirm against the struct definition.
fn read_data_bank(&self, bank_index: usize) {
    let offset = bank_index * ELEMENTS_PER_CACHE_LINE;
    let element = &self.0[offset];
    // SAFETY: `element` is a live shared reference into `self.0`, so the pointer
    // it coerces to is valid, aligned and initialized for a read.
    unsafe {
        core::ptr::read_volatile(element);
    }
}
pub fn execute_instruction_write(&mut self) {
if !cfg!(cacheless) {
pub fn execute_instruction_write_single_bank(&mut self) {
if !cfg!(feature = "cacheless") {
self.preload_zero();
barrier::dsb(barrier::SY);
}
self.write_data();
// Write to first bank
self.write_data_bank(EVEN_BANK_INDEX);
if !cfg!(cacheless) {
self.invalidate_flush_single_bank(0);
if !cfg!(feature = "cacheless") {
self.invalidate_flush_bank(EVEN_BANK_INDEX);
}
barrier::dsb(barrier::SY);
}
pub fn write_data(&mut self) {
/// Touches the even and the odd bank of this array with one volatile write each.
///
/// Unless the crate is built with the `cacheless` feature, the writes are
/// bracketed by cache maintenance: `preload_zero` runs over the array and is
/// followed by a `dsb(SY)` barrier beforehand, and `invalidate_flush_bank` is
/// called for the two written banks afterwards. The function always ends with
/// a `dsb(SY)` barrier.
// NOTE(review): presumably each write that reaches memory makes the PIM units
// execute their next instruction -- confirm against the memory-side model.
pub fn execute_instruction_write_dual_bank(&mut self) {
// Cache maintenance is only needed when the build is not `cacheless`.
if !cfg!(feature = "cacheless") {
self.preload_zero();
// Barrier between the preload above and the writes below.
barrier::dsb(barrier::SY);
}
// Write to first and second bank
self.write_data_bank(EVEN_BANK_INDEX);
self.write_data_bank(ODD_BANK_INDEX);
// With caches enabled, clean and invalidate the two lines that were just written.
if !cfg!(feature = "cacheless") {
self.invalidate_flush_bank(EVEN_BANK_INDEX);
self.invalidate_flush_bank(ODD_BANK_INDEX);
}
// Final barrier, issued in both the cached and the cacheless configuration.
barrier::dsb(barrier::SY);
}
fn write_data_bank(&mut self, bank_index: usize) {
let bank = &mut self.0[bank_index * ELEMENTS_PER_CACHE_LINE];
unsafe {
// Write to first bank
let first_bank = &mut self.0[0];
core::ptr::write_volatile(first_bank, f16::ZERO);
core::ptr::write_volatile(bank, f16::ZERO);
}
}
pub fn invalidate(&self) {
(0..ELEMENTS_PER_BANK_ARRAY)
.step_by(ELEMENTS_PER_CACHE_LINE)
.for_each(|idx| self.invalidate_single_bank(idx));
(0..NUMBER_OF_BANKS).for_each(|idx| self.invalidate_bank(idx));
}
pub fn invalidate_single_bank(&self, idx: usize) {
fn invalidate_bank(&self, bank_index: usize) {
let bank = &self.0[bank_index * ELEMENTS_PER_CACHE_LINE];
unsafe {
// Invalidate first bank
asm!("dc ivac, {val}", val = in(reg) &self.0[idx]);
asm!("dc ivac, {val}", val = in(reg) bank);
}
}
pub fn invalidate_flush(&self) {
(0..ELEMENTS_PER_BANK_ARRAY)
.step_by(ELEMENTS_PER_CACHE_LINE)
.for_each(|idx| self.invalidate_flush_single_bank(idx));
(0..NUMBER_OF_BANKS).for_each(|idx| self.invalidate_flush_bank(idx));
}
pub fn invalidate_flush_single_bank(&self, idx: usize) {
fn invalidate_flush_bank(&self, bank_index: usize) {
let bank = &self.0[bank_index * ELEMENTS_PER_CACHE_LINE];
unsafe {
// Invalidate and flush first bank
asm!("dc civac, {val}", val = in(reg) &self.0[idx]);
asm!("dc civac, {val}", val = in(reg) bank);
}
}
pub fn preload_zero(&self) {
(0..ELEMENTS_PER_BANK_ARRAY)
.step_by(ELEMENTS_PER_CACHE_LINE)
.for_each(|idx| self.preload_zero_single_bank(idx));
(0..NUMBER_OF_BANKS).for_each(|idx| self.preload_zero_bank(idx));
}
pub fn preload_zero_single_bank(&self, idx: usize) {
fn preload_zero_bank(&self, bank_index: usize) {
let bank = &self.0[bank_index * ELEMENTS_PER_CACHE_LINE];
unsafe {
// Preload first bank
asm!("dc zva, {val}", val = in(reg) &self.0[idx]);
asm!("dc zva, {val}", val = in(reg) bank);
}
}
}
#[derive(Clone, Debug)]
#[repr(C, align(65536))]
pub struct ComputeArray<const N: usize>(pub [BankArray; N]);
impl<const N: usize> ComputeArray<N> {

View File

@@ -9,15 +9,34 @@ pub const TEST_KERNEL: Kernel = Kernel([
src: File::Bank,
dst: File::GrfB { index: 0 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfA { index: 1 },
},
Instruction::MOV {
src: File::Bank,
dst: File::GrfB { index: 1 },
},
Instruction::MAC {
src0: File::Bank,
src1: File::GrfA { index: 0 },
src2: File::GrfB { index: 0 },
dst: File::GrfB { index: 0 },
aam: true
src2: File::GrfA { index: 1 },
dst: File::GrfA { index: 1 },
aam: false,
},
Instruction::MAC {
src0: File::Bank,
src1: File::GrfB { index: 0 },
src2: File::GrfB { index: 1 },
dst: File::GrfB { index: 1 },
aam: false,
},
Instruction::FILL {
src: File::GrfB { index: 0 },
src: File::GrfA { index: 1 },
dst: File::Bank,
},
Instruction::FILL {
src: File::GrfB { index: 1 },
dst: File::Bank,
},
Instruction::EXIT,
@@ -44,8 +63,4 @@ pub const TEST_KERNEL: Kernel = Kernel([
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
Instruction::NOP,
]);

View File

@@ -19,7 +19,7 @@ impl PimState {
// TODO return token and return to singlebank when dropped
pub fn set_bank_mode(&mut self, bank_mode: BankMode) {
if cfg!(cacheless) {
if cfg!(feature = "cacheless") {
match bank_mode {
BankMode::SingleBank => unsafe { boot::set_page_table_cache() },
BankMode::AllBank => (),

View File

@@ -6,6 +6,9 @@ edition = "2021"
[lib]
crate-type = ["staticlib"]
[features]
shared_pim_units = []
[dependencies]
cxx = "1.0.110"
env_logger = "0.10.1"

View File

@@ -12,7 +12,7 @@ mod ffi {
extern "Rust" {
type PimVM;
fn new_pim_vm(num_pim_units: u32) -> Box<PimVM>;
fn new_pim_vm(num_banks: u32) -> Box<PimVM>;
fn reset(&mut self);
fn apply_config(&mut self, config: &str);
fn bank_mode(&self) -> BankMode;
@@ -93,7 +93,13 @@ impl PimVM {
}
}
fn new_pim_vm(num_pim_units: u32) -> Box<PimVM> {
fn new_pim_vm(num_banks: u32) -> Box<PimVM> {
let num_pim_units = if cfg!(feature = "shared_pim_units") {
num_banks / 2
} else {
num_banks
};
Box::new(PimVM {
pim_units: vec![PimUnit::default(); num_pim_units as _],
pim_config: PimConfig {
@@ -110,16 +116,26 @@ impl PimVM {
pub fn execute_read(&mut self, bank_index: u32, address: u32, bank_data: &[u8]) {
assert_eq!(bank_data.len(), BURST_LENGTH);
let pim_unit = &mut self.pim_units[bank_index as usize];
let pim_unit_index = if cfg!(feature = "shared_pim_units") {
bank_index / 2
} else {
bank_index
};
let pim_unit = &mut self.pim_units[pim_unit_index as usize];
let mut inst = self.pim_config.kernel.0[pim_unit.pc as usize];
log::debug!(
"PimUnit {pim_unit_index} Execute PC {}: {inst:?}",
pim_unit.pc
);
pim_unit.pc += 1;
let aam_grf_a_index = (address >> GRF_A_BIT_OFFSET) & 0b111;
let aam_grf_b_index = (address >> GRF_B_BIT_OFFSET) & 0b111;
log::debug!("PimUnit {bank_index} Execute PC {}: {inst:?}", pim_unit.pc);
// The JUMP instruction is zero-cycle and not actually executed
while let Instruction::JUMP { offset, count } = inst {
pim_unit.jump_counter = match pim_unit.jump_counter {
@@ -135,7 +151,7 @@ impl PimVM {
}
pim_unit.pc = new_pc as _;
log::debug!("PimUnit {bank_index} New PC {new_pc}: {inst:?}");
log::debug!("PimUnit {pim_unit_index} New PC {new_pc}: {inst:?}");
}
inst = self.pim_config.kernel.0[pim_unit.pc as usize];
@@ -289,21 +305,27 @@ impl PimVM {
.try_into()
.unwrap();
log::debug!("{data0:?}\n{data1:?}\n{data2:?}\n{product:?}\n{sum:?}");
log::debug!("{data0:?}, {data1:?}, {data2:?}, {product:?}, {sum:?}");
PimVM::store(dst, pim_unit, &sum);
}
}
}
pub fn execute_write(&mut self, bank_index: u32) -> [u8; BURST_LENGTH] {
let pim_unit = &mut self.pim_units[bank_index as usize];
let pim_unit_index = if cfg!(feature = "shared_pim_units") {
bank_index / 2
} else {
bank_index
};
let pim_unit = &mut self.pim_units[pim_unit_index as usize];
let current_pc = pim_unit.pc;
pim_unit.pc += 1;
let inst = &self.pim_config.kernel.0[current_pc as usize];
log::debug!("PimUnit {bank_index} Execute PC {current_pc}: {inst:?}");
log::debug!("PimUnit {pim_unit_index} Execute PC {current_pc}: {inst:?}");
let data = match inst {
Instruction::FILL { src, dst } => {
@@ -317,6 +339,8 @@ impl PimVM {
panic!("Unsupported dst operand: {dst:?}")
}
log::debug!("Store {data:?}");
data
}
_ => panic!("Unsupported instruction for write: {inst:?}"),