Add support for EVEN/ODD PIM configuration
This commit is contained in:
@@ -53,11 +53,11 @@ pub extern "C" fn entry() -> ! {
|
||||
barrier::dsb(barrier::SY);
|
||||
|
||||
pim_state.set_bank_mode(BankMode::PimAllBank);
|
||||
compute_array.0[1].execute_instruction_read();
|
||||
compute_array.0[2].execute_instruction_read();
|
||||
compute_array.0[0].execute_instruction_read();
|
||||
compute_array.0[2].execute_instruction_write();
|
||||
dummy_array.execute_instruction_read();
|
||||
compute_array.0[1].execute_instruction_read_dual_bank();
|
||||
compute_array.0[2].execute_instruction_read_dual_bank();
|
||||
compute_array.0[0].execute_instruction_read_dual_bank();
|
||||
compute_array.0[2].execute_instruction_write_dual_bank();
|
||||
dummy_array.execute_instruction_read_single_bank();
|
||||
pim_state.set_bank_mode(BankMode::SingleBank);
|
||||
|
||||
compute_array.invalidate();
|
||||
@@ -70,7 +70,7 @@ pub extern "C" fn entry() -> ! {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
writeln!(&mut uart, "ComputeArray:\n{:?}", compute_array).unwrap();
|
||||
// writeln!(&mut uart, "ComputeArray:\n{:#?}", compute_array.0[2]).unwrap();
|
||||
|
||||
m5ops::exit();
|
||||
|
||||
|
||||
@@ -5,6 +5,8 @@ use half::f16;
|
||||
const NUMBER_OF_BANKS: usize = 32;
|
||||
const ELEMENTS_PER_CACHE_LINE: usize = 16;
|
||||
const ELEMENTS_PER_BANK_ARRAY: usize = NUMBER_OF_BANKS * ELEMENTS_PER_CACHE_LINE;
|
||||
const EVEN_BANK_INDEX: usize = 0;
|
||||
const ODD_BANK_INDEX: usize = 8;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[repr(C, align(1024))]
|
||||
@@ -17,88 +19,119 @@ impl Default for BankArray {
|
||||
}
|
||||
|
||||
impl BankArray {
|
||||
pub fn execute_instruction_read(&self) {
|
||||
if !cfg!(cacheless) {
|
||||
self.invalidate_single_bank(0);
|
||||
pub fn execute_instruction_read_single_bank(&self) {
|
||||
if !cfg!(feature = "cacheless") {
|
||||
self.invalidate_bank(EVEN_BANK_INDEX);
|
||||
|
||||
barrier::dsb(barrier::SY);
|
||||
}
|
||||
|
||||
self.read_data();
|
||||
barrier::dsb(barrier::SY);
|
||||
}
|
||||
|
||||
pub fn read_data(&self) {
|
||||
unsafe {
|
||||
// Read from first bank
|
||||
let first_bank = &self.0[0];
|
||||
core::ptr::read_volatile(first_bank);
|
||||
self.read_data_bank(EVEN_BANK_INDEX);
|
||||
|
||||
barrier::dsb(barrier::SY);
|
||||
}
|
||||
|
||||
/// Triggers a read access on both the even and the odd bank of this array.
///
/// Unless the `cacheless` feature is enabled, the cache line addressed for each of
/// the two banks is invalidated first (`invalidate_bank`), followed by a
/// `barrier::dsb(barrier::SY)`, before the reads are issued.
/// NOTE(review): presumably this forces the reads to miss the cache and reach
/// memory, where the PIM unit observes them — confirm against the PIM model.
///
/// The reads go to `EVEN_BANK_INDEX` first and `ODD_BANK_INDEX` second, and are
/// followed by a final `barrier::dsb(barrier::SY)`.
pub fn execute_instruction_read_dual_bank(&self) {
|
||||
if !cfg!(feature = "cacheless") {
|
||||
self.invalidate_bank(EVEN_BANK_INDEX);
|
||||
self.invalidate_bank(ODD_BANK_INDEX);
|
||||
|
||||
barrier::dsb(barrier::SY);
|
||||
}
|
||||
|
||||
// Read one element from the even bank, then one from the odd bank
|
||||
self.read_data_bank(EVEN_BANK_INDEX);
|
||||
self.read_data_bank(ODD_BANK_INDEX);
|
||||
|
||||
barrier::dsb(barrier::SY);
|
||||
}
|
||||
|
||||
/// Issues a volatile read of the first element belonging to bank `bank_index`.
///
/// The element is located at `bank_index * ELEMENTS_PER_CACHE_LINE` inside `self.0`.
/// The value that is read is discarded; only the memory access itself matters.
///
/// NOTE(review): the index expression is not range-checked here beyond the
/// indexing of `self.0` itself — callers are expected to pass a valid bank index.
fn read_data_bank(&self, bank_index: usize) {
|
||||
let bank = &self.0[bank_index * ELEMENTS_PER_CACHE_LINE];
|
||||
unsafe {
|
||||
// SAFETY: `bank` is a reference obtained by indexing `self.0`, so it points
// to a live element that is valid for reads.
core::ptr::read_volatile(bank);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn execute_instruction_write(&mut self) {
|
||||
if !cfg!(cacheless) {
|
||||
pub fn execute_instruction_write_single_bank(&mut self) {
|
||||
if !cfg!(feature = "cacheless") {
|
||||
self.preload_zero();
|
||||
barrier::dsb(barrier::SY);
|
||||
}
|
||||
|
||||
self.write_data();
|
||||
// Write to first bank
|
||||
self.write_data_bank(EVEN_BANK_INDEX);
|
||||
|
||||
if !cfg!(cacheless) {
|
||||
self.invalidate_flush_single_bank(0);
|
||||
if !cfg!(feature = "cacheless") {
|
||||
self.invalidate_flush_bank(EVEN_BANK_INDEX);
|
||||
}
|
||||
|
||||
barrier::dsb(barrier::SY);
|
||||
}
|
||||
|
||||
pub fn write_data(&mut self) {
|
||||
/// Triggers a write access on both the even and the odd bank of this array.
///
/// Unless the `cacheless` feature is enabled, the sequence is:
/// 1. `preload_zero()` on the whole array, followed by `barrier::dsb(barrier::SY)`,
/// 2. one write per bank (`EVEN_BANK_INDEX`, then `ODD_BANK_INDEX`),
/// 3. `invalidate_flush_bank` for the same two banks, in the same order.
///
/// With `cacheless` enabled only the two writes are performed. In both cases the
/// function ends with a `barrier::dsb(barrier::SY)`.
/// NOTE(review): the preload/flush pair presumably ensures the write leaves the
/// cache and reaches memory — confirm against the PIM model.
pub fn execute_instruction_write_dual_bank(&mut self) {
|
||||
if !cfg!(feature = "cacheless") {
|
||||
self.preload_zero();
|
||||
barrier::dsb(barrier::SY);
|
||||
}
|
||||
|
||||
// Write one element to the even bank, then one to the odd bank
|
||||
self.write_data_bank(EVEN_BANK_INDEX);
|
||||
self.write_data_bank(ODD_BANK_INDEX);
|
||||
|
||||
if !cfg!(feature = "cacheless") {
|
||||
self.invalidate_flush_bank(EVEN_BANK_INDEX);
|
||||
self.invalidate_flush_bank(ODD_BANK_INDEX);
|
||||
}
|
||||
|
||||
barrier::dsb(barrier::SY);
|
||||
}
|
||||
|
||||
fn write_data_bank(&mut self, bank_index: usize) {
|
||||
let bank = &mut self.0[bank_index * ELEMENTS_PER_CACHE_LINE];
|
||||
unsafe {
|
||||
// Write to first bank
|
||||
let first_bank = &mut self.0[0];
|
||||
core::ptr::write_volatile(first_bank, f16::ZERO);
|
||||
core::ptr::write_volatile(bank, f16::ZERO);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn invalidate(&self) {
|
||||
(0..ELEMENTS_PER_BANK_ARRAY)
|
||||
.step_by(ELEMENTS_PER_CACHE_LINE)
|
||||
.for_each(|idx| self.invalidate_single_bank(idx));
|
||||
(0..NUMBER_OF_BANKS).for_each(|idx| self.invalidate_bank(idx));
|
||||
}
|
||||
|
||||
pub fn invalidate_single_bank(&self, idx: usize) {
|
||||
fn invalidate_bank(&self, bank_index: usize) {
|
||||
let bank = &self.0[bank_index * ELEMENTS_PER_CACHE_LINE];
|
||||
unsafe {
|
||||
// Invalidate first bank
|
||||
asm!("dc ivac, {val}", val = in(reg) &self.0[idx]);
|
||||
asm!("dc ivac, {val}", val = in(reg) bank);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn invalidate_flush(&self) {
|
||||
(0..ELEMENTS_PER_BANK_ARRAY)
|
||||
.step_by(ELEMENTS_PER_CACHE_LINE)
|
||||
.for_each(|idx| self.invalidate_flush_single_bank(idx));
|
||||
(0..NUMBER_OF_BANKS).for_each(|idx| self.invalidate_flush_bank(idx));
|
||||
}
|
||||
|
||||
pub fn invalidate_flush_single_bank(&self, idx: usize) {
|
||||
fn invalidate_flush_bank(&self, bank_index: usize) {
|
||||
let bank = &self.0[bank_index * ELEMENTS_PER_CACHE_LINE];
|
||||
unsafe {
|
||||
// Invalidate and flush first bank
|
||||
asm!("dc civac, {val}", val = in(reg) &self.0[idx]);
|
||||
asm!("dc civac, {val}", val = in(reg) bank);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn preload_zero(&self) {
|
||||
(0..ELEMENTS_PER_BANK_ARRAY)
|
||||
.step_by(ELEMENTS_PER_CACHE_LINE)
|
||||
.for_each(|idx| self.preload_zero_single_bank(idx));
|
||||
(0..NUMBER_OF_BANKS).for_each(|idx| self.preload_zero_bank(idx));
|
||||
}
|
||||
|
||||
pub fn preload_zero_single_bank(&self, idx: usize) {
|
||||
fn preload_zero_bank(&self, bank_index: usize) {
|
||||
let bank = &self.0[bank_index * ELEMENTS_PER_CACHE_LINE];
|
||||
unsafe {
|
||||
// Preload first bank
|
||||
asm!("dc zva, {val}", val = in(reg) &self.0[idx]);
|
||||
asm!("dc zva, {val}", val = in(reg) bank);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[repr(C, align(65536))]
|
||||
pub struct ComputeArray<const N: usize>(pub [BankArray; N]);
|
||||
|
||||
impl<const N: usize> ComputeArray<N> {
|
||||
|
||||
@@ -9,15 +9,34 @@ pub const TEST_KERNEL: Kernel = Kernel([
|
||||
src: File::Bank,
|
||||
dst: File::GrfB { index: 0 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfA { index: 1 },
|
||||
},
|
||||
Instruction::MOV {
|
||||
src: File::Bank,
|
||||
dst: File::GrfB { index: 1 },
|
||||
},
|
||||
Instruction::MAC {
|
||||
src0: File::Bank,
|
||||
src1: File::GrfA { index: 0 },
|
||||
src2: File::GrfB { index: 0 },
|
||||
dst: File::GrfB { index: 0 },
|
||||
aam: true
|
||||
src2: File::GrfA { index: 1 },
|
||||
dst: File::GrfA { index: 1 },
|
||||
aam: false,
|
||||
},
|
||||
Instruction::MAC {
|
||||
src0: File::Bank,
|
||||
src1: File::GrfB { index: 0 },
|
||||
src2: File::GrfB { index: 1 },
|
||||
dst: File::GrfB { index: 1 },
|
||||
aam: false,
|
||||
},
|
||||
Instruction::FILL {
|
||||
src: File::GrfB { index: 0 },
|
||||
src: File::GrfA { index: 1 },
|
||||
dst: File::Bank,
|
||||
},
|
||||
Instruction::FILL {
|
||||
src: File::GrfB { index: 1 },
|
||||
dst: File::Bank,
|
||||
},
|
||||
Instruction::EXIT,
|
||||
@@ -44,8 +63,4 @@ pub const TEST_KERNEL: Kernel = Kernel([
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
Instruction::NOP,
|
||||
]);
|
||||
|
||||
@@ -19,7 +19,7 @@ impl PimState {
|
||||
|
||||
// TODO: return a token that switches back to single-bank mode when it is dropped
|
||||
pub fn set_bank_mode(&mut self, bank_mode: BankMode) {
|
||||
if cfg!(cacheless) {
|
||||
if cfg!(feature = "cacheless") {
|
||||
match bank_mode {
|
||||
BankMode::SingleBank => unsafe { boot::set_page_table_cache() },
|
||||
BankMode::AllBank => (),
|
||||
|
||||
@@ -6,6 +6,9 @@ edition = "2021"
|
||||
[lib]
|
||||
crate-type = ["staticlib"]
|
||||
|
||||
[features]
|
||||
shared_pim_units = []
|
||||
|
||||
[dependencies]
|
||||
cxx = "1.0.110"
|
||||
env_logger = "0.10.1"
|
||||
|
||||
@@ -12,7 +12,7 @@ mod ffi {
|
||||
extern "Rust" {
|
||||
type PimVM;
|
||||
|
||||
fn new_pim_vm(num_pim_units: u32) -> Box<PimVM>;
|
||||
fn new_pim_vm(num_banks: u32) -> Box<PimVM>;
|
||||
fn reset(&mut self);
|
||||
fn apply_config(&mut self, config: &str);
|
||||
fn bank_mode(&self) -> BankMode;
|
||||
@@ -93,7 +93,13 @@ impl PimVM {
|
||||
}
|
||||
}
|
||||
|
||||
fn new_pim_vm(num_pim_units: u32) -> Box<PimVM> {
|
||||
fn new_pim_vm(num_banks: u32) -> Box<PimVM> {
|
||||
let num_pim_units = if cfg!(feature = "shared_pim_units") {
|
||||
num_banks / 2
|
||||
} else {
|
||||
num_banks
|
||||
};
|
||||
|
||||
Box::new(PimVM {
|
||||
pim_units: vec![PimUnit::default(); num_pim_units as _],
|
||||
pim_config: PimConfig {
|
||||
@@ -110,16 +116,26 @@ impl PimVM {
|
||||
pub fn execute_read(&mut self, bank_index: u32, address: u32, bank_data: &[u8]) {
|
||||
assert_eq!(bank_data.len(), BURST_LENGTH);
|
||||
|
||||
let pim_unit = &mut self.pim_units[bank_index as usize];
|
||||
let pim_unit_index = if cfg!(feature = "shared_pim_units") {
|
||||
bank_index / 2
|
||||
} else {
|
||||
bank_index
|
||||
};
|
||||
|
||||
let pim_unit = &mut self.pim_units[pim_unit_index as usize];
|
||||
|
||||
let mut inst = self.pim_config.kernel.0[pim_unit.pc as usize];
|
||||
|
||||
log::debug!(
|
||||
"PimUnit {pim_unit_index} Execute PC {}: {inst:?}",
|
||||
pim_unit.pc
|
||||
);
|
||||
|
||||
pim_unit.pc += 1;
|
||||
|
||||
let aam_grf_a_index = (address >> GRF_A_BIT_OFFSET) & 0b111;
|
||||
let aam_grf_b_index = (address >> GRF_B_BIT_OFFSET) & 0b111;
|
||||
|
||||
log::debug!("PimUnit {bank_index} Execute PC {}: {inst:?}", pim_unit.pc);
|
||||
|
||||
// The JUMP instruction is zero-cycle and not actually executed
|
||||
while let Instruction::JUMP { offset, count } = inst {
|
||||
pim_unit.jump_counter = match pim_unit.jump_counter {
|
||||
@@ -135,7 +151,7 @@ impl PimVM {
|
||||
}
|
||||
|
||||
pim_unit.pc = new_pc as _;
|
||||
log::debug!("PimUnit {bank_index} New PC {new_pc}: {inst:?}");
|
||||
log::debug!("PimUnit {pim_unit_index} New PC {new_pc}: {inst:?}");
|
||||
}
|
||||
|
||||
inst = self.pim_config.kernel.0[pim_unit.pc as usize];
|
||||
@@ -289,21 +305,27 @@ impl PimVM {
|
||||
.try_into()
|
||||
.unwrap();
|
||||
|
||||
log::debug!("{data0:?}\n{data1:?}\n{data2:?}\n{product:?}\n{sum:?}");
|
||||
log::debug!("{data0:?}, {data1:?}, {data2:?}, {product:?}, {sum:?}");
|
||||
PimVM::store(dst, pim_unit, &sum);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn execute_write(&mut self, bank_index: u32) -> [u8; BURST_LENGTH] {
|
||||
let pim_unit = &mut self.pim_units[bank_index as usize];
|
||||
let pim_unit_index = if cfg!(feature = "shared_pim_units") {
|
||||
bank_index / 2
|
||||
} else {
|
||||
bank_index
|
||||
};
|
||||
|
||||
let pim_unit = &mut self.pim_units[pim_unit_index as usize];
|
||||
|
||||
let current_pc = pim_unit.pc;
|
||||
pim_unit.pc += 1;
|
||||
|
||||
let inst = &self.pim_config.kernel.0[current_pc as usize];
|
||||
|
||||
log::debug!("PimUnit {bank_index} Execute PC {current_pc}: {inst:?}");
|
||||
log::debug!("PimUnit {pim_unit_index} Execute PC {current_pc}: {inst:?}");
|
||||
|
||||
let data = match inst {
|
||||
Instruction::FILL { src, dst } => {
|
||||
@@ -317,6 +339,8 @@ impl PimVM {
|
||||
panic!("Unsupported dst operand: {dst:?}")
|
||||
}
|
||||
|
||||
log::debug!("Store {data:?}");
|
||||
|
||||
data
|
||||
}
|
||||
_ => panic!("Unsupported instruction for write: {inst:?}"),
|
||||
|
||||
Reference in New Issue
Block a user