use aarch64_cpu::asm::barrier; use core::arch::asm; use half::f16; const NUMBER_OF_BANKS: usize = 32; const ELEMENTS_PER_CACHE_LINE: usize = 16; const ELEMENTS_PER_BANK_ARRAY: usize = NUMBER_OF_BANKS * ELEMENTS_PER_CACHE_LINE; const EVEN_BANK_INDEX: usize = 0; const ODD_BANK_INDEX: usize = 8; #[derive(Clone, Debug)] #[repr(C, align(1024))] pub struct BankArray(pub [f16; ELEMENTS_PER_BANK_ARRAY]); impl Default for BankArray { fn default() -> Self { Self([f16::ZERO; ELEMENTS_PER_BANK_ARRAY]) } } impl BankArray { pub fn execute_instruction_read_single_bank(&self) { if !cfg!(feature = "cacheless") { self.invalidate_bank(EVEN_BANK_INDEX); barrier::dsb(barrier::SY); } // Read from first bank self.read_data_bank(EVEN_BANK_INDEX); barrier::dsb(barrier::SY); } pub fn execute_instruction_read_dual_bank(&self) { if !cfg!(feature = "cacheless") { self.invalidate_bank(EVEN_BANK_INDEX); self.invalidate_bank(ODD_BANK_INDEX); barrier::dsb(barrier::SY); } // Read from first and second bank self.read_data_bank(EVEN_BANK_INDEX); self.read_data_bank(ODD_BANK_INDEX); barrier::dsb(barrier::SY); } fn read_data_bank(&self, bank_index: usize) { let bank = &self.0[bank_index * ELEMENTS_PER_CACHE_LINE]; unsafe { core::ptr::read_volatile(bank); } } pub fn execute_instruction_write_single_bank(&mut self) { if !cfg!(feature = "cacheless") { self.preload_zero(); barrier::dsb(barrier::SY); } // Write to first bank self.write_data_bank(EVEN_BANK_INDEX); if !cfg!(feature = "cacheless") { self.invalidate_flush_bank(EVEN_BANK_INDEX); } barrier::dsb(barrier::SY); } pub fn execute_instruction_write_dual_bank(&mut self) { if !cfg!(feature = "cacheless") { self.preload_zero(); barrier::dsb(barrier::SY); } // Write to first and second bank self.write_data_bank(EVEN_BANK_INDEX); self.write_data_bank(ODD_BANK_INDEX); if !cfg!(feature = "cacheless") { self.invalidate_flush_bank(EVEN_BANK_INDEX); self.invalidate_flush_bank(ODD_BANK_INDEX); } barrier::dsb(barrier::SY); } fn write_data_bank(&mut self, bank_index: usize) { let bank = &mut self.0[bank_index * ELEMENTS_PER_CACHE_LINE]; unsafe { core::ptr::write_volatile(bank, f16::ZERO); } } pub fn invalidate(&self) { (0..NUMBER_OF_BANKS).for_each(|idx| self.invalidate_bank(idx)); } fn invalidate_bank(&self, bank_index: usize) { let bank = &self.0[bank_index * ELEMENTS_PER_CACHE_LINE]; unsafe { asm!("dc ivac, {val}", val = in(reg) bank); } } pub fn invalidate_flush(&self) { (0..NUMBER_OF_BANKS).for_each(|idx| self.invalidate_flush_bank(idx)); } fn invalidate_flush_bank(&self, bank_index: usize) { let bank = &self.0[bank_index * ELEMENTS_PER_CACHE_LINE]; unsafe { asm!("dc civac, {val}", val = in(reg) bank); } } pub fn preload_zero(&self) { (0..NUMBER_OF_BANKS).for_each(|idx| self.preload_zero_bank(idx)); } fn preload_zero_bank(&self, bank_index: usize) { let bank = &self.0[bank_index * ELEMENTS_PER_CACHE_LINE]; unsafe { // Preload first bank asm!("dc zva, {val}", val = in(reg) bank); } } } #[derive(Clone, Debug)] #[repr(C, align(65536))] pub struct ComputeArray(pub [BankArray; N]); impl ComputeArray { pub fn invalidate_flush(&self) { self.0 .iter() .for_each(|bank_array| bank_array.invalidate_flush()); } pub fn invalidate(&self) { self.0.iter().for_each(|bank_array| bank_array.invalidate()); } } impl Default for ComputeArray { fn default() -> Self { Self(core::array::from_fn(|_| Default::default())) } }