Crude implementation of matrices using nalgebra
This commit is contained in:
@@ -46,16 +46,6 @@ pub enum Instruction {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Instruction {
|
|
||||||
pub fn supported_source(&self, src: File) -> bool {
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn supported_destination(&self, src: File) -> bool {
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||||
pub enum File {
|
pub enum File {
|
||||||
GrfA { index: u8 },
|
GrfA { index: u8 },
|
||||||
|
|||||||
@@ -12,10 +12,11 @@ cacheless = []
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
aarch64-cpu = "9.4.0"
|
aarch64-cpu = "9.4.0"
|
||||||
half = { version = "2.3.1", default-features = false }
|
half = { version = "2.3.1", default-features = false }
|
||||||
serde = { version = "1.0", default-features = false, features = ["derive"] }
|
nalgebra = { version = "0.32.3", default-features = false }
|
||||||
serde-json-core = "0.5.1"
|
|
||||||
|
|
||||||
pim-isa = { path = "../pim-isa", default-features = false }
|
pim-isa = { path = "../pim-isa", default-features = false }
|
||||||
|
serde-json-core = "0.5.1"
|
||||||
|
serde = { version = "1.0", default-features = false, features = ["derive"] }
|
||||||
|
num-traits = { version = "0.2.17", default-features = false }
|
||||||
|
|
||||||
[profile.dev]
|
[profile.dev]
|
||||||
panic = "abort"
|
panic = "abort"
|
||||||
|
|||||||
@@ -1,20 +1,27 @@
|
|||||||
|
#![feature(generic_const_exprs)]
|
||||||
#![no_std]
|
#![no_std]
|
||||||
#![no_main]
|
#![no_main]
|
||||||
|
|
||||||
use aarch64_cpu::asm::barrier;
|
use aarch64_cpu::asm::barrier;
|
||||||
use core::{
|
use core::{
|
||||||
|
cell::RefCell,
|
||||||
fmt::Write,
|
fmt::Write,
|
||||||
panic::PanicInfo,
|
panic::PanicInfo,
|
||||||
sync::atomic::{compiler_fence, Ordering},
|
sync::atomic::{compiler_fence, Ordering},
|
||||||
};
|
};
|
||||||
use half::f16;
|
use half::f16;
|
||||||
|
use nalgebra::{Const, Matrix, Matrix2, SMatrixView};
|
||||||
use pim::{
|
use pim::{
|
||||||
array::{BankArray, ComputeArray},
|
array::{PimMatrixArena, PimStorage},
|
||||||
|
// array::PimMatrix,
|
||||||
|
// array::{BankArray, ComputeArray},
|
||||||
kernel::TEST_KERNEL,
|
kernel::TEST_KERNEL,
|
||||||
|
matrix::{F16x1, F16x16},
|
||||||
state::PimState,
|
state::PimState,
|
||||||
};
|
};
|
||||||
use pim_isa::BankMode;
|
use pim_isa::BankMode;
|
||||||
use uart::Uart0;
|
use uart::Uart0;
|
||||||
|
|
||||||
mod boot;
|
mod boot;
|
||||||
mod m5ops;
|
mod m5ops;
|
||||||
mod pim;
|
mod pim;
|
||||||
@@ -22,53 +29,74 @@ mod uart;
|
|||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub extern "C" fn entry() -> ! {
|
pub extern "C" fn entry() -> ! {
|
||||||
let mut pim_state = PimState::new(&TEST_KERNEL);
|
|
||||||
let mut compute_array: ComputeArray<3> = ComputeArray([
|
|
||||||
BankArray([f16::from_f32(0.1); 512]),
|
|
||||||
BankArray([f16::from_f32(0.2); 512]),
|
|
||||||
BankArray([f16::from_f32(0.3); 512]),
|
|
||||||
]);
|
|
||||||
let dummy_array = BankArray::default();
|
|
||||||
let mut uart = Uart0;
|
let mut uart = Uart0;
|
||||||
|
let mut pim_state = PimState::new(&TEST_KERNEL);
|
||||||
|
|
||||||
writeln!(
|
let mut arena = RefCell::new(PimMatrixArena([[F16x16::default(); 8]; 8]));
|
||||||
&mut uart,
|
let pim_storage0 = PimStorage {
|
||||||
"PIM array is at {:x?}",
|
arena: &arena,
|
||||||
core::ptr::addr_of!(compute_array)
|
index: 0,
|
||||||
)
|
};
|
||||||
.unwrap();
|
let pim_storage1 = PimStorage {
|
||||||
|
arena: &arena,
|
||||||
|
index: 1,
|
||||||
|
};
|
||||||
|
let pim_storage2 = PimStorage {
|
||||||
|
arena: &arena,
|
||||||
|
index: 2,
|
||||||
|
};
|
||||||
|
let mut matrix0 = Matrix::from_data(pim_storage0);
|
||||||
|
let mut matrix1 = Matrix::from_data(pim_storage1);
|
||||||
|
matrix0.fill_lower_triangle(F16x1(f16::ONE), 0);
|
||||||
|
matrix1.fill_upper_triangle(F16x1(f16::from_f32(2.0)), 0);
|
||||||
|
writeln!(&mut uart, "{}", matrix0).unwrap();
|
||||||
|
writeln!(&mut uart, "{}", matrix1).unwrap();
|
||||||
|
|
||||||
writeln!(
|
// let mut compute_array: ComputeArray<3> = ComputeArray([
|
||||||
&mut uart,
|
// BankArray([F16x16([f16::from_f32(0.1); 16]); 32]),
|
||||||
"BankArray0: [{:?}, ...]\nBankArray1: [{:?}, ...]\nBankArray2: [{:?}, ...]",
|
// BankArray([f16::from_f32(0.2); 512]),
|
||||||
compute_array.0[0].0[0], compute_array.0[1].0[0], compute_array.0[2].0[0]
|
// BankArray([f16::from_f32(0.3); 512]),
|
||||||
)
|
// ]);
|
||||||
.unwrap();
|
// let dummy_array = BankArray::default();
|
||||||
|
|
||||||
writeln!(&mut uart, "MAC: BankArray2 += BankArray0 * BankArray1",).unwrap();
|
// writeln!(
|
||||||
|
// &mut uart,
|
||||||
|
// "PIM array is at {:x?}",
|
||||||
|
// core::ptr::addr_of!(compute_array)
|
||||||
|
// )
|
||||||
|
// .unwrap();
|
||||||
|
|
||||||
// Invalidate and flush array just in case
|
// writeln!(
|
||||||
compute_array.invalidate_flush();
|
// &mut uart,
|
||||||
dummy_array.invalidate_flush();
|
// "BankArray0: [{:?}, ...]\nBankArray1: [{:?}, ...]\nBankArray2: [{:?}, ...]",
|
||||||
barrier::dsb(barrier::SY);
|
// compute_array.0[0].0[0], compute_array.0[1].0[0], compute_array.0[2].0[0]
|
||||||
|
// )
|
||||||
|
// .unwrap();
|
||||||
|
|
||||||
pim_state.set_bank_mode(BankMode::PimAllBank);
|
// writeln!(&mut uart, "MAC: BankArray2 += BankArray0 * BankArray1",).unwrap();
|
||||||
compute_array.0[1].execute_instruction_read_dual_bank();
|
|
||||||
compute_array.0[2].execute_instruction_read_dual_bank();
|
|
||||||
compute_array.0[0].execute_instruction_read_dual_bank();
|
|
||||||
compute_array.0[2].execute_instruction_write_dual_bank();
|
|
||||||
dummy_array.execute_instruction_read_single_bank();
|
|
||||||
pim_state.set_bank_mode(BankMode::SingleBank);
|
|
||||||
|
|
||||||
compute_array.invalidate();
|
// // Invalidate and flush array just in case
|
||||||
barrier::dsb(barrier::SY);
|
// compute_array.invalidate_flush();
|
||||||
|
// dummy_array.invalidate_flush();
|
||||||
|
// barrier::dsb(barrier::SY);
|
||||||
|
|
||||||
writeln!(
|
// pim_state.set_bank_mode(BankMode::PimAllBank);
|
||||||
&mut uart,
|
// compute_array.0[1].execute_instruction_read_dual_bank();
|
||||||
"BankArray2: [{:?}, ...]",
|
// compute_array.0[2].execute_instruction_read_dual_bank();
|
||||||
compute_array.0[2].0[0]
|
// compute_array.0[0].execute_instruction_read_dual_bank();
|
||||||
)
|
// compute_array.0[2].execute_instruction_write_dual_bank();
|
||||||
.unwrap();
|
// dummy_array.execute_instruction_read_single_bank();
|
||||||
|
// pim_state.set_bank_mode(BankMode::SingleBank);
|
||||||
|
|
||||||
|
// compute_array.invalidate();
|
||||||
|
// barrier::dsb(barrier::SY);
|
||||||
|
|
||||||
|
// writeln!(
|
||||||
|
// &mut uart,
|
||||||
|
// "BankArray2: [{:?}, ...]",
|
||||||
|
// compute_array.0[2].0[0]
|
||||||
|
// )
|
||||||
|
// .unwrap();
|
||||||
|
|
||||||
// writeln!(&mut uart, "ComputeArray:\n{:#?}", compute_array.0[2]).unwrap();
|
// writeln!(&mut uart, "ComputeArray:\n{:#?}", compute_array.0[2]).unwrap();
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
pub mod array;
|
pub mod array;
|
||||||
pub mod config;
|
pub mod config;
|
||||||
pub mod kernel;
|
pub mod kernel;
|
||||||
|
pub mod matrix;
|
||||||
pub mod state;
|
pub mod state;
|
||||||
|
|||||||
@@ -1,25 +1,91 @@
|
|||||||
|
use super::matrix::{F16x1, F16x16};
|
||||||
use aarch64_cpu::asm::barrier;
|
use aarch64_cpu::asm::barrier;
|
||||||
use core::arch::asm;
|
use core::panic;
|
||||||
|
use core::{arch::asm, cell::RefCell};
|
||||||
use half::f16;
|
use half::f16;
|
||||||
|
use nalgebra::{Const, Dyn, RawStorage, RawStorageMut, SMatrix, Storage};
|
||||||
|
|
||||||
const NUMBER_OF_BANKS: usize = 32;
|
// const NUMBER_OF_BANKS: usize = 32;
|
||||||
const ELEMENTS_PER_CACHE_LINE: usize = 16;
|
|
||||||
const ELEMENTS_PER_BANK_ARRAY: usize = NUMBER_OF_BANKS * ELEMENTS_PER_CACHE_LINE;
|
|
||||||
const EVEN_BANK_INDEX: usize = 0;
|
const EVEN_BANK_INDEX: usize = 0;
|
||||||
const ODD_BANK_INDEX: usize = 8;
|
const ODD_BANK_INDEX: usize = 8;
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
#[repr(C, align(1024))]
|
#[repr(C, align(1024))]
|
||||||
pub struct BankArray(pub [f16; ELEMENTS_PER_BANK_ARRAY]);
|
pub struct PimMatrixArena<const R: usize, const C: usize>(pub [[F16x16; R]; C]);
|
||||||
|
|
||||||
impl Default for BankArray {
|
#[derive(Debug)]
|
||||||
fn default() -> Self {
|
pub struct PimStorage<'a, const R: usize, const C: usize> {
|
||||||
Self([f16::ZERO; ELEMENTS_PER_BANK_ARRAY])
|
pub arena: &'a RefCell<PimMatrixArena<R, C>>,
|
||||||
|
pub index: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe impl<'a, const R: usize, const C: usize> RawStorage<F16x1, Const<R>, Const<C>>
|
||||||
|
for PimStorage<'a, R, C>
|
||||||
|
{
|
||||||
|
type RStride = Dyn;
|
||||||
|
type CStride = Dyn;
|
||||||
|
|
||||||
|
fn ptr(&self) -> *const F16x1 {
|
||||||
|
unsafe {
|
||||||
|
(&self.arena.borrow().0[0][0] as *const F16x16 as *const F16x1).offset(self.index as _)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn shape(&self) -> (Const<R>, Const<C>) {
|
||||||
|
(Const::<R>, Const::<C>)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn strides(&self) -> (Self::RStride, Self::CStride) {
|
||||||
|
(Dyn(16), Dyn(16 * R))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_contiguous(&self) -> bool {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn as_slice_unchecked(&self) -> &[F16x1] {
|
||||||
|
panic!("PimStorage is not contiguous!");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl BankArray {
|
unsafe impl<'a, const R: usize, const C: usize> RawStorageMut<F16x1, Const<R>, Const<C>>
|
||||||
pub fn execute_instruction_read_single_bank(&self) {
|
for PimStorage<'a, R, C>
|
||||||
|
{
|
||||||
|
fn ptr_mut(&mut self) -> *mut F16x1 {
|
||||||
|
unsafe {
|
||||||
|
(&mut self.arena.borrow_mut().0[0][0] as *mut F16x16 as *mut F16x1)
|
||||||
|
.offset(self.index as _)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn as_mut_slice_unchecked(&mut self) -> &mut [F16x1] {
|
||||||
|
panic!("PimStorage is not contiguous!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// #[repr(C, align(1024))]
|
||||||
|
// #[derive(Clone, Debug, Default)]
|
||||||
|
// pub struct PimMatrix(pub SMatrix<f166, 8, 8>);
|
||||||
|
|
||||||
|
// impl PimRegion for PimMatrix {
|
||||||
|
// const NUMBER_OF_BANKS: usize = 64;
|
||||||
|
|
||||||
|
// fn bank_ptr(&self, bank_index: usize) -> *const f16 {
|
||||||
|
// return &self.0[bank_index].0 as _;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16 {
|
||||||
|
// return &mut self.0[bank_index].0 as _;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
pub trait PimRegion {
|
||||||
|
const NUMBER_OF_BANKS: usize;
|
||||||
|
|
||||||
|
fn bank_ptr(&self, bank_index: usize) -> *const f16;
|
||||||
|
fn bank_ptr_mut(&mut self, bank_index: usize) -> *mut f16;
|
||||||
|
|
||||||
|
fn execute_instruction_read_single_bank(&self) {
|
||||||
if !cfg!(feature = "cacheless") {
|
if !cfg!(feature = "cacheless") {
|
||||||
self.invalidate_bank(EVEN_BANK_INDEX);
|
self.invalidate_bank(EVEN_BANK_INDEX);
|
||||||
|
|
||||||
@@ -32,7 +98,7 @@ impl BankArray {
|
|||||||
barrier::dsb(barrier::SY);
|
barrier::dsb(barrier::SY);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute_instruction_read_dual_bank(&self) {
|
fn execute_instruction_read_dual_bank(&self) {
|
||||||
if !cfg!(feature = "cacheless") {
|
if !cfg!(feature = "cacheless") {
|
||||||
self.invalidate_bank(EVEN_BANK_INDEX);
|
self.invalidate_bank(EVEN_BANK_INDEX);
|
||||||
self.invalidate_bank(ODD_BANK_INDEX);
|
self.invalidate_bank(ODD_BANK_INDEX);
|
||||||
@@ -48,13 +114,13 @@ impl BankArray {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn read_data_bank(&self, bank_index: usize) {
|
fn read_data_bank(&self, bank_index: usize) {
|
||||||
let bank = &self.0[bank_index * ELEMENTS_PER_CACHE_LINE];
|
let bank = self.bank_ptr(bank_index);
|
||||||
unsafe {
|
unsafe {
|
||||||
core::ptr::read_volatile(bank);
|
core::ptr::read_volatile(bank);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute_instruction_write_single_bank(&mut self) {
|
fn execute_instruction_write_single_bank(&mut self) {
|
||||||
if !cfg!(feature = "cacheless") {
|
if !cfg!(feature = "cacheless") {
|
||||||
self.preload_zero();
|
self.preload_zero();
|
||||||
barrier::dsb(barrier::SY);
|
barrier::dsb(barrier::SY);
|
||||||
@@ -70,7 +136,7 @@ impl BankArray {
|
|||||||
barrier::dsb(barrier::SY);
|
barrier::dsb(barrier::SY);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute_instruction_write_dual_bank(&mut self) {
|
fn execute_instruction_write_dual_bank(&mut self) {
|
||||||
if !cfg!(feature = "cacheless") {
|
if !cfg!(feature = "cacheless") {
|
||||||
self.preload_zero();
|
self.preload_zero();
|
||||||
barrier::dsb(barrier::SY);
|
barrier::dsb(barrier::SY);
|
||||||
@@ -89,40 +155,40 @@ impl BankArray {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn write_data_bank(&mut self, bank_index: usize) {
|
fn write_data_bank(&mut self, bank_index: usize) {
|
||||||
let bank = &mut self.0[bank_index * ELEMENTS_PER_CACHE_LINE];
|
let bank = self.bank_ptr_mut(bank_index);
|
||||||
unsafe {
|
unsafe {
|
||||||
core::ptr::write_volatile(bank, f16::ZERO);
|
core::ptr::write_volatile(bank, Default::default());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn invalidate(&self) {
|
fn invalidate(&self) {
|
||||||
(0..NUMBER_OF_BANKS).for_each(|idx| self.invalidate_bank(idx));
|
(0..Self::NUMBER_OF_BANKS).for_each(|idx| self.invalidate_bank(idx));
|
||||||
}
|
}
|
||||||
|
|
||||||
fn invalidate_bank(&self, bank_index: usize) {
|
fn invalidate_bank(&self, bank_index: usize) {
|
||||||
let bank = &self.0[bank_index * ELEMENTS_PER_CACHE_LINE];
|
let bank = self.bank_ptr(bank_index);
|
||||||
unsafe {
|
unsafe {
|
||||||
asm!("dc ivac, {val}", val = in(reg) bank);
|
asm!("dc ivac, {val}", val = in(reg) bank);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn invalidate_flush(&self) {
|
fn invalidate_flush(&self) {
|
||||||
(0..NUMBER_OF_BANKS).for_each(|idx| self.invalidate_flush_bank(idx));
|
(0..Self::NUMBER_OF_BANKS).for_each(|idx| self.invalidate_flush_bank(idx));
|
||||||
}
|
}
|
||||||
|
|
||||||
fn invalidate_flush_bank(&self, bank_index: usize) {
|
fn invalidate_flush_bank(&self, bank_index: usize) {
|
||||||
let bank = &self.0[bank_index * ELEMENTS_PER_CACHE_LINE];
|
let bank = self.bank_ptr(bank_index);
|
||||||
unsafe {
|
unsafe {
|
||||||
asm!("dc civac, {val}", val = in(reg) bank);
|
asm!("dc civac, {val}", val = in(reg) bank);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn preload_zero(&self) {
|
fn preload_zero(&self) {
|
||||||
(0..NUMBER_OF_BANKS).for_each(|idx| self.preload_zero_bank(idx));
|
(0..Self::NUMBER_OF_BANKS).for_each(|idx| self.preload_zero_bank(idx));
|
||||||
}
|
}
|
||||||
|
|
||||||
fn preload_zero_bank(&self, bank_index: usize) {
|
fn preload_zero_bank(&self, bank_index: usize) {
|
||||||
let bank = &self.0[bank_index * ELEMENTS_PER_CACHE_LINE];
|
let bank = self.bank_ptr(bank_index);
|
||||||
unsafe {
|
unsafe {
|
||||||
// Preload first bank
|
// Preload first bank
|
||||||
asm!("dc zva, {val}", val = in(reg) bank);
|
asm!("dc zva, {val}", val = in(reg) bank);
|
||||||
@@ -132,9 +198,9 @@ impl BankArray {
|
|||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
#[repr(C, align(65536))]
|
#[repr(C, align(65536))]
|
||||||
pub struct ComputeArray<const N: usize>(pub [BankArray; N]);
|
pub struct ComputeArray<T: PimRegion, const N: usize>(pub [T; N]);
|
||||||
|
|
||||||
impl<const N: usize> ComputeArray<N> {
|
impl<T: PimRegion, const N: usize> ComputeArray<T, N> {
|
||||||
pub fn invalidate_flush(&self) {
|
pub fn invalidate_flush(&self) {
|
||||||
self.0
|
self.0
|
||||||
.iter()
|
.iter()
|
||||||
@@ -146,7 +212,7 @@ impl<const N: usize> ComputeArray<N> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<const N: usize> Default for ComputeArray<N> {
|
impl<T: PimRegion + Default, const N: usize> Default for ComputeArray<T, N> {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self(core::array::from_fn(|_| Default::default()))
|
Self(core::array::from_fn(|_| Default::default()))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,13 +20,13 @@ pub const TEST_KERNEL: Kernel = Kernel([
|
|||||||
Instruction::MAC {
|
Instruction::MAC {
|
||||||
src0: File::Bank,
|
src0: File::Bank,
|
||||||
src1: File::GrfA { index: 0 },
|
src1: File::GrfA { index: 0 },
|
||||||
src2: File::GrfA { index: 1 },
|
src2: File::GrfB { index: 0 },
|
||||||
dst: File::GrfA { index: 1 },
|
dst: File::GrfB { index: 0 },
|
||||||
aam: false,
|
aam: false,
|
||||||
},
|
},
|
||||||
Instruction::MAC {
|
Instruction::MAC {
|
||||||
src0: File::Bank,
|
src0: File::Bank,
|
||||||
src1: File::GrfB { index: 0 },
|
src1: File::GrfA { index: 1 },
|
||||||
src2: File::GrfB { index: 1 },
|
src2: File::GrfB { index: 1 },
|
||||||
dst: File::GrfB { index: 1 },
|
dst: File::GrfB { index: 1 },
|
||||||
aam: false,
|
aam: false,
|
||||||
|
|||||||
117
pim-os/src/pim/matrix.rs
Normal file
117
pim-os/src/pim/matrix.rs
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
use half::f16;
|
||||||
|
|
||||||
|
const FLOATING_POINT_UNITS: usize = 16;
|
||||||
|
|
||||||
|
/// Newtype around a single `half::f16` value.
///
/// The inner value is public, so callers can construct it directly as
/// `F16x1(value)` and read it back through `.0`.
#[repr(C)]
|
||||||
|
#[derive(Default, Clone, Copy, PartialEq)]
|
||||||
|
pub struct F16x1(pub f16);
|
||||||
|
|
||||||
|
impl core::fmt::Debug for F16x1 {
|
||||||
|
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||||
|
Ok(self.0.fmt(f)?)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::fmt::Display for F16x1 {
|
||||||
|
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||||
|
Ok(self.0.fmt(f)?)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl num_traits::identities::Zero for F16x1 {
|
||||||
|
fn zero() -> Self {
|
||||||
|
Self(f16::ZERO)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_zero(&self) -> bool {
|
||||||
|
self.0 == f16::ZERO
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl num_traits::identities::One for F16x1 {
|
||||||
|
fn one() -> Self {
|
||||||
|
Self(f16::ONE)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::ops::Add<F16x1> for F16x1 {
|
||||||
|
type Output = Self;
|
||||||
|
|
||||||
|
fn add(self, rhs: F16x1) -> Self::Output {
|
||||||
|
Self(self.0 + rhs.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::ops::AddAssign<F16x1> for F16x1 {
|
||||||
|
fn add_assign(&mut self, rhs: F16x1) {
|
||||||
|
self.0 += rhs.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::ops::Mul<F16x1> for F16x1 {
|
||||||
|
type Output = Self;
|
||||||
|
|
||||||
|
fn mul(self, rhs: F16x1) -> Self::Output {
|
||||||
|
Self(self.0 * rhs.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::ops::MulAssign<F16x1> for F16x1 {
|
||||||
|
fn mul_assign(&mut self, rhs: F16x1) {
|
||||||
|
self.0 *= rhs.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fixed-size group of `FLOATING_POINT_UNITS` (16) [`F16x1`] lanes stored as one array.
///
/// The lane array is public, so callers can build a value as `F16x16([...])`
/// and index individual lanes through `.0`.
#[repr(C)]
|
||||||
|
#[derive(Default, Clone, Copy, Debug, PartialEq)]
|
||||||
|
pub struct F16x16(pub [F16x1; FLOATING_POINT_UNITS]);
|
||||||
|
|
||||||
|
impl num_traits::identities::Zero for F16x16 {
|
||||||
|
fn zero() -> Self {
|
||||||
|
Self([F16x1::zero(); FLOATING_POINT_UNITS])
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_zero(&self) -> bool {
|
||||||
|
self.0 == [F16x1::zero(); FLOATING_POINT_UNITS]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl num_traits::identities::One for F16x16 {
|
||||||
|
fn one() -> Self {
|
||||||
|
Self([F16x1::one(); FLOATING_POINT_UNITS])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::ops::Add<F16x16> for F16x16 {
|
||||||
|
type Output = Self;
|
||||||
|
|
||||||
|
fn add(self, rhs: F16x16) -> Self::Output {
|
||||||
|
Self(core::array::from_fn(|i| self.0[i] + rhs.0[i]))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::ops::AddAssign<F16x16> for F16x16 {
|
||||||
|
fn add_assign(&mut self, rhs: F16x16) {
|
||||||
|
self.0
|
||||||
|
.iter_mut()
|
||||||
|
.zip(&rhs.0)
|
||||||
|
.for_each(|(left, right)| *left += *right);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::ops::Mul<F16x16> for F16x16 {
|
||||||
|
type Output = Self;
|
||||||
|
|
||||||
|
fn mul(self, rhs: F16x16) -> Self::Output {
|
||||||
|
Self(core::array::from_fn(|i| self.0[i] * rhs.0[i]))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::ops::MulAssign<F16x16> for F16x16 {
|
||||||
|
fn mul_assign(&mut self, rhs: F16x16) {
|
||||||
|
self.0
|
||||||
|
.iter_mut()
|
||||||
|
.zip(&rhs.0)
|
||||||
|
.for_each(|(left, right)| *left *= *right);
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user