diff --git a/public/samsung.svg b/public/samsung.svg index be1cb18..3112a4d 100644 --- a/public/samsung.svg +++ b/public/samsung.svg @@ -4,9 +4,9 @@ + inkscape:label="10" + transform="translate(0,90.664115)"> + id="g2" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g4" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g6" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g8" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g10" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g12" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g14" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g16" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g18" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g20" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g22" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g24" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g26" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g28" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g30" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g32" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g34" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g36" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g38" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g40" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g42" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g44" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g46" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g48" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g50" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g52" + 
transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g54" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g56" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g58" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g60" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g62" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g64" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g66" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g68" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g70" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g72" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g74" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g76" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g78" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g80" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g82" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g84" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g86" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g88" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g90" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g92" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g94" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g96" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g98" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g100" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g102" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g104" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g106" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g108" + 
transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g110" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g112" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g114" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g116" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g118" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g120" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g122" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g124" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g126" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g128" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g130" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g132" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g134" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g136" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g138" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g140" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g142" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g144" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g146" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g148" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g150" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g152" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g154" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g156" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g158" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g160" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g162" + 
transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g164" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g166" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g168" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g170" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g172" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g174" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g176" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g178" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g180" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g182" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g184" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g186" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g188" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g190" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g192" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g194" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g196" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g198" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g200" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g202" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g204" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + clip-path="url(#clipPath207)" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + clip-path="url(#clipPath210)" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + clip-path="url(#clipPath213)" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g215" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g217" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + 
id="g219" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g221" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g223" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g225" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g227" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g229" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g231" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> + id="g233" + transform="matrix(1.7010113,0,0,1.7010113,0,-90.664115)"> \ No newline at end of file +
+ +--- +src: ./slides/appendix.md +--- \ No newline at end of file diff --git a/slides/appendix.md b/slides/appendix.md new file mode 100644 index 0000000..e888c17 --- /dev/null +++ b/slides/appendix.md @@ -0,0 +1,42 @@ +## Appendix +### GEMV Kernel +
+ + + +```rust {all}{lines:true} +pub fn execute( + matrix: &Matrix, + input_vector: &Vector, + output_partial_sum_vector: &mut SVector, + dummy: &impl PimOperand, +) { + // Load input vector into GRF-A registers + for chunk in input_vector.0.iter() { + chunk.execute_read(); + } + + // Execute the MAC instructions without memory barriers + for sub_matrix in matrix.0.iter() { + for column_block in sub_matrix.fixed_rows::<1>(0).iter() { + column_block.execute_read_async(); + } + } + + // Verify all memory accesses have finished + barrier::dsb(barrier::SY); + + // Copy the partial sums into the bank + for chunk in output_partial_sum_vector + .fixed_rows_with_step_mut::(0, 16) + .iter_mut() + { + chunk.execute_write(); + } + + // Execute the EXIT instruction + dummy.execute_read(); +} +``` + + \ No newline at end of file diff --git a/slides/conclusion.md b/slides/conclusion.md new file mode 100644 index 0000000..000fc43 --- /dev/null +++ b/slides/conclusion.md @@ -0,0 +1,9 @@ +## Conclusion and Future Work +
+ +- achievable speedups of 17.6 × and 9.0 × (hypothetical infinite compute system) + - lower bound +- Linux driver implementation +- comparison with real neural network workloads +- consider replacing library approach with compiler approach +- power comparison, power models needed diff --git a/slides/implementation.md b/slides/implementation.md index 6247872..3e8e94c 100644 --- a/slides/implementation.md +++ b/slides/implementation.md @@ -11,7 +11,7 @@ figureCaption: The PIM-HBM model integrated into DRAMSys --- layout: figure-side figureUrl: /data_structures.svg -figureCaption: The PIM-HBM model integrated into DRAMSys +figureCaption: Data structures for instructions and register files --- ## Virtual Prototype diff --git a/slides/pim.md b/slides/pim.md index a4e60c3..bf4a2a0 100644 --- a/slides/pim.md +++ b/slides/pim.md @@ -141,3 +141,5 @@ figureCaption: Mapping of the weight matrix onto the memory banks simulation models needed research should not only focus on hardware but also explore the software side! + +deswegen baue ich einen virtual prototype \ No newline at end of file diff --git a/slides/simulations.md b/slides/simulations.md index 71bcaf1..4491d3a 100644 --- a/slides/simulations.md +++ b/slides/simulations.md @@ -30,9 +30,71 @@ +--- + +## Simulations +### System Configuration +
+ +- Two system configurations: + - ARM 3 GHz + - ARM Infinite + +- TODO ... GPU und so + --- layout: figure -figureUrl: /dnn.svg -figureCaption: A fully connected DNN layer +figureUrl: /speedup_normal.svg +figureCaption: Speedups of PIM compared to non-PIM (ARM system) +--- + +## Simulations +### Speedups / ARM System +
+ +--- +layout: figure +figureUrl: /speedup_inf.svg +figureCaption: Speedups of PIM compared to non-PIM (infinite compute system) +--- + +## Simulations +### Speedups / Infinite Compute System +
+ +--- +layout: figure +figureUrl: /samsung.svg +figureCaption: Speedups reported by Samsung for VADD and GEMV figureFootnoteNumber: 1 --- + +## Simulations +### Speedups / Samsung +
+ + + + Lee et al., “Hardware Architecture and Software Stack for PIM Based on Commercial DRAM Technology: Industrial Product”, 2021. + + + +--- +layout: figure +figureUrl: /runtimes_vector.svg +figureCaption: Runtimes for Vector Benchmarks +--- + +## Simulations +### Runtimes / Vector Benchmarks +
+ +--- +layout: figure +figureUrl: /runtimes_matrix.svg +figureCaption: Runtimes for Matrix Benchmarks +--- + +## Simulations +### Runtimes / Matrix Benchmarks +