diff --git a/references.bib b/references.bib index ffa8a34..e3a5403 100644 --- a/references.bib +++ b/references.bib @@ -303,7 +303,7 @@ }@article{forlin2022, title = {Sim 2 {{PIM}}: {{A}} Complete Simulation Framework for {{Processing-in-Memory}}}, shorttitle = {Sim 2 {{PIM}}}, - author = {Forlin, Bruno E. and Santos, Paulo C. and Becker, Augusto E. and Alves, Marco A.Z. and Carro, Luigi}, + author = {Forlin, Bruno E. and others}, year = {2022}, month = jul, journal = {Journal of Systems Architecture}, @@ -315,12 +315,11 @@ abstract = {With the help of modern memory integration technologies, Processing-in-Memory (PIM) has emerged as a practical approach to mitigate the memory wall while improving performance and energy efficiency in contemporary applications. Since these designs encompass accelerating and increasing the efficiency of critical specific and general-purposed applications, it is expected that these accelerators will be coupled to existing systems and consequently with systems capable of multi-thread computing. However, there is a lack of tools capable of quickly simulating different PIMs designs and their suitable integration with other hosts. This gap is even worse when considering simulations of multi-core systems. This work presents Sim2PIM, a Simple Simulator for PIM devices that seamlessly integrates any PIM architecture with the host processor and memory hierarchy. The framework simulation achieves execution speeds and accuracy on par with the perf tool on host code, less than 10\% run-time overhead, and around 2\% difference in metrics. Additionally, by exploring the thread parallelism in the application and utilizing the host hardware, Sim2PIM can achieve more than 8{\texttimes} simulation speedup compared to a sequential simulation and orders of magnitude compared to other simulators. 
Sim2PIM is available to download at https://pim.computer/.}, langid = {english}, keywords = {not read}, - file = {C:\Users\christ\Nextcloud2\Verschiedenes\Zotero\storage\YKGM4QLD\E. Forlin et al. - 2022 - Sim 2 PIM A complete simulation framework for Pro.pdf} } @misc{hyun2024, title = {Pathfinding {{Future PIM Architectures}} by {{Demystifying}} a {{Commercial PIM Technology}}}, - author = {Hyun, Bongjoon and Kim, Taehun and Lee, Dongjae and Rhu, Minsoo}, + author = {Hyun, Bongjoon and others}, year = {2024}, month = mar, number = {arXiv:2308.00846}, @@ -332,14 +331,13 @@ archiveprefix = {arxiv}, langid = {english}, keywords = {Computer Science - Hardware Architecture,not read}, - file = {C:\Users\christ\Nextcloud2\Verschiedenes\Zotero\storage\XS65A46E\Hyun et al. - 2024 - Pathfinding Future PIM Architectures by Demystifyi.pdf} } @inproceedings{mosanu2022, title = {{{PiMulator}}: A {{Fast}} and {{Flexible Processing-in-Memory Emulation Platform}}}, shorttitle = {{{PiMulator}}}, booktitle = {2022 {{Design}}, {{Automation}} \& {{Test}} in {{Europe Conference}} \& {{Exhibition}} ({{DATE}})}, - author = {Mosanu, Sergiu and Sakib, Mohammad Nazmus and Tracy, Tommy and Cukurtas, Ersin and Ahmed, Alif and Ivanov, Preslav and Khan, Samira and Skadron, Kevin and Stan, Mircea}, + author = {Mosanu, Sergiu and others}, year = {2022}, month = mar, pages = {1473--1478}, @@ -351,13 +349,12 @@ isbn = {978-3-9819263-6-1}, langid = {english}, keywords = {not read}, - file = {C:\Users\christ\Nextcloud2\Verschiedenes\Zotero\storage\X6Y9VFRI\Mosanu et al. 
- 2022 - PiMulator a Fast and Flexible Processing-in-Memor.pdf} } @article{xie2022, title = {{{MPU-Sim}}: {{A Simulator}} for {{In-DRAM Near-Bank Processing Architectures}}}, shorttitle = {{{MPU-Sim}}}, - author = {Xie, Xinfeng and Gu, Peng and Huang, Jiayi and Ding, Yufei and Xie, Yuan}, + author = {Xie, Xinfeng and others}, year = {2022}, month = jan, journal = {IEEE Computer Architecture Letters}, @@ -370,13 +367,12 @@ abstract = {Despite the promising future of near-bank computing to address the ''memory wall'', there are still critical hardware and software challenges, such as designing compute logics within a stringent area budget and developing software support for efficient data mapping. An open-source simulation framework plays an important role in addressing these challenges, which is unfortunately missing. In this paper, we introduce our open-source simulator for in-DRAM near-bank processing accelerators, MPU-Sim, to complete this missing piece in the research and development of future near-bank processing solutions. We detail the design, implementation, and interface of MPU-Sim, and conduct calibration studies for key hardware components with state-of-the-art simulators to validate our implementations. Finally, we use MPU-Sim for two case studies, DRAM refreshing and thread-block scheduling, to demonstrate the potential usage of MPU-Sim to study hardware and software optimizations for near-bank processing architectures.}, langid = {english}, keywords = {not read}, - file = {C:\Users\christ\Nextcloud2\Verschiedenes\Zotero\storage\L34LPUAD\Xie et al. 
- 2022 - MPU-Sim A Simulator for In-DRAM Near-Bank Process.pdf} } @article{xu2019, title = {{{PIMSim}}: {{A Flexible}} and {{Detailed Processing-in-Memory Simulator}}}, shorttitle = {{{PIMSim}}}, - author = {Xu, Sheng and Chen, Xiaoming and Wang, Ying and Han, Yinhe and Qian, Xuehai and Li, Xiaowei}, + author = {Xu, Sheng and others}, year = {2019}, month = jan, journal = {IEEE Computer Architecture Letters}, @@ -389,14 +385,13 @@ abstract = {With the advent of big data applications and new process technologies, Process-in-Memory (PIM) attracts much attention in memory research as the architecture studies gradually shift from processors to heterogeneous aspects. How to achieve reliable and efficient PIM architecture modeling becomes increasingly urgent for the researchers, who want to experiment on critical issues from detailed implementations of their proposed PIM designs. This paper proposes PIMSim, a full-system and highly-configurable PIM simulator to facilitate circuit-, architecture- and system-level researches. PIMSim enables architectural simulation of PIM and implements three simulation modes to provide a wide range of speed/accuracy tradeoffs. It offers detailed performance and energy models to simulate PIM-enabled instructions, compiler, in-memory processing logic, various memory devices, and PIM coherence. PIMSim is open source and available at https://github.com/vineodd/PIMSim.}, langid = {english}, keywords = {not read}, - file = {C:\Users\christ\Nextcloud2\Verschiedenes\Zotero\storage\DAZNYVK8\Xu et al. 
- 2019 - PIMSim A Flexible and Detailed Processing-in-Memo.pdf} } @inproceedings{zhou2021, title = {{{DP-Sim}}: {{A Full-stack Simulation Infrastructure}} for {{Digital Processing In-Memory Architectures}}}, shorttitle = {{{DP-Sim}}}, booktitle = {Proceedings of the 26th {{Asia}} and {{South Pacific Design Automation Conference}}}, - author = {Zhou, Minxuan and Imani, Mohsen and Kim, Yeseong and Gupta, Saransh and Rosing, Tajana}, + author = {Zhou, Minxuan and others}, year = {2021}, month = jan, pages = {639--644}, @@ -408,13 +403,12 @@ isbn = {978-1-4503-7999-1}, langid = {english}, keywords = {not read}, - file = {C:\Users\christ\Nextcloud2\Verschiedenes\Zotero\storage\8TYVMQ5I\Zhou et al. - 2021 - DP-Sim A Full-stack Simulation Infrastructure for.pdf} } @inproceedings{santos2021, title = {{{Sim2PIM}}: {{A Fast Method}} for {{Simulating Host Independent}} \& {{PIM Agnostic Designs}}}, shorttitle = {{{Sim2PIM}}}, booktitle = {2021 {{Design}}, {{Automation}} \& {{Test}} in {{Europe Conference}} \& {{Exhibition}} ({{DATE}})}, - author = {Santos, Paulo C. and Forlin, Bruno E. and Carro, Luigi}, + author = {Santos, Paulo C. and others}, year = {2021}, month = feb, pages = {226--231}, @@ -425,13 +419,12 @@ abstract = {Processing-in-Memory (PIM), with the help of modern memory integration technologies, has emerged as a practical approach to mitigate the memory wall and improve performance and energy efficiency in contemporary applications. However, there is a need for tools capable of quickly simulating different PIMs designs and their suitable integration with different hosts. This work presents Sim2PIM, a Simple Simulator for PIM devices that seamlessly integrates any PIM architecture with the host processor and memory hierarchy. Sim2PIM's simulation environment allows the user to describe a PIM architecture in different userdefined abstraction levels. 
The application code runs natively on the Host, with minimal overhead from the simulator integration, allowing Sim2PIM to collect precise metrics from the Hardware Performance Counters (HPCs). Our simulator is available to download at https://pim.computer/.}, isbn = {978-3-9819263-5-4}, langid = {english}, - file = {C:\Users\christ\Nextcloud2\Verschiedenes\Zotero\storage\88DV9TYW\Santos et al. - 2021 - Sim2PIM A Fast Method for Simulating Host Indepen.pdf} } @inproceedings{seshadri2013, title = {{{RowClone}}: Fast and Energy-Efficient in-{{DRAM}} Bulk Data Copy and Initialization}, shorttitle = {{{RowClone}}}, booktitle = {Proceedings of the 46th {{Annual IEEE}}/{{ACM International Symposium}} on {{Microarchitecture}}}, - author = {Seshadri, Vivek and Kim, Yoongu and Fallin, Chris and Lee, Donghyuk and Ausavarungnirun, Rachata and Pekhimenko, Gennady and Luo, Yixin and Mutlu, Onur and Gibbons, Phillip B. and Kozuch, Michael A. and Mowry, Todd C.}, + author = {Seshadri, Vivek and others}, year = {2013}, month = dec, pages = {185--197}, @@ -441,12 +434,11 @@ urldate = {2024-02-05}, isbn = {978-1-4503-2638-4}, langid = {english}, - file = {C:\Users\christ\Nextcloud2\Verschiedenes\Zotero\storage\85WGY7ZW\Seshadri et al. - 2013 - RowClone fast and energy-efficient in-DRAM bulk d.pdf} } @misc{seshadri2020, title = {In-{{DRAM Bulk Bitwise Execution Engine}}}, - author = {Seshadri, Vivek and Mutlu, Onur}, + author = {Seshadri, Vivek and others}, year = {2020}, month = apr, number = {arXiv:1905.09822}, @@ -456,5 +448,4 @@ urldate = {2024-02-05}, abstract = {Many applications heavily use bitwise operations on large bitvectors as part of their computation. In existing systems, performing such bulk bitwise operations requires the processor to transfer a large amount of data on the memory channel, thereby consuming high latency, memory bandwidth, and energy. 
In this paper, we describe Ambit, a recently-proposed mechanism to perform bulk bitwise operations completely inside main memory. Ambit exploits the internal organization and analog operation of DRAM-based memory to achieve low cost, high performance, and low energy. Ambit exposes a new bulk bitwise execution model to the host processor. Evaluations show that Ambit significantly improves the performance of several applications that use bulk bitwise operations, including databases.}, archiveprefix = {arxiv}, - file = {C\:\\Users\\christ\\Nextcloud2\\Verschiedenes\\Zotero\\storage\\3J45PFD2\\Seshadri und Mutlu - 2020 - In-DRAM Bulk Bitwise Execution Engine.pdf;C\:\\Users\\christ\\Nextcloud2\\Verschiedenes\\Zotero\\storage\\DTK64DHZ\\1905.html} } diff --git a/samplepaper.tex b/samplepaper.tex index ee7a1dd..0e3a0df 100644 --- a/samplepaper.tex +++ b/samplepaper.tex @@ -341,7 +341,7 @@ Therefore, there is a break-even point between dimensions X1 and X2 where \ac{pi \label{fig:speedups} \end{figure} -Besides it's own virtual prototype, Samsung used a real hardware accelerator platform for its analyses, which is based on a Xilinx Zynq Ultrascale+ \ac{fpga} and uses real manufactured \ac{fimdram} memory packages. +Besides its own virtual prototype, Samsung used a real hardware accelerator platform for its analyses, which is based on a Xilinx Zynq Ultrascale+ FPGA and uses real manufactured \ac{fimdram} memory packages. Similar to the previous simulations, Samsung has used different input dimensions for its microbenchmarks for both its \ac{gemv} and its vector ADD workloads, which are consistent. The performed ADD microbenchmark of Samsung shows an average speedup of around $\qty{1.6}{\times}$ for the real system and \qty{2.6}{\times} for the virtual prototype.