pimsys-paper/references.bib

@misc{blas1979,
  author       = {{Netlib}},
  title        = {{{BLAS}} ({{Basic Linear Algebra Subprograms}})},
  year         = {1979},
  howpublished = {https://www.netlib.org/blas/},
  urldate      = {2024-01-08},
}

@inproceedings{he2020,
  title = {Newton: {{A DRAM-maker}}'s {{Accelerator-in-Memory}} ({{AiM}}) {{Architecture}} for {{Machine Learning}}},
  shorttitle = {Newton},
  booktitle = {2020 53rd {{Annual IEEE}}/{{ACM International Symposium}} on {{Microarchitecture}} ({{MICRO}})},
  author = {He, Mingxuan and others},
  year = {2020},
  month = oct,
  pages = {372--385},
  publisher = {IEEE},
  address = {Athens, Greece},
  doi = {10.1109/MICRO50266.2020.00040},
  urldate = {2024-01-09},
  isbn = {978-1-72817-383-2},
  keywords = {reviewed},
}

@inproceedings{kang2022,
  title = {An {{FPGA-based RNN-T Inference Accelerator}} with {{PIM-HBM}}},
  booktitle = {Proceedings of the 2022 {{ACM}}/{{SIGDA International Symposium}} on {{Field-Programmable Gate Arrays}}},
  author = {Kang, Shinhaeng and others},
  year = {2022},
  month = feb,
  pages = {146--152},
  publisher = {ACM},
  address = {Virtual Event USA},
  doi = {10.1145/3490422.3502355},
  urldate = {2024-01-08},
  isbn = {978-1-4503-9149-8},
  langid = {english},
  keywords = {reviewed},
}

@inproceedings{kwon2021,
  title = {25.4 {{A}} 20nm {{6GB Function-In-Memory DRAM}}, {{Based}} on {{HBM2}} with a 1.{{2TFLOPS Programmable Computing Unit Using Bank-Level Parallelism}}, for {{Machine Learning Applications}}},
  booktitle = {2021 {{IEEE International Solid-State Circuits Conference}} ({{ISSCC}})},
  author = {Kwon, Young-Cheon and others},
  year = {2021},
  month = feb,
  pages = {350--352},
  publisher = {IEEE},
  address = {San Francisco, CA, USA},
  doi = {10.1109/ISSCC42613.2021.9365862},
  urldate = {2024-01-08},
  isbn = {978-1-72819-549-0},
  langid = {english},
  keywords = {reviewed},
}

@inproceedings{lee2021,
  title = {Hardware {{Architecture}} and {{Software Stack}} for {{PIM Based}} on {{Commercial DRAM Technology}}: {{Industrial Product}}},
  shorttitle = {Hardware {{Architecture}} and {{Software Stack}} for {{PIM Based}} on {{Commercial DRAM Technology}}},
  booktitle = {2021 {{ACM}}/{{IEEE}} 48th {{Annual International Symposium}} on {{Computer Architecture}} ({{ISCA}})},
  author = {Lee, Sukhan and others},
  year = {2021},
  month = jun,
  pages = {43--56},
  publisher = {IEEE},
  address = {Valencia, Spain},
  doi = {10.1109/ISCA52012.2021.00013},
  urldate = {2024-01-08},
  isbn = {978-1-66543-333-4},
  langid = {english},
  keywords = {reviewed},
}

@article{rosenfeld2011,
  title = {{{DRAMSim2}}: {{A Cycle Accurate Memory System Simulator}}},
  shorttitle = {{{DRAMSim2}}},
  author = {Rosenfeld, Paul and others},
  year = {2011},
  month = jan,
  journal = {IEEE Computer Architecture Letters},
  volume = {10},
  number = {1},
  pages = {16--19},
  issn = {1556-6056},
  doi = {10.1109/L-CA.2011.4},
  urldate = {2024-03-11},
  langid = {english},
}

@misc{shin-haengkang2023,
  title = {{{PIMSimulator}}},
  author = {Kang, Shin-haeng and others},
  year = {2023},
  month = nov,
  urldate = {2024-02-08},
  abstract = {Processing-In-Memory (PIM) Simulator},
  howpublished = {https://github.com/SAITPublic/PIMSimulator},
}

@article{steiner2022a,
  title = {{{DRAMSys4}}.0: {{An Open-Source Simulation Framework}} for {{In-depth DRAM Analyses}}},
  shorttitle = {{{DRAMSys4}}.0},
  author = {Steiner, Lukas and others},
  year = {2022},
  month = apr,
  journal = {International Journal of Parallel Programming},
  volume = {50},
  number = {2},
  pages = {217--242},
  issn = {0885-7458, 1573-7640},
  doi = {10.1007/s10766-022-00727-4},
  urldate = {2024-01-08},
  langid = {english},
}

@incollection{sudarshan2022,
  title = {A {{Critical Assessment}} of {{DRAM-PIM Architectures}} -- {{Trends}}, {{Challenges}} and {{Solutions}}},
  booktitle = {Embedded {{Computer Systems}}: {{Architectures}}, {{Modeling}}, and {{Simulation}}},
  author = {Sudarshan, Chirag and others},
  editor = {Orailoglu, Alex and Reichenbach, Marc and Jung, Matthias},
  year = {2022},
  volume = {13511},
  pages = {362--379},
  publisher = {Springer International Publishing},
  address = {Cham},
  doi = {10.1007/978-3-031-15074-6_23},
  urldate = {2024-01-21},
  isbn = {978-3-031-15073-9 978-3-031-15074-6},
  langid = {english},
}

@inproceedings{jouhyu_21,
  author = {Jouppi, Norman P. and others},
  booktitle = {2021 {ACM}/{IEEE} 48th Annual International Symposium on Computer Architecture ({ISCA})},
  title = {Ten Lessons From Three Generations Shaped {Google}'s {TPUv4i}: Industrial Product},
  doi = {10.1109/ISCA52012.2021.00010},
  pages = {1--14},
  keywords = {Training;Program processors;Quantization (signal);Wires;Random access memory;Throughput;Software},
  owner = {MJ},
  year = {2021},
}

@article{sto_70,
  author = {Stone, Harold S.},
  title = {A Logic-in-Memory Computer},
  doi = {10.1109/TC.1970.5008902},
  number = {1},
  pages = {73--78},
  volume = {C-19},
  journal = {IEEE Transactions on Computers},
  keywords = {Computers;Logic arrays;Microelectronics;Memory management;Adders;Magnetic memory;Complexity theory;Cache memories;computer architecture;logic-in-memory;microelectronic memories;unconventional computer systems},
  owner = {MJ},
  year = {1970},
}

@article{gomhaj_21,
  author     = {Juan G{\'{o}}mez{-}Luna and others},
  title      = {Benchmarking a New Paradigm: An Experimental Analysis of a Real Processing-in-Memory Architecture},
  journal    = {CoRR},
  volume     = {abs/2105.03814},
  year       = {2021},
  eprint     = {2105.03814},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/2105.03814},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2105-03814.bib},
  timestamp  = {Fri, 14 May 2021 12:13:30 +0200},
  owner      = {MJ},
}

@misc{lowahm_20,
  author        = {Jason Lowe-Power and others},
  title         = {{T}he gem5 {S}imulator: {V}ersion 20.0+},
  year          = {2020},
  eprint        = {2007.03152},
  archiveprefix = {arXiv},
  primaryclass  = {cs.AR},
  groups        = {MJ:1},
  owner         = {MJ},
  timestamp     = {2020-07-08},
}

@inproceedings{stejun_20,
  author = {Steiner, Lukas and others},
  booktitle = {International Conference on Embedded Computer Systems Architectures Modeling and Simulation (SAMOS)},
  title = {{DRAMSys4.0}: {A} {Fast} and {Cycle-Accurate} {SystemC}/{TLM}-{Based} {DRAM} {Simulator}},
  publisher = {Springer},
  groups = {MJ:1},
  month = jul,
  owner = {MJ},
  timestamp = {2020-07-14},
  year = {2020},
}

@misc{corda2021,
  title = {{{NMPO}}: {{Near-Memory Computing Profiling}} and {{Offloading}}},
  shorttitle = {{{NMPO}}},
  author = {Corda, Stefano and others},
  year = {2021},
  month = jun,
  number = {arXiv:2106.15284},
  eprint = {2106.15284},
  primaryclass = {cs},
  publisher = {arXiv},
  urldate = {2024-03-20},
  archiveprefix = {arXiv},
  langid = {english},
  keywords = {Computer Science - Hardware Architecture,Computer Science - Performance},
}

@inproceedings{singh2019,
  author     = {Singh, Gagandeep and others},
  title      = {{{NAPEL}}: {{Near-Memory Computing Application Performance Prediction}} via {{Ensemble Learning}}},
  shorttitle = {{{NAPEL}}},
  booktitle  = {Proceedings of the 56th {{Annual Design Automation Conference}} 2019},
  pages      = {1--6},
  year       = {2019},
  month      = jun,
  publisher  = {ACM},
  address    = {Las Vegas NV USA},
  isbn       = {978-1-4503-6725-7},
  doi        = {10.1145/3316781.3317867},
  urldate    = {2024-03-20},
  langid     = {english},
}

@article{yu2021,
  author     = {Yu, Chao and others},
  title      = {{{MultiPIM}}: {{A Detailed}} and {{Configurable Multi-Stack Processing-In-Memory Simulator}}},
  shorttitle = {{{MultiPIM}}},
  journal    = {IEEE Computer Architecture Letters},
  volume     = {20},
  number     = {1},
  pages      = {54--57},
  year       = {2021},
  month      = jan,
  issn       = {1556-6056, 1556-6064, 2473-2575},
  doi        = {10.1109/LCA.2021.3061905},
  urldate    = {2024-03-20},
  langid     = {english},
}

@article{sanchez2013,
  author     = {Sanchez, Daniel and others},
  title      = {{{ZSim}}: Fast and Accurate Microarchitectural Simulation of Thousand-Core Systems},
  shorttitle = {{{ZSim}}},
  journal    = {ACM SIGARCH Computer Architecture News},
  volume     = {41},
  number     = {3},
  pages      = {475--486},
  year       = {2013},
  month      = jun,
  issn       = {0163-5964},
  doi        = {10.1145/2508148.2485963},
  urldate    = {2024-03-20},
  langid     = {english},
}
@article{kim2016a,
  author     = {Kim, Yoongu and others},
  title      = {Ramulator: {{A Fast}} and {{Extensible DRAM Simulator}}},
  shorttitle = {Ramulator},
  journal    = {IEEE Computer Architecture Letters},
  volume     = {15},
  number     = {1},
  pages      = {45--49},
  year       = {2016},
  month      = jan,
  issn       = {1556-6056},
  doi        = {10.1109/LCA.2015.2414456},
  urldate    = {2024-03-20},
  langid     = {english},
}
@misc{rust,
  title = {The {{Rust Programming Language}}},
  author = {{Rust Foundation}},
  howpublished = {https://www.rust-lang.org/}
}

@article{forlin2022,
  title = {{{Sim2PIM}}: {{A}} complete simulation framework for {{Processing-in-Memory}}},
  shorttitle = {{{Sim2PIM}}},
  author = {Forlin, Bruno E. and Santos, Paulo C. and Becker, Augusto E. and Alves, Marco A. Z. and Carro, Luigi},
  year = {2022},
  month = jul,
  journal = {Journal of Systems Architecture},
  volume = {128},
  pages = {102528},
  issn = {1383-7621},
  doi = {10.1016/j.sysarc.2022.102528},
  urldate = {2024-03-22},
  abstract = {With the help of modern memory integration technologies, Processing-in-Memory (PIM) has emerged as a practical approach to mitigate the memory wall while improving performance and energy efficiency in contemporary applications. Since these designs encompass accelerating and increasing the efficiency of critical specific and general-purposed applications, it is expected that these accelerators will be coupled to existing systems and consequently with systems capable of multi-thread computing. However, there is a lack of tools capable of quickly simulating different PIMs designs and their suitable integration with other hosts. This gap is even worse when considering simulations of multi-core systems. This work presents Sim2PIM, a Simple Simulator for PIM devices that seamlessly integrates any PIM architecture with the host processor and memory hierarchy. The framework simulation achieves execution speeds and accuracy on par with the perf tool on host code, less than 10\% run-time overhead, and around 2\% difference in metrics. Additionally, by exploring the thread parallelism in the application and utilizing the host hardware, Sim2PIM can achieve more than 8{\texttimes} simulation speedup compared to a sequential simulation and orders of magnitude compared to other simulators. Sim2PIM is available to download at https://pim.computer/.},
  langid = {english},
  keywords = {not read},
  file = {C:\Users\christ\Nextcloud2\Verschiedenes\Zotero\storage\YKGM4QLD\E. Forlin et al. - 2022 - Sim 2 PIM A complete simulation framework for Pro.pdf}
}

@misc{hyun2024,
  title = {Pathfinding {{Future PIM Architectures}} by {{Demystifying}} a {{Commercial PIM Technology}}},
  author = {Hyun, Bongjoon and Kim, Taehun and Lee, Dongjae and Rhu, Minsoo},
  year = {2024},
  month = mar,
  number = {arXiv:2308.00846},
  eprint = {2308.00846},
  primaryclass = {cs},
  publisher = {arXiv},
  urldate = {2024-03-22},
  abstract = {Processing-in-memory (PIM) has been explored for decades by computer architects, yet it has never seen the light of day in real-world products due to its high design overheads and lack of a killer application. With the advent of critical memory-intensive workloads, several commercial PIM technologies have been introduced to the market, ranging from domain-specific PIM architectures to more general-purpose PIM architectures. In this work, we deepdive into UPMEM's commercial PIM technology, a general-purpose PIM-enabled parallel computing architecture that is highly programmable. Our first key contribution is the development of a flexible simulation framework for PIM. The simulator we developed (aka uPIMulator) enables the compilation of UPMEM-PIM source codes into its compiled machine-level instructions, which are subsequently consumed by our cycle-level performance simulator. Using uPIMulator, we demystify UPMEM's PIM design through a detailed characterization study. Finally, we identify some key limitations of the current UPMEM-PIM system through our case studies and present some important architectural features that will become critical for future PIM architectures to support.},
  archiveprefix = {arXiv},
  langid = {english},
  keywords = {Computer Science - Hardware Architecture,not read},
  file = {C:\Users\christ\Nextcloud2\Verschiedenes\Zotero\storage\XS65A46E\Hyun et al. - 2024 - Pathfinding Future PIM Architectures by Demystifyi.pdf}
}

@inproceedings{mosanu2022,
  author     = {Mosanu, Sergiu and Sakib, Mohammad Nazmus and Tracy, Tommy and Cukurtas, Ersin and Ahmed, Alif and Ivanov, Preslav and Khan, Samira and Skadron, Kevin and Stan, Mircea},
  title      = {{{PiMulator}}: A {{Fast}} and {{Flexible Processing-in-Memory Emulation Platform}}},
  shorttitle = {{{PiMulator}}},
  booktitle  = {2022 {{Design}}, {{Automation}} \& {{Test}} in {{Europe Conference}} \& {{Exhibition}} ({{DATE}})},
  pages      = {1473--1478},
  year       = {2022},
  month      = mar,
  publisher  = {IEEE},
  address    = {Antwerp, Belgium},
  isbn       = {978-3-9819263-6-1},
  doi        = {10.23919/DATE54114.2022.9774614},
  urldate    = {2024-03-22},
  langid     = {english},
  keywords   = {not read},
  abstract   = {Motivated by the memory wall problem, researchers propose many new Processing-in-Memory (PiM) architectures to bring computation closer to data. However, evaluating the performance of these emerging architectures involves using a myriad of tools, including circuit simulators, behavioral RTL or software simulation models, hardware approximations, etc. It is challenging to mimic both software and hardware aspects of a PiM architecture using the currently available tools with high performance and fidelity. Until and unless actual products that include PiM become available, the next best thing is to emulate various hardware PiM solutions on FPGA fabric and boards. This paper presents a modular, parameterizable, FPGA synthesizable soft PiM model suitable for prototyping and rapid evaluation of Processing-in-Memory architectures.},
  file       = {C:\Users\christ\Nextcloud2\Verschiedenes\Zotero\storage\X6Y9VFRI\Mosanu et al. - 2022 - PiMulator a Fast and Flexible Processing-in-Memor.pdf}
}

@article{xie2022,
  title = {{{MPU-Sim}}: {{A Simulator}} for {{In-DRAM Near-Bank Processing Architectures}}},
  shorttitle = {{{MPU-Sim}}},
  author = {Xie, Xinfeng and Gu, Peng and Huang, Jiayi and Ding, Yufei and Xie, Yuan},
  year = {2022},
  month = jan,
  journal = {IEEE Computer Architecture Letters},
  volume = {21},
  number = {1},
  pages = {1--4},
  issn = {1556-6056, 1556-6064, 2473-2575},
  doi = {10.1109/LCA.2021.3135557},
  urldate = {2024-03-24},
  abstract = {Despite the promising future of near-bank computing to address the ``memory wall'', there are still critical hardware and software challenges, such as designing compute logics within a stringent area budget and developing software support for efficient data mapping. An open-source simulation framework plays an important role in addressing these challenges, which is unfortunately missing. In this paper, we introduce our open-source simulator for in-DRAM near-bank processing accelerators, MPU-Sim, to complete this missing piece in the research and development of future near-bank processing solutions. We detail the design, implementation, and interface of MPU-Sim, and conduct calibration studies for key hardware components with state-of-the-art simulators to validate our implementations. Finally, we use MPU-Sim for two case studies, DRAM refreshing and thread-block scheduling, to demonstrate the potential usage of MPU-Sim to study hardware and software optimizations for near-bank processing architectures.},
  langid = {english},
  keywords = {not read},
  file = {C:\Users\christ\Nextcloud2\Verschiedenes\Zotero\storage\L34LPUAD\Xie et al. - 2022 - MPU-Sim A Simulator for In-DRAM Near-Bank Process.pdf}
}

@article{xu2019,
  author     = {Xu, Sheng and Chen, Xiaoming and Wang, Ying and Han, Yinhe and Qian, Xuehai and Li, Xiaowei},
  title      = {{{PIMSim}}: {{A Flexible}} and {{Detailed Processing-in-Memory Simulator}}},
  shorttitle = {{{PIMSim}}},
  journal    = {IEEE Computer Architecture Letters},
  volume     = {18},
  number     = {1},
  pages      = {6--9},
  year       = {2019},
  month      = jan,
  issn       = {1556-6056, 1556-6064, 2473-2575},
  doi        = {10.1109/LCA.2018.2885752},
  urldate    = {2024-03-22},
  langid     = {english},
  keywords   = {not read},
  abstract   = {With the advent of big data applications and new process technologies, Process-in-Memory (PIM) attracts much attention in memory research as the architecture studies gradually shift from processors to heterogeneous aspects. How to achieve reliable and efficient PIM architecture modeling becomes increasingly urgent for the researchers, who want to experiment on critical issues from detailed implementations of their proposed PIM designs. This paper proposes PIMSim, a full-system and highly-configurable PIM simulator to facilitate circuit-, architecture- and system-level researches. PIMSim enables architectural simulation of PIM and implements three simulation modes to provide a wide range of speed/accuracy tradeoffs. It offers detailed performance and energy models to simulate PIM-enabled instructions, compiler, in-memory processing logic, various memory devices, and PIM coherence. PIMSim is open source and available at https://github.com/vineodd/PIMSim.},
  file       = {C:\Users\christ\Nextcloud2\Verschiedenes\Zotero\storage\DAZNYVK8\Xu et al. - 2019 - PIMSim A Flexible and Detailed Processing-in-Memo.pdf}
}

@inproceedings{zhou2021,
  title = {{{DP-Sim}}: {{A Full-stack Simulation Infrastructure}} for {{Digital Processing In-Memory Architectures}}},
  shorttitle = {{{DP-Sim}}},
  booktitle = {Proceedings of the 26th {{Asia}} and {{South Pacific Design Automation Conference}}},
  author = {Zhou, Minxuan and Imani, Mohsen and Kim, Yeseong and Gupta, Saransh and Rosing, Tajana},
  year = {2021},
  month = jan,
  pages = {639--644},
  publisher = {ACM},
  address = {Tokyo Japan},
  doi = {10.1145/3394885.3431525},
  urldate = {2024-03-24},
  abstract = {Digital processing in-memory (DPIM) is a promising technology that significantly reduces data movements while providing high parallelism. In this work, we design and implement the first full-stack DPIM simulation infrastructure, DP-Sim, which evaluates a comprehensive range of DPIM-specific design space concerning both software and hardware. DP-Sim provides a C++ library to enable DPIM acceleration in general programs while supporting several aspects of software-level exploration by a convenient interface. The DP-Sim software front-end generates specialized instructions that can be processed by a hardware simulator based on a new DPIM-enabled architecture model which is 10.3\% faster than conventional memory simulation models. We use DP-Sim to explore the DPIM-specific design space of acceleration for various emerging applications. Our experiments show that bank-level control is 11.3{\texttimes} faster than conventional channel-level control because of higher computing parallelism. Furthermore, cost-aware memory allocation can provide at least 2.2{\texttimes} speedup vs. heuristic methods, showing the importance of data layout in DPIM acceleration.},
  isbn = {978-1-4503-7999-1},
  langid = {english},
  keywords = {not read},
  file = {C:\Users\christ\Nextcloud2\Verschiedenes\Zotero\storage\8TYVMQ5I\Zhou et al. - 2021 - DP-Sim A Full-stack Simulation Infrastructure for.pdf}
}
@inproceedings{santos2021,
  title = {{{Sim2PIM}}: {{A Fast Method}} for {{Simulating Host Independent}} \& {{PIM Agnostic Designs}}},
  shorttitle = {{{Sim2PIM}}},
  booktitle = {2021 {{Design}}, {{Automation}} \& {{Test}} in {{Europe Conference}} \& {{Exhibition}} ({{DATE}})},
  author = {Santos, Paulo C. and Forlin, Bruno E. and Carro, Luigi},
  year = {2021},
  month = feb,
  pages = {226--231},
  publisher = {IEEE},
  address = {Grenoble, France},
  doi = {10.23919/DATE51398.2021.9474104},
  urldate = {2024-03-25},
  abstract = {Processing-in-Memory (PIM), with the help of modern memory integration technologies, has emerged as a practical approach to mitigate the memory wall and improve performance and energy efficiency in contemporary applications. However, there is a need for tools capable of quickly simulating different PIMs designs and their suitable integration with different hosts. This work presents Sim2PIM, a Simple Simulator for PIM devices that seamlessly integrates any PIM architecture with the host processor and memory hierarchy. Sim2PIM's simulation environment allows the user to describe a PIM architecture in different user-defined abstraction levels. The application code runs natively on the Host, with minimal overhead from the simulator integration, allowing Sim2PIM to collect precise metrics from the Hardware Performance Counters (HPCs). Our simulator is available to download at https://pim.computer/.},
  isbn = {978-3-9819263-5-4},
  langid = {english},
  file = {C:\Users\christ\Nextcloud2\Verschiedenes\Zotero\storage\88DV9TYW\Santos et al. - 2021 - Sim2PIM A Fast Method for Simulating Host Indepen.pdf}
}
@inproceedings{seshadri2013,
  author     = {Seshadri, Vivek and Kim, Yoongu and Fallin, Chris and Lee, Donghyuk and Ausavarungnirun, Rachata and Pekhimenko, Gennady and Luo, Yixin and Mutlu, Onur and Gibbons, Phillip B. and Kozuch, Michael A. and Mowry, Todd C.},
  title      = {{{RowClone}}: Fast and Energy-Efficient in-{{DRAM}} Bulk Data Copy and Initialization},
  shorttitle = {{{RowClone}}},
  booktitle  = {Proceedings of the 46th {{Annual IEEE}}/{{ACM International Symposium}} on {{Microarchitecture}}},
  pages      = {185--197},
  year       = {2013},
  month      = dec,
  publisher  = {ACM},
  address    = {Davis California},
  isbn       = {978-1-4503-2638-4},
  doi        = {10.1145/2540708.2540725},
  urldate    = {2024-02-05},
  langid     = {english},
  file       = {C:\Users\christ\Nextcloud2\Verschiedenes\Zotero\storage\85WGY7ZW\Seshadri et al. - 2013 - RowClone fast and energy-efficient in-DRAM bulk d.pdf}
}

@misc{seshadri2020,
  title = {In-{{DRAM Bulk Bitwise Execution Engine}}},
  author = {Seshadri, Vivek and Mutlu, Onur},
  year = {2020},
  month = apr,
  number = {arXiv:1905.09822},
  eprint = {1905.09822},
  primaryclass = {cs},
  publisher = {arXiv},
  urldate = {2024-02-05},
  abstract = {Many applications heavily use bitwise operations on large bitvectors as part of their computation. In existing systems, performing such bulk bitwise operations requires the processor to transfer a large amount of data on the memory channel, thereby consuming high latency, memory bandwidth, and energy. In this paper, we describe Ambit, a recently-proposed mechanism to perform bulk bitwise operations completely inside main memory. Ambit exploits the internal organization and analog operation of DRAM-based memory to achieve low cost, high performance, and low energy. Ambit exposes a new bulk bitwise execution model to the host processor. Evaluations show that Ambit significantly improves the performance of several applications that use bulk bitwise operations, including databases.},
  archiveprefix = {arXiv},
  file = {C\:\\Users\\christ\\Nextcloud2\\Verschiedenes\\Zotero\\storage\\3J45PFD2\\Seshadri und Mutlu - 2020 - In-DRAM Bulk Bitwise Execution Engine.pdf;C\:\\Users\\christ\\Nextcloud2\\Verschiedenes\\Zotero\\storage\\DTK64DHZ\\1905.html}
}