% Bibliography database maintained with JabRef in classic BibTeX mode.
% Layout: one entry per work, one field per line. Citation keys are kept
% stable so that existing \cite commands keep resolving.

@InProceedings{Bruening2003,
  author    = {Bruening, D. and Garnett, T. and Amarasinghe, S.},
  booktitle = {International Symposium on Code Generation and Optimization ({CGO})},
  title     = {An infrastructure for adaptive dynamic optimization},
  year      = {2003},
  doi       = {10.1109/CGO.2003.1191551},
}

@PhdThesis{Bruening2004,
  author = {Bruening, D.},
  school = {Massachusetts Institute of Technology},
  title  = {Efficient, transparent, and comprehensive runtime code manipulation},
  year   = {2004},
}

@InProceedings{Abel19a,
  author    = {Abel, Andreas and Reineke, Jan},
  booktitle = {ASPLOS},
  title     = {uops.info: Characterizing Latency, Throughput, and Port Usage of Instructions on {Intel} Microarchitectures},
  year      = {2019},
  address   = {New York, NY, USA},
  pages     = {673--686},
  publisher = {ACM},
  series    = {ASPLOS '19},
  acmid     = {3304062},
  doi       = {10.1145/3297858.3304062},
  isbn      = {978-1-4503-6240-5},
  location  = {Providence, RI, USA},
  numpages  = {14},
  url       = {http://doi.acm.org/10.1145/3297858.3304062},
}

@Book{Jacob2008,
  author    = {Jacob, B. and Ng, S. W. and Wang, D. T.},
  publisher = {Morgan Kaufmann},
  title     = {Memory Systems: Cache, {DRAM}, Disk},
  year      = {2008},
}

% NOTE(review): the publication venue of Jahre2007 is not recorded in this
% database, so the entry is typed as Misc (no required fields) to avoid an
% empty-journal warning. Confirm the venue and upgrade the entry type.
@Misc{Jahre2007,
  author = {Jahre, M. and Natvig, L.},
  title  = {Performance Effects of a Cache Miss Handling Architecture in a Multi-core Processor},
  year   = {2007},
}

@InProceedings{Antonino2018,
  author    = {Antonino, Pablo Oliveira and Jung, Matthias and Morgenstern, Andreas and Fa{\ss}nacht, Florian and Bauer, Thomas and Bachorek, Adam and Kuhn, Thomas and Nakagawa, Elisa Yumi},
  booktitle = {Software Architecture},
  title     = {Enabling Continuous Software Engineering for Embedded Systems Architectures with Virtual Prototypes},
  year      = {2018},
  address   = {Cham},
  editor    = {Cuesta, Carlos E. and Garlan, David and P{\'e}rez, Jennifer},
  pages     = {115--130},
  publisher = {Springer International Publishing},
  abstract  = {Continuous software engineering aims at orchestrating engineering knowledge from various disciplines in order to deal with the rapid changes within the ecosystems of which software-based systems are part of. The literature claims that one means to ensure these prompt responses is to incorporate virtual prototypes of the system as early as possible in the development process, such that requirements and architecture decisions are verified early and continuously by means of simulations. Despite the maturity of practices for designing and assessing architectures, as well as for virtual prototyping, it is still not clear how to jointly consider the practices from these disciplines within development processes, in order to address the dynamics imposed by continuous software engineering. In this regard, we discuss in this paper how to orchestrate architecture drivers and design specification techniques with virtual prototypes, to address the demands of continuous software engineering in development processes. Our proposals are based on experiences from research and industry projects in various domains such as automotive, agriculture, construction, and medical devices.},
  isbn      = {978-3-030-00761-4},
}

% A standard has no personal author; Manual sorts and labels by organization.
@Manual{IEEE2012,
  organization = {IEEE},
  title        = {{IEEE} Standard for Standard {SystemC} Language Reference Manual},
  year         = {2012},
  doi          = {10.1109/IEEESTD.2012.6134619},
  note         = {IEEE Std 1666-2011 (Revision of IEEE Std 1666-2005)},
}

@InProceedings{Menard2017,
  author    = {Menard, Christian and Castrillon, Jeronimo and Jung, Matthias and Wehn, Norbert},
  booktitle = {2017 International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS)},
  title     = {System simulation with {gem5} and {SystemC}: The keystone for full interoperability},
  year      = {2017},
  pages     = {62--69},
  doi       = {10.1109/SAMOS.2017.8344612},
}

@InProceedings{Steiner2020,
  author    = {Steiner, Lukas and Jung, Matthias and Prado, Felipe S. and Bykov, Kirill and Wehn, Norbert},
  booktitle = {Embedded Computer Systems: Architectures, Modeling, and Simulation},
  title     = {{DRAMSys4.0}: A Fast and Cycle-Accurate {SystemC/TLM}-Based {DRAM} Simulator},
  year      = {2020},
  address   = {Cham},
  editor    = {Orailoglu, Alex and Jung, Matthias and Reichenbach, Marc},
  pages     = {110--126},
  publisher = {Springer International Publishing},
  abstract  = {The simulation of DRAMs (Dynamic Random Access Memories) on system level requires highly accurate models due to their complex timing and power behavior. However, conventional cycle-accurate DRAM models often become the bottleneck for the overall simulation speed. A promising alternative are DRAM simulation models based on Transaction Level Modeling, which can be fast and accurate at the same time. In this paper we present DRAMSys4.0, which is, to the best of our knowledge, the fastest cycle-accurate open-source DRAM simulator and has a large range of functionalities. DRAMSys4.0 includes a novel simulator architecture that enables a fast adaptation to new DRAM standards using a Domain Specific Language. We present optimization techniques to achieve a high simulation speed while maintaining full temporal accuracy. Finally, we provide a detailed survey and comparison of the most prominent cycle-accurate open-source DRAM simulators with regard to their supported features, analysis capabilities and simulation speed.},
  isbn      = {978-3-030-60939-9},
}

@Book{Jung2017,
  author    = {Jung, M.},
  publisher = {Technische Universit{\"a}t Kaiserslautern},
  title     = {System-level Modeling, Analysis and Optimization of {DRAM} Memories and Controller Architectures},
  year      = {2017},
  isbn      = {9783959740517},
  series    = {Forschungsberichte Mikroelektronik},
}

@Article{Binkert2011,
  author     = {Binkert, Nathan and Beckmann, Bradford and Black, Gabriel and Reinhardt, Steven K. and Saidi, Ali and Basu, Arkaprava and Hestness, Joel and Hower, Derek R. and Krishna, Tushar and Sardashti, Somayeh and Sen, Rathijit and Sewell, Korey and Shoaib, Muhammad and Vaish, Nilay and Hill, Mark D. and Wood, David A.},
  journal    = {ACM SIGARCH Computer Architecture News},
  title      = {The {gem5} Simulator},
  year       = {2011},
  issn       = {0163-5964},
  month      = aug,
  number     = {2},
  pages      = {1--7},
  volume     = {39},
  abstract   = {The gem5 simulation infrastructure is the merger of the best aspects of the M5 [4] and GEMS [9] simulators. M5 provides a highly configurable simulation framework, multiple ISAs, and diverse CPU models. GEMS complements these features with a detailed and flexible memory system, including support for multiple cache coherence protocols and interconnect models. Currently, gem5 supports most commercial ISAs (ARM, ALPHA, MIPS, Power, SPARC, and x86), including booting Linux on three of them (ARM, ALPHA, and x86). The project is the result of the combined efforts of many academic and industrial institutions, including AMD, ARM, HP, MIPS, Princeton, MIT, and the Universities of Michigan, Texas, and Wisconsin. Over the past ten years, M5 and GEMS have been used in hundreds of publications and have been downloaded tens of thousands of times. The high level of collaboration on the gem5 project, combined with the previous success of the component parts and a liberal BSD-like license, make gem5 a valuable full-system simulation tool.},
  address    = {New York, NY, USA},
  doi        = {10.1145/2024716.2024718},
  issue_date = {May 2011},
  numpages   = {7},
  publisher  = {Association for Computing Machinery},
  url        = {https://doi.org/10.1145/2024716.2024718},
}

@InProceedings{Jung2017a,
  author    = {Jung, Matthias and Kraft, Kira and Wehn, Norbert},
  booktitle = {2017 International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS)},
  title     = {A new state model for {DRAMs} using {Petri} Nets},
  year      = {2017},
  doi       = {10.1109/SAMOS.2017.8344631},
}

% The edition lives in its own field; repeating it in the title would print it twice.
@Book{Hennessy2011,
  author    = {Hennessy, John L. and Patterson, David A.},
  publisher = {Morgan Kaufmann Publishers Inc.},
  title     = {Computer Architecture: A Quantitative Approach},
  year      = {2011},
  address   = {San Francisco, CA, USA},
  edition   = {Fifth},
  isbn      = {012383872X},
  abstract  = {The computing world today is in the middle of a revolution: mobile clients and cloud computing have emerged as the dominant paradigms driving programming and hardware innovation today. The Fifth Edition of Computer Architecture focuses on this dramatic shift, exploring the ways in which software and technology in the "cloud" are accessed by cell phones, tablets, laptops, and other mobile computing devices. Each chapter includes two real-world examples, one mobile and one datacenter, to illustrate this revolutionary change. Updated to cover the mobile computing revolution. Emphasizes the two most important topics in architecture today: memory hierarchy and parallelism in all its forms. Develops common themes throughout each chapter: power, performance, cost, dependability, protection, programming models, and emerging trends ("What's Next"). Includes three review appendices in the printed text. Additional reference appendices are available online. Includes updated Case Studies and completely new exercises.},
}

@Article{Ghose2019,
  author     = {Ghose, Saugata and Li, Tianshi and Hajinazar, Nastaran and Cali, Damla Senol and Mutlu, Onur},
  journal    = {Proceedings of the ACM on Measurement and Analysis of Computing Systems},
  title      = {Demystifying Complex {Workload-DRAM} Interactions: An Experimental Study},
  year       = {2019},
  month      = dec,
  number     = {3},
  volume     = {3},
  abstract   = {It has become increasingly difficult to understand the complex interactions between modern applications and main memory, composed of Dynamic Random Access Memory (DRAM) chips. Manufacturers are now selling and proposing many different types of DRAM, with each DRAM type catering to different needs (e.g., high throughput, low power, high memory density). At the same time, memory access patterns of prevalent and emerging applications are rapidly diverging, as these applications manipulate larger data sets in very different ways. As a result, the combined DRAM-workload behavior is often difficult to intuitively determine today, which can hinder memory optimizations in both hardware and software. In this work, we identify important families of workloads, as well as prevalent types of DRAM chips, and rigorously analyze the combined DRAM-workload behavior. To this end, we perform a comprehensive experimental study of the interaction between nine different DRAM types and 115 modern applications and multiprogrammed workloads. We draw 12 key observations from our characterization, enabled in part by our development of new metrics that take into account contention between memory requests due to hardware design. Notably, we find that (1) newer DRAM technologies such as DDR4 and HMC often do not outperform older technologies such as DDR3, due to higher access latencies and, also in the case of HMC, poor exploitation of locality; (2) there is no single memory type that can effectively cater to all of the components of a heterogeneous system (e.g., GDDR5 significantly outperforms other memories for multimedia acceleration, while HMC significantly outperforms other memories for network acceleration); and (3) there is still a strong need to lower DRAM latency, but unfortunately the current design trend of commodity DRAM is toward higher latencies to obtain other benefits. We hope that the trends we identify can drive optimizations in both hardware and software design. To aid further study, we open-source our extensively-modified simulator, as well as a benchmark suite containing our applications.},
  address    = {New York, NY, USA},
  articleno  = {60},
  doi        = {10.1145/3366708},
  issue_date = {December 2019},
  keywords   = {power consumption, memory systems, performance modeling, experimental characterization, dram, low-power memory, energy, 3d-stacked memory},
  numpages   = {50},
  publisher  = {Association for Computing Machinery},
  url        = {https://doi.org/10.1145/3366708},
}

@InProceedings{Gomony2012,
  author    = {Gomony, Manil Dev and Weis, Christian and Akesson, Benny and Wehn, Norbert and Goossens, Kees},
  booktitle = {2012 Design, Automation \& Test in Europe Conference \& Exhibition (DATE)},
  title     = {{DRAM} selection and configuration for real-time mobile systems},
  year      = {2012},
  pages     = {51--56},
  doi       = {10.1109/DATE.2012.6176432},
}

@Article{Kim2016,
  author  = {Kim, Yoongu and Yang, Weikun and Mutlu, Onur},
  journal = {IEEE Computer Architecture Letters},
  title   = {{Ramulator}: A Fast and Extensible {DRAM} Simulator},
  year    = {2016},
  number  = {1},
  pages   = {45--49},
  volume  = {15},
  doi     = {10.1109/LCA.2015.2414456},
}

% Self-published manual; the university is the issuing organization, not a journal.
@Manual{Fog2022,
  author       = {Fog, Agner},
  organization = {Technical University of Denmark},
  title        = {Instruction tables},
  year         = {2022},
  month        = jun,
  note         = {Lists of instruction latencies, throughputs and micro-operation breakdowns for Intel, AMD, and VIA CPUs},
}

@InProceedings{Jagtap2016,
  author    = {Jagtap, Radhika and Diestelhorst, Stephan and Hansson, Andreas and Jung, Matthias and Wehn, Norbert},
  booktitle = {2016 International Conference on Embedded Computer Systems: Architectures, Modeling and Simulation (SAMOS)},
  title     = {Exploring system performance using elastic traces: Fast, accurate and portable},
  year      = {2016},
  pages     = {96--105},
  doi       = {10.1109/SAMOS.2016.7818336},
}

% Software without a personal author: the key field gives BibTeX something to
% sort and label by. The URL stays in note so that styles which ignore a url
% field still print it.
@Misc{Qemu,
  key   = {QEMU},
  title = {{QEMU}: A Generic and Open Source Machine Emulator and Virtualizer},
  note  = {https://www.qemu.org/. Accessed: 2022-06-28},
}

% The double braces keep the institutional author from being split into
% first and last name parts.
@Misc{TheBandwidthBenchmark,
  author = {{Erlangen National High Performance Computing Center}},
  title  = {The {Bandwidth} {Benchmark}},
  note   = {https://github.com/RRZE-HPC/TheBandwidthBenchmark. Accessed: 2022-06-28},
}

@Comment{jabref-meta: databaseType:bibtex;}