Update on Overleaf.

This commit is contained in:
Lukas Steiner
2025-03-06 09:27:04 +00:00
committed by node
parent 631f3c5e26
commit ffbfaa5a21
22 changed files with 6081 additions and 768 deletions

View File

@@ -6,7 +6,6 @@
volume={23},
number={7},
pages={1254--1267},
keywords={Timing;Integrated circuit interconnections;Random access memory;Clocks;Servers;Jitter;Mobile communication;CACTI;CACTI-IO;dynamic random access memory (DRAM);IO;memory interface;power and timing models},
doi={10.1109/TVLSI.2014.2334635}}
@inproceedings{joukah_12,
@@ -122,7 +121,6 @@ keywords = {DRAM, Memory, NVM, interconnects, tools}
volume={31},
number={7},
pages={994--1007},
keywords={Nonvolatile memory;Arrays;Phase change random access memory;Wires;Distributed databases;Integrated circuit modeling;Analytical circuit model;MRAM;NAND Flash;nonvolatile memory;phase-change random-access memory (PCRAM);resistive random-access memory (ReRAM);spin-torque-transfer memory (STT-RAM)},
doi={10.1109/TCAD.2012.2185930}}
@book{dalpou_98,
@@ -155,7 +153,6 @@ number = {2},
issn = {0362-1340},
url = {https://doi.org/10.1145/3296957.3173177},
doi = {10.1145/3296957.3173177},
abstract = {We are experiencing an explosive growth in the number of consumer devices, including smartphones, tablets, web-based computers such as Chromebooks, and wearable devices. For this class of devices, energy efficiency is a first-class concern due to the limited battery capacity and thermal power budget. We find that data movement is a major contributor to the total system energy and execution time in consumer devices. The energy and performance costs of moving data between the memory system and the compute units are significantly higher than the costs of computation. As a result, addressing data movement is crucial for consumer devices. In this work, we comprehensively analyze the energy and performance impact of data movement for several widely-used Google consumer workloads: (1) the Chrome web browser; (2) TensorFlow Mobile, Google's machine learning framework; (3) video playback, and (4) video capture, both of which are used in many video services such as YouTube and Google Hangouts. We find that processing-in-memory (PIM) can significantly reduce data movement for all of these workloads, by performing part of the computation close to memory. Each workload contains simple primitives and functions that contribute to a significant amount of the overall data movement. We investigate whether these primitives and functions are feasible to implement using PIM, given the limited area and power constraints of consumer devices. Our analysis shows that offloading these primitives to PIM logic, consisting of either simple cores or specialized accelerators, eliminates a large amount of data movement, and significantly reduces total system energy (by an average of 55.4\% across the workloads) and execution time (by an average of 54.2\%).},
journal = {SIGPLAN Not.},
month = mar,
pages = {316--331},
@@ -206,7 +203,6 @@ series = {ASPLOS '18}
issn = {2379-3155},
doi = {10.1109/MICRO.2010.42},
urldate = {2024-11-14},
abstract = {Energy consumption has become a major constraint on the capabilities of computer systems. In large systems the energy consumed by Dynamic Random Access Memories (DRAM) is a significant part of the total energy consumption. It is possible to calculate the energy consumption of currently available DRAMs from their datasheets, but datasheets don't allow extrapolation to future DRAM technologies and don't show how other changes like increasing bandwidth requirements change DRAM energy consumption. This paper first presents a flexible DRAM power model which uses a description of DRAM architecture, technology and operation to calculate power usage and verifies it against datasheet values. Then the model is used together with assumptions about the DRAM roadmap to extrapolate DRAM energy consumption to future DRAM generations. Using this model we evaluate some of the proposed DRAM power reduction schemes.},
keywords = {Arrays,Capacitance,DRAM,Driver circuits,Logic gates,power,Random access memory,Transistors},
file = {/Users/myzinsky/Zotero/storage/3LW3ARUS/Vogelsang - 2010 - Understanding the Energy Consumption of Dynamic Random Access Memories.pdf;/Users/myzinsky/Zotero/storage/HEEPHYEU/5695550.html}
}
@@ -217,7 +213,6 @@ series = {ASPLOS '18}
author = {Yang, Lita and Kao, Changjung and Srikanth, Sriseshan and Morris, Daniel and Sumbul, H Ekin and Wu, Tony F and Beign{\'e}, Edith},
year = {2024},
address = {Washington},
abstract = {Image Signal Processing (ISP) is an important component in augmented and virtual reality (AR/VR) applications. With the goal of running these applications on battery-powered edge devices, the ISP unit must satisfy rigorous power, performance, and form factor requirements. However, ISP workloads incur large memory footprints and intensive DRAM accesses that are prohibitively expensive for the stringent requirements of all-day wearable AR/VR products. Recent progress in 3D integration provides a promising solution for increasing memory capacities for iso-footprint, while achieving lower I/O power with shorter, vertical 3D interconnections. In this work, we explore and characterize two types of advanced 3D-stacked memories for ISP workloads: 3D-SRAM and 3D-DRAM. Our analysis demonstrates that by allocating additional 3D-stacked local memory to the ISP unit, we reduce expensive off-chip DRAM accesses by 57-92\%, allowing us to deploy larger ISP workloads within power budgets not previously feasible with the 2D ISP baseline architecture. Comparing the two 3D-stacked memories, we observe that the use of 3D-DRAM reduces the total ISP power consumption by up to 53\%, while 3D-SRAM achieves up to 32\% power savings due to significant leakage contribution at increasing SRAM capacities. Finally, we propose a 3D-stacked hybrid memory ISP solution, combining both 3D-SRAM and 3D-DRAM, which can further improve the ISP power efficiency by an additional 9-16\% on top of a 3D-DRAM-only memory architecture. To our knowledge, this is the first study to explore the benefits of advanced 3D-stacked memory for deploying ISP workloads on AR/VR devices.},
langid = {english},
file = {/Users/myzinsky/Zotero/storage/22TRQV4G/Yang et al. - Characterization and Design of 3D-Stacked Memory for Image Signal Processing on ARVR Devices.pdf}
}
@@ -247,9 +242,6 @@ and Jung, Matthias
and Prado, Felipe S.
and Bykov, Kirill
and Wehn, Norbert",
editor="Orailoglu, Alex
and Jung, Matthias
and Reichenbach, Marc",
title="{DRAMSys4.0}: A Fast and Cycle-Accurate {SystemC/TLM}-Based {DRAM} Simulator",
booktitle="Embedded Computer Systems: Architectures, Modeling, and Simulation",
year="2020",