Try to remove ???

This commit is contained in:
2024-06-27 12:33:09 +02:00
parent 7a078f1492
commit 0cd7f92e92

View File

@@ -1599,7 +1599,7 @@ special treatment of cases that do not fulfil the Feller condition},
Abstract = {The role of characteristic functions in finance has been strongly amplified by the development of the general option pricing formula by Carr and Madan. As these functions are defined and operating in the complex plane, they potentially encompass a few well known numerical Abstract = {The role of characteristic functions in finance has been strongly amplified by the development of the general option pricing formula by Carr and Madan. As these functions are defined and operating in the complex plane, they potentially encompass a few well known numerical
issues due to “branching”. A number of elegant publications have emerged tackling these effects specifically for the Heston model. For the latter however we have two specifications for the characteristic function as they are the solutions to a Riccati equation. In this article issues due to “branching”. A number of elegant publications have emerged tackling these effects specifically for the Heston model. For the latter however we have two specifications for the characteristic function as they are the solutions to a Riccati equation. In this article
we put the is and cross the ts by formally pointing out the properties of and relations between both versions. For the first specification we show that for nearly any parameter choice, instabilities will occur for large enough maturities. We subsequently establish - under an additional parameter restriction - the existence of a “threshold” maturity from which the complex operations become a spoil-sport. For the second specification of the characteristic function it is proved that stability is guaranteed under the full dimensional and unrestricted parameter space. We blend the theoretical results with a few examples.}, we put the is and cross the ts by formally pointing out the properties of and relations between both versions. For the first specification we show that for nearly any parameter choice, instabilities will occur for large enough maturities. We subsequently establish - under an additional parameter restriction - the existence of a “threshold” maturity from which the complex operations become a spoil-sport. For the second specification of the characteristic function it is proved that stability is guaranteed under the full dimensional and unrestricted parameter space. We blend the theoretical results with a few examples.},
Cds_grade = {0}, Cds_grade = {0},
Cds_keywords = {Feller condition}, Cds_keywords = {Feller condition},
@@ -5363,7 +5363,7 @@ Design Space Exploration of Turbo Decoders, Lehrstuhl-Paper},
Month = feb, Month = feb,
Year = {2011}, Year = {2011},
Abstract = {Pricing and risk analysis for today's exotic structured equity products is computationally more and more demanding and time consuming. GPUs offer the possibility to significantly increase computing performance even at reduced costs. We applied this technology to replace a large amount of our CPU based computing grid by hybrid GPU/CPU pricing engines. Abstract = {Pricing and risk analysis for today's exotic structured equity products is computationally more and more demanding and time consuming. GPUs offer the possibility to significantly increase computing performance even at reduced costs. We applied this technology to replace a large amount of our CPU based computing grid by hybrid GPU/CPU pricing engines.
One GPU based pricing engine with two Tesla C1060 replaced 140 CPU cores in performing Monte Carlo based simulation of our productive structured equity portfolio with the local and stochastic volatility models. Instantaneous calibration of the piecewise time-dependent Heston model on a single GPU is enabled.}, One GPU based pricing engine with two Tesla C1060 replaced 140 CPU cores in performing Monte Carlo based simulation of our productive structured equity portfolio with the local and stochastic volatility models. Instantaneous calibration of the piecewise time-dependent Heston model on a single GPU is enabled.},
Address = {Herzogstrasse 17 Address = {Herzogstrasse 17
@@ -8131,7 +8131,7 @@ However, over the years more and more theoretical and practical subtleties have
In contrast to homogeneous systems, heterogeneous systems adapt much better to the actual computing needs and come with a lower energy consumption. They are already emerging today with the wide adoption of accelerator cards like GPUs, FPGAs, or many-core architectures like the Xeon Phi extending standard CPUs nodes. However, heterogeneous systems come with unique challenges, i.e. first what is the right partitioning and mapping of different parts of the algorithm to the different subsystems and second how to efficiently explore those choices without having to create time-consuming implementations for all the choices. In contrast to homogeneous systems, heterogeneous systems adapt much better to the actual computing needs and come with a lower energy consumption. They are already emerging today with the wide adoption of accelerator cards like GPUs, FPGAs, or many-core architectures like the Xeon Phi extending standard CPUs nodes. However, heterogeneous systems come with unique challenges, i.e. first what is the right partitioning and mapping of different parts of the algorithm to the different subsystems and second how to efficiently explore those choices without having to create time-consuming implementations for all the choices.
In this poster we tackle dedicated heterogeneous computing platforms for two big data applications: Financial Model Calibration and Graph Similarity Analysis. For both of them we present efficient hybrid architectures based on CPUs and FPGAs. First results show massive improvements in both runtime and energy-efficiency compared to homogeneous implementations on CPU clusters. In this poster we tackle dedicated heterogeneous computing platforms for two big data applications: Financial Model Calibration and Graph Similarity Analysis. For both of them we present efficient hybrid architectures based on CPUs and FPGAs. First results show massive improvements in both runtime and energy-efficiency compared to homogeneous implementations on CPU clusters.
Many financial applications such as derivative pricing or risk management rely on assumptions about the future evolvement of assets on financial markets. The behavior of such assets is in general described by (complex) market models that incorporate a number of tuning parameters. Calibrating those model parameters to the markets is crucial to achieve meaningful simulation results. Many institutes spend the whole night running those calibration tasks on their clusters to have the values ready on the next morning (typically up to >10k assets with up to 2k options / asset). Many financial applications such as derivative pricing or risk management rely on assumptions about the future evolvement of assets on financial markets. The behavior of such assets is in general described by (complex) market models that incorporate a number of tuning parameters. Calibrating those model parameters to the markets is crucial to achieve meaningful simulation results. Many institutes spend the whole night running those calibration tasks on their clusters to have the values ready on the next morning (typically up to >10k assets with up to 2k options / asset).
While calibration tries to minimize the error between simulated and observed asset prices by varying the model parameters, more than 99% of compute time are spent in evaluating (semi-)closed-form solutions for standard products such as plain vanilla calls. By implementing accelerators with optimized data paths for computing those prices we show that we achieve a 4x faster system that only consumes 3% of energy on a hybrid Xilinx Zynq device compared to a multi-core Xeon CPU. While calibration tries to minimize the error between simulated and observed asset prices by varying the model parameters, more than 99% of compute time are spent in evaluating (semi-)closed-form solutions for standard products such as plain vanilla calls. By implementing accelerators with optimized data paths for computing those prices we show that we achieve a 4x faster system that only consumes 3% of energy on a hybrid Xilinx Zynq device compared to a multi-core Xeon CPU.
Finding similarities in complex networks is a classic big data problem and the core of every recommendation system. One example is the Netflix dataset containing 17k movies, 480k users and 200 million ratings, with the goal to find related movies for recommendations. The algorithm we have chosen extracts significant information by comparing the dataset with random graphs, generated without similarities. Generating the graphs involves in our example 10^13 random accesses to memory and the results matrix has 10^10 values, posing huge challenges for the memory hierarchy of standard CPU systems. We present a CPU/FPGA architecture that is 20x faster and 1000x more power efficient than a cluster implementation. It exploits custom data paths, and very high bit-level parallelism based on 1-4 bit operations. With a custom cache architecture the system is able to calculate more than 200 movies in parallel. Those optimizations are unique to FPGAs, while the outer part of the algorithm is handled by the CPU.}, Finding similarities in complex networks is a classic big data problem and the core of every recommendation system. One example is the Netflix dataset containing 17k movies, 480k users and 200 million ratings, with the goal to find related movies for recommendations. The algorithm we have chosen extracts significant information by comparing the dataset with random graphs, generated without similarities. Generating the graphs involves in our example 10^13 random accesses to memory and the results matrix has 10^10 values, posing huge challenges for the memory hierarchy of standard CPU systems. We present a CPU/FPGA architecture that is 20x faster and 1000x more power efficient than a cluster implementation. It exploits custom data paths, and very high bit-level parallelism based on 1-4 bit operations. With a custom cache architecture the system is able to calculate more than 200 movies in parallel. 
Those optimizations are unique to FPGAs, while the outer part of the algorithm is handled by the CPU.},
Cds_grade = {5}, Cds_grade = {5},
Doi = {10.13140/2.1.1887.4568}, Doi = {10.13140/2.1.1887.4568},
@@ -10158,7 +10158,7 @@ care.},
@Article{choche_15, @Article{choche_15,
Title = {{U}nderstanding {S}oft {E}rrors in {U}ncore {C}omponents}, Title = {{U}nderstanding {S}oft {E}rrors in {U}ncore {C}omponents},
Author = {Hyungmin Cho and Author = {Hyungmin Cho and
Chen{-}Yong Cher and Chen{-}Yong Cher and
Thomas Shepherd and Thomas Shepherd and
Subhasish Mitra}, Subhasish Mitra},
@@ -24920,7 +24920,7 @@ and Wehn, Norbert},
Cb_grade = {- ungelesen Cb_grade = {- ungelesen
- Reliability - Reliability
- -
- Recovery, Processor, Monitor}, - Recovery, Processor, Monitor},
File = {kahkan_10.pdf:kahkan_10.pdf:PDF}, File = {kahkan_10.pdf:kahkan_10.pdf:PDF},
Keywords = {Reliability PMF Probability Mass Function}, Keywords = {Reliability PMF Probability Mass Function},
@@ -33767,7 +33767,7 @@ variables, with a short procedure for setting up the necessary tables.},
@Article{meesze_14, @Article{meesze_14,
Title = {{O}verview of emerging nonvolatile memory technologies}, Title = {{O}verview of emerging nonvolatile memory technologies},
Author = {Meena, Jagan Singh and Author = {Meena, Jagan Singh and
Sze, Simon Min and Sze, Simon Min and
Chand, Umesh and Chand, Umesh and
Tseng, Tseung-Yuen}, Tseng, Tseung-Yuen},
Journal = {Nanoscale Research Letters}, Journal = {Nanoscale Research Letters},
@@ -34893,7 +34893,7 @@ relies strongly on simulation results, only few theoretical considerations},
Cb_grade = {- ungelesen Cb_grade = {- ungelesen
- Reliability - Reliability
- -
- Video}, - Video},
Doi = {10.1109/IPDPS.2005.13}, Doi = {10.1109/IPDPS.2005.13},
File = {mohcor_05.pdf:mohcor_05.pdf:PDF}, File = {mohcor_05.pdf:mohcor_05.pdf:PDF},
@@ -37578,7 +37578,7 @@ In this paper we analyze and compare the performance of STAC-A2 workloads on two
publisher = {IEEE}, publisher = {IEEE},
url = {https://www.microsoft.com/en-us/research/publication/stall-time-fair-memory-access-scheduling-for-chip-multiprocessors/}, url = {https://www.microsoft.com/en-us/research/publication/stall-time-fair-memory-access-scheduling-for-chip-multiprocessors/},
abstract = {DRAM memory is a major resource shared among cores in a chip multiprocessor (CMP) system. Memory requests from different threads can interfere with each other. Existing memory access scheduling techniques try to optimize the overall data throughput obtained from the DRAM and thus do not take into account inter-thread interference. abstract = {DRAM memory is a major resource shared among cores in a chip multiprocessor (CMP) system. Memory requests from different threads can interfere with each other. Existing memory access scheduling techniques try to optimize the overall data throughput obtained from the DRAM and thus do not take into account inter-thread interference.
Therefore, different threads running together on the same chip can experience extremely different memory system performance: one thread can experience a severe slowdown or starvation while another is unfairly prioritized by the memory scheduler. This paper proposes a new memory access scheduler, called the Stall-Time Fair Memory scheduler (STFM), that provides quality of service to different threads sharing the DRAM memory system. The goal of the proposed scheduler is to “equalize” the DRAM-related slowdown experienced by each thread due to interference from other threads, without hurting overall system performance. As such, STFM takes into account inherent memory characteristics of each thread and does not unfairly penalize threads that use the DRAM system without interfering with other threads. Therefore, different threads running together on the same chip can experience extremely different memory system performance: one thread can experience a severe slowdown or starvation while another is unfairly prioritized by the memory scheduler. This paper proposes a new memory access scheduler, called the Stall-Time Fair Memory scheduler (STFM), that provides quality of service to different threads sharing the DRAM memory system. The goal of the proposed scheduler is to “equalize” the DRAM-related slowdown experienced by each thread due to interference from other threads, without hurting overall system performance. As such, STFM takes into account inherent memory characteristics of each thread and does not unfairly penalize threads that use the DRAM system without interfering with other threads.
We show that STFM significantly reduces the unfairness in the DRAM system while also improving system throughput (i.e., weighted speedup of threads) on a wide variety of workloads and systems. For example, averaged over 32 different workloads running on an 8-core CMP, the ratio between the highest DRAM-related slowdown and the lowest DRAM-related slowdown reduces from 5.26X to 1.4X, while the average system throughput improves by 7.6%. We qualitatively and quantitatively compare STFM to one new and three previously proposed memory access scheduling algorithms, including network fair queueing. Our results show that STFM provides the best fairness, system throughput, and scalability.}, We show that STFM significantly reduces the unfairness in the DRAM system while also improving system throughput (i.e., weighted speedup of threads) on a wide variety of workloads and systems. For example, averaged over 32 different workloads running on an 8-core CMP, the ratio between the highest DRAM-related slowdown and the lowest DRAM-related slowdown reduces from 5.26X to 1.4X, while the average system throughput improves by 7.6%. We qualitatively and quantitatively compare STFM to one new and three previously proposed memory access scheduling algorithms, including network fair queueing. Our results show that STFM provides the best fairness, system throughput, and scalability.},
month = {December}, month = {December},
owner = {MJ}, owner = {MJ},
@@ -38530,7 +38530,7 @@ zu oberflächlich!},
booktitle = {Euro. Solid-State Circuits Conference}, booktitle = {Euro. Solid-State Circuits Conference},
title = {{ASIC} {I}mplementation of a {MIMO}-{OFDM} {T}ransceiver for 192 {M}bps {WLAN}s}, title = {{ASIC} {I}mplementation of a {MIMO}-{OFDM} {T}ransceiver for 192 {M}bps {WLAN}s},
pages = {215--218}, pages = {215--218},
comment = {CG: comment = {CG:
Konkurrenz zu UMIC-Project}, Konkurrenz zu UMIC-Project},
file = {perhae_05.pdf:perhae_05.pdf:PDF}, file = {perhae_05.pdf:perhae_05.pdf:PDF},
grade = {4}, grade = {4},
@@ -41084,9 +41084,9 @@ based on graphs, gives estimation of speedup related to number of cores, also of
Year = {2009}, Year = {2009},
Pages = {1--6}, Pages = {1--6},
Cb_grade = {001 Cb_grade = {001
ASIP, multi-standard, ACS ASIP, multi-standard, ACS
Relevance: high Relevance: high
2010-10-05 2010-10-05
Flexible ACS Unit with 4 double butterflies like in FlexiTreP Flexible ACS Unit with 4 double butterflies like in FlexiTreP
@@ -42030,7 +42030,7 @@ formlerly: design methodologies based on 1) formalization, 2) abstraction, 3) de
Cb_grade = {- ungelesen Cb_grade = {- ungelesen
- Reliability - Reliability
- -
- Technology, Empfehlung Norbert}, - Technology, Empfehlung Norbert},
Doi = {10.1109/JETCAS.2011.2138250}, Doi = {10.1109/JETCAS.2011.2138250},
File = {sap_11.pdf:sap_11.pdf:PDF}, File = {sap_11.pdf:sap_11.pdf:PDF},
@@ -46236,8 +46236,6 @@ and Wehn, Norbert},
Year = {2020}, Year = {2020},
Month = {July}, Month = {July},
Publisher = {Springer}, Publisher = {Springer},
Owner = {MJ},
Timestamp = {2020-07-14} Timestamp = {2020-07-14}
} }
@@ -58182,7 +58180,6 @@ The presentation of your work and the essential context is very good. The paper
date = {2019}, date = {2019},
title = {On Fault-Effect Analysis at the Virtual-Prototype Abstraction Level}, title = {On Fault-Effect Analysis at the Virtual-Prototype Abstraction Level},
publisher = {TU München}, publisher = {TU München},
owner = {mj},
year = {2019} year = {2019}
} }