misc: Merge the v22.1 release staging into stable

This commit is contained in:
Bobby R. Bruce
2022-12-30 19:53:52 +00:00
2187 changed files with 82773 additions and 57834 deletions

View File

@@ -22,3 +22,7 @@ ecd1e05f5725832c2d5dfdc53f4c4100bf763284
c3bd8eb1214cbebbc92c7958b80aa06913bce3ba
488ded0c8d9e43deef531ad174937982b41f8e4b
26e888965d08486aeed7ebb3ef934ceb1a38cd6f
# A commit which ran Python Black on all Python files.
# https://gem5-review.googlesource.com/c/public/gem5/+/47024
787204c92d876dd81357b75aede52d8ef5e053d3

1
.gitignore vendored
View File

@@ -31,3 +31,4 @@ m5out
configs/example/memcheck.cfg
configs/dram/lowp_sweep.cfg
.pyenv
.vscode

222
.mailmap
View File

@@ -1,37 +1,43 @@
ARM gem5 Developers <none@none>
Abdul Mutaal Ahmad <abdul.mutaal@gmail.com>
adarshpatil <adarshpatil123@gmail.com>
Adrià Armejach <adria.armejach@bsc.es> Adrià Armejach <adria.armejach@gmail.com>
Adrian Herrera <adrian.herrera@arm.com>
Adrien Pesle <adrien.pesle@arm.com>
Adrià Armejach <adria.armejach@bsc.es> Adrià Armejach <adria.armejach@gmail.com>
Akash Bagdia <akash.bagdia@ARM.com> Akash Bagdia <akash.bagdia@arm.com>
Alec Roelke <alec.roelke@gmail.com> Alec Roelke <ar4jc@virginia.edu>
Alexander Klimov <Alexander.Klimov@arm.com>
Alexandru Dutu <alexandru.dutu@amd.com> Alexandru <alexandru.dutu@amd.com>
Alex Richardson <alexrichardson@google.com>
Ali Jafri <ali.jafri@arm.com>
Ali Saidi <Ali.Saidi@arm.com> Ali Saidi <Ali.Saidi@ARM.com>
Ali Saidi <Ali.Saidi@arm.com> Ali Saidi <ali.saidi@arm.com>
Ali Saidi <Ali.Saidi@arm.com> Ali Saidi <Ali.Saidi@ARM.com>
Ali Saidi <Ali.Saidi@arm.com> Ali Saidi <saidi@eecs.umich.edu>
Alistair Delva <adelva@google.com>
Amin Farmahini <aminfar@gmail.com>
Anders Handler <s052838@student.dtu.dk>
Andrea Mondelli <andrea.mondelli@ucf.edu> Andrea Mondelli <Andrea.Mondelli@ucf.edu>
Andrea Mondelli <andrea.mondelli@huawei.com> Andrea Mondelli <andrea.mondelli@ucf.edu>
Andrea Mondelli <andrea.mondelli@huawei.com> Andrea Mondelli <Andrea.Mondelli@ucf.edu>
Andrea Pellegrini <andrea.pellegrini@gmail.com>
Andreas Hansson <andreas.hanson@arm.com> Andreas Hansson <Andreas.Hansson@ARM.com>
Andreas Hansson <andreas.hanson@arm.com> Andreas Hansson <andreas.hansson>
Andreas Hansson <andreas.hanson@arm.com> Andreas Hansson <andreas.hansson@arm.com>
Andreas Hansson <andreas.hanson@arm.com> Andreas Hansson <Andreas.Hansson@ARM.com>
Andreas Hansson <andreas.hanson@arm.com> Andreas Hansson <andreas.hansson@armm.com>
Andreas Sandberg <Andreas.Sandberg@arm.com> Andreas Sandberg <Andreas.Sandberg@ARM.com>
Andreas Sandberg <Andreas.Sandberg@arm.com> Andreas Sandberg <andreas.sandberg@arm.com>
Andreas Sandberg <Andreas.Sandberg@arm.com> Andreas Sandberg <Andreas.Sandberg@ARM.com>
Andreas Sandberg <Andreas.Sandberg@arm.com> Andreas Sandberg <andreas@sandberg.pp.se>
Andrew Bardsley <Andrew.Bardsley@arm.com> Andrew Bardsley <Andreas.Bardsley@arm.com>
Andrew Lukefahr <lukefahr@umich.edu>
Andrew Schultz <alschult@umich.edu>
Andriani Mappoura <andriani.mappoura@arm.com>
Ani Udipi <ani.udipi@arm.com>
Angie Lee <peiyinglee@google.com>
Anis Peysieux <anis.peysieux@inria.fr>
Ani Udipi <ani.udipi@arm.com>
Anouk Van Laer <anouk.vanlaer@arm.com>
Arthur Perais <arthur.perais@inria.fr>
ARM gem5 Developers <none@none>
Arthur Perais <Arthur.Perais@univ-grenoble-alpes.fr> Arthur Perais <arthur.perais@inria.fr>
Arun Rodrigues <afrodri@gmail.com>
Ashkan Tousi <ashkan.tousimojarad@arm.com>
Austin Harris <austinharris@utexas.edu>
Richard D. Strong <r.d.strong@gmail.com>
Austin Harris <austinharris@utexas.edu> Austin Harris <mail@austin-harris.com>
Avishai Tvila <avishai.tvila@gmail.com>
Ayaz Akram <yazakram@ucdavis.edu>
Bagus Hanindhito <hanindhito@bagus.my.id>
@@ -41,80 +47,108 @@ Binh Pham <binhpham@cs.rutgers.edu>
Bjoern A. Zeeb <baz21@cam.ac.uk>
Blake Hechtman <bah13@duke.edu> Blake Hechtman <blake.hechtman@amd.com>
Blake Hechtman <bah13@duke.edu> Blake Hechtman ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <bah13@duke.edu>
Bobby R. Bruce <bbruce@ucdavis.edu>
Bobby R. Bruce <bbruce@ucdavis.edu> Bobby Bruce <bbruce@amarillo.cs.ucdavis.edu>
Boris Shingarov <shingarov@gmail.com> Boris Shingarov <shingarov@labware.com>
Brad Beckmann <brad.beckmann@amd.com> Brad Beckmann <Brad.Beckmann@amd.com>
Brad Beckmann <brad.beckmann@amd.com> Brad Beckmann ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <Brad.Beckmann@amd.com>
Brad Danofsky <bradley.danofsky@amd.com>
Bradley Wang <radwang@ucdavis.edu> Bradley <animalvgamer@gmail.com>
Brandon Potter <brandon.potter@amd.com> BKP <brandon.potter@amd.com>
Brandon Potter <brandon.potter@amd.com> bpotter <brandon.potter@amd.com>
Brandon Potter <brandon.potter@amd.com> Brandon Potter <Brandon.Potter@amd.com>
Brandon Potter <brandon.potter@amd.com> BKP <brandon.potter@amd.com>
Brian Grayson <b.grayson@samsung.com>
Cagdas Dirik <cdirik@micron.com> cdirik <cdirik@micron.com>
Carlos Falquez <c.falquez@fz-juelich.de>
Chander Sudanthi <chander.sudanthi@arm.com> Chander Sudanthi <Chander.Sudanthi@arm.com>
Chander Sudanthi <chander.sudanthi@arm.com> Chander Sudanthi <Chander.Sudanthi@ARM.com>
Charles Jamieson <cjamieson2@wisc.edu>
CHEN Meng <tundriolaxy@gmail.com>
Chen Zou <chenzou@uchicago.edu>
Chia-You Chen <hortune@google.com>
Chow, Marcus <marcus.chow@amd.com>
Chris Adeniyi-Jones <Chris.Adeniyi-Jones@arm.com>
Chris Emmons <chris.emmons@arm.com> Chris Emmons <Chris.Emmons@ARM.com>
Chris Emmons <chris.emmons@arm.com> Chris Emmons <Chris.Emmons@arm.com>
Chris Emmons <chris.emmons@arm.com> Chris Emmons <Chris.Emmons@ARM.com>
Chris January <chris.january@arm.com>
Christian Menard <christian.menard@tu-dresden.de> Christian Menard <Christian.Menard@tu-dresden.de>
Christoph Pfister <pfistchr@student.ethz.ch>
Christopher Torng <clt67@cornell.edu>
Christoph Pfister <pfistchr@student.ethz.ch>
Chuan Zhu <chuan.zhu@arm.com>
Chun-Chen Hsu <chunchenhsu@google.com> Chun-Chen TK Hsu <chunchenhsu@google.com>
Ciro Santilli <ciro.santilli@arm.com>
Clint Smullen <cws3k@cs.virginia.edu>
Cui Jin <cuijinbird@gmail.com> Cui Jin <cuijin7@huawei.com>
Curtis Dunham <Curtis.Dunham@arm.com>
Daecheol You <daecheol.you@samsung.com>
Dam Sunwoo <dam.sunwoo@arm.com>
Dan Gibson <gibson@cs.wisc.edu>
Daniel Carvalho <odanrc@yahoo.com.br> Daniel <odanrc@yahoo.com.br>
Daniel Carvalho <odanrc@yahoo.com.br> Daniel R. Carvalho <odanrc@yahoo.com.br>
Daniel Gerzhoy <daniel.gerzhoy@gmail.com>
Daniel Johnson <daniel.johnson@arm.com>
Daniel Sanchez <sanchezd@stanford.edu>
Davide Basilio Bartolini <davide.basilio.bartolini@huawei.com>
David Guillen-Fandos <david.guillen@arm.com> David Guillen <david.guillen@arm.com>
David Guillen-Fandos <david.guillen@arm.com> David Guillen Fandos <david.guillen@arm.com>
David Hashe <david.hashe@amd.com> David Hashe <david.j.hashe@gmail.com>
David Oehmke <doehmke@umich.edu>
David Schall <david.schall2@arm.com>
Derek Christ <dchrist@rhrk.uni-kl.de>
Derek Hower <drh5@cs.wisc.edu>
Deyaun Guo <guodeyuan@tsinghua.org.cn> Deyuan Guo <guodeyuan@tsinghua.org.cn>
Deyaun Guo <guodeyuan@tsinghua.org.cn> Deyuan Guo ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <guodeyuan@tsinghua.org.cn>
Deyaun Guo <guodeyuan@tsinghua.org.cn> Deyuan Guo <guodeyuan@tsinghua.org.cn>
Dibakar Gope <gope@wisc.edu> Dibakar Gope ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <gope@wisc.edu>
Dimitrios Chasapis <k4s4s.heavener@gmail.com>
Djordje Kovacevic <djordje.kovacevic@arm.com> Djordje Kovacevic <Djordje.Kovacevic@arm.com>
Dongxue Zhang <elta.era@gmail.com>
Doğukan Korkmaztürk <d.korkmazturk@gmail.com>
Dongxue Zhang <elta.era@gmail.com>
Dylan Johnson <Dylan.Johnson@ARM.com>
Earl Ou <shunhsingou@google.com>
eavivi <eavivi@ucdavis.edu>
Éder F. Zulian <zulian@eit.uni-kl.de>
Edmund Grimley Evans <Edmund.Grimley-Evans@arm.com>
Eduardo José Gómez Hernández <eduardojose.gomez@um.es>
Eliot Moss <moss@cs.umass.edu>
Emilio Castillo <castilloe@unican.es> Emilio Castillo <ecastill@bsc.es>
Emilio Castillo <castilloe@unican.es> Emilio Castillo ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <castilloe@unican.es>
Emily Brickey <esbrickey@ucdavis.edu>
Erfan Azarkhish <erfan.azarkhish@unibo.it>
Erhu <fengerhu.ipads@gmail.com>
Eric Van Hensbergen <eric.vanhensbergen@arm.com> Eric Van Hensbergen <Eric.VanHensbergen@ARM.com>
Eric Ye <ericye@google.com>
Erik Hallnor <ehallnor@umich.edu>
Erik Tomusk <E.Tomusk@sms.ed.ac.uk>
Faissal Sleiman <Faissal.Sleiman@arm.com> Faissal Sleiman <sleimanf@umich.edu>
Fernando Endo <fernando.endo2@gmail.com>
Franklin He <franklinh@google.com>
Gabe Black <gabe.black@gmail.com> Gabe Black <gabeblack@google.com>
Gabe Black <gabe.black@gmail.com> Gabe Black <gblack@eecs.umich.edu>
Gabe Loh <gabriel.loh@amd.com> gloh <none@none>
Gabor Dozsa <gabor.dozsa@arm.com>
Gabriel Busnot <gabriel.busnot@arteris.com>
gauravjain14 <gjain6@wisc.edu>
Gedare Bloom <gedare@rtems.org> Gedare Bloom <gedare@gwmail.gwu.edu>
Gene Wu <gene.wu@arm.com> Gene WU <gene.wu@arm.com>
Gene WU <gene.wu@arm.com> Gene Wu <Gene.Wu@arm.com>
Geoffrey Blake <geoffrey.blake@arm.com> Geoffrey Blake <Geoffrey.Blake@arm.com>
Geoffrey Blake <geoffrey.blake@arm.com> Geoffrey Blake <blakeg@umich.edu>
Geoffrey Blake <geoffrey.blake@arm.com> Geoffrey Blake <Geoffrey.Blake@arm.com>
Georg Kotheimer <georg.kotheimer@mailbox.tu-dresden.de>
Giacomo Gabrielli <giacomo.gabrielli@arm.com> Giacomo Gabrielli <Giacomo.Gabrielli@arm.com>
Giacomo Travaglini <giacomo.travaglini@arm.com>
Glenn Bergmans <glenn.bergmans@arm.com>
GWDx <gwdx@mail.ustc.edu.cn>
Hamid Reza Khaleghzadeh <khaleghzadeh@gmail.com> Hamid Reza Khaleghzadeh ext:(%2C%20Lluc%20Alvarez%20%3Clluc.alvarez%40bsc.es%3E%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <khaleghzadeh@gmail.com>
handsomeliu <handsomeliu@google.com>
Hanhwi Jang <jang.hanhwi@gmail.com>
Hoa Nguyen <hoanguyen@ucdavis.edu>
Hongil Yoon <ongal@cs.wisc.edu>
Hsuan Hsu <hsuan.hsu@mediatek.com>
huangjs <jiasen.hjs@alibaba-inc.com>
Hussein Elnawawy <hussein.elnawawy@gmail.com>
Ian Jiang <ianjiang.ict@gmail.com>
IanJiangICT <ianjiang.ict@gmail.com>
Ilias Vougioukas <Ilias.Vougioukas@ARM.com>
Iru Cai <mytbk920423@gmail.com>
Isaac Richter <isaac.richter@rochester.edu>
Isaac Sánchez Barrera <isaac.sanchez@bsc.es>
Ivan Pizarro <ivan.pizarro@metempsy.com>
@@ -123,104 +157,152 @@ Jairo Balart <jairo.balart@metempsy.com>
Jakub Jermar <jakub@jermar.eu>
James Clarkson <james.clarkson@arm.com>
Jan-Peter Larsson <jan-peter.larsson@arm.com>
Jason Lowe-Power <jason@lowepower.com> Jason Lowe-Power <power.jg@gmail.com>
Jan Vrany <jan.vrany@labware.com>
Jarvis Jia <jia44@wisc.edu>
Jasjeet Rangi <jasrangi@ucdavis.edu>
Jason Lowe-Power <jason@lowepower.com> Jason Lowe-Power <powerjg@cs.wisc.edu>
Jason Lowe-Power <jason@lowepower.com> Jason Power <power.jg@gmail.com>
Jason Lowe-Power <jason@lowepower.com> Jason Power <powerjg@cs.wisc.edu>
Jason Lowe-Power <jason@lowepower.com> Jason Lowe-Power <power.jg@gmail.com>
Jason Lowe-Power <jason@lowepower.com> Jason Power ext:(%2C%20Joel%20Hestness%20%3Chestness%40cs.wisc.edu%3E) <power.jg@gmail.com>
Jason Lowe-Power <jason@lowepower.com> Jason Power <powerjg@cs.wisc.edu>
Jason Lowe-Power <jason@lowepower.com> Jason Power <power.jg@gmail.com>
Jason Yu <yuzhijingcheng1996@hotmail.com>
Javier Bueno Hedo <javier.bueno@metempsy.com> Javier Bueno <javier.bueno@metempsy.com>
Javier Cano-Cano <javier.cano555@gmail.com>
Javier Garcia Hernandez <avefenixavefenix@gmail.com>
Javier Setoain <javier.setoain@arm.com>
Jayneel Gandhi <jayneel@cs.wisc.edu>
Jennifer Treichler <jtreichl@umich.edu>
Jieming Yin <jieming.yin@amd.com>
Jerin Joy <joy@rivosinc.com>
Jiajie Chen <c@jia.je>
Jiasen Huang <jiasen.hjs@alibaba-inc.com>
Jiasen <jiasen.hjs@alibaba-inc.com>
Jiayi Huang <jyhuang91@gmail.com>
jiegec <noc@jiegec.ac.cn>
Jieming Yin <jieming.yin@amd.com> jiemingyin <bjm419@gmail.com>
Jing Qu <jqu32@wisc.edu> JingQuJQ <jqu32@wisc.edu>
Jiuyue Ma <majiuyue@ncic.ac.cn>
Joe Gross <joe.gross@amd.com> Joe Gross <joseph.gross@amd.com>
Joel Hestness <jthestness@gmail.com> Joel Hestness ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <jthestness@gmail.com>
Joel Hestness <jthestness@gmail.com> Joel Hestness <hestness@cs.utexas.edu>
Joel Hestness <jthestness@gmail.com> Joel Hestness <hestness@cs.wisc.edu>
Joel Hestness <jthestness@gmail.com> Joel Hestness ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <jthestness@gmail.com>
Joël Porquet-Lupine <joel@porquet.org>
John Alsop <johnathan.alsop@amd.com>
John Kalamatianos <john.kalamatianos@amd.com> jkalamat <john.kalamatianos@amd.com>
Jordi Vaquero <jordi.vaquero@metempsy.com>
Jose Marinho <jose.marinho@arm.com>
Juan M. Cebrian <jm.cebriangonzalez@gmail.com>
Jui-min Lee <fcrh@google.com>
kai.ren <kai.ren@streamcomputing.com> Kai Ren <binarystar2006@outlook.com>
Kanishk Sugand <kanishk.sugand@arm.com>
Karthik Sangaiah <karthik.sangaiah@arm.com>
Kaustav Goswami <kggoswami@ucdavis.edu>
Kelly Nguyen <klynguyen@ucdavis.edu>
Ke Meng <mengke97@hotmail.com>
Kevin Brodsky <kevin.brodsky@arm.com>
Kevin Lim <ktlim@umich.edu>
Kevin Loughlin <kevlough@umich.edu>
Khalique <khalique913@gmail.com>
Koan-Sin Tan <koansin.tan@gmail.com>
Korey Sewell <ksewell@umich.edu>
Krishnendra Nathella <Krishnendra.Nathella@arm.com> Krishnendra Nathella <krinat01@arm.com>
ksco <numbksco@gmail.com>
kunpai <kunpai@ucdavis.edu>
Kyle Roarty <kyleroarty1716@gmail.com> Kyle Roarty <Kyle.Roarty@amd.com>
Laura Hinman <llhinman@ucdavis.edu>
Lena Olson <leolson@google.com> Lena Olson <lena@cs.wisc,edu>
Lena Olson <leolson@google.com> Lena Olson <lena@cs.wisc.edu>
Lisa Hsu <Lisa.Hsu@amd.com> Lisa Hsu <hsul@eecs.umich.edu>
Lluc Alvarez <lluc.alvarez@bsc.es>
Lluís Vilanova <vilanova@ac.upc.edu> Lluis Vilanova <vilanova@ac.upc.edu>
Lukas Steiner <lsteiner@rhrk.uni-kl.de>
Luming Wang <wlm199558@126.com>
m5test <m5test@zizzer>
Mahyar Samani <msamani@ucdavis.edu>
Majid Jalili <majid0jalili@gmail.com>
Malek Musleh <malek.musleh@gmail.com> Nilay Vaish ext:(%2C%20Malek%20Musleh%20%3Cmalek.musleh%40gmail.com%3E) <nilay@cs.wisc.edu>
Marc Mari Barcelo <marc.maribarcelo@arm.com>
Marc Orr <marc.orr@gmail.com> Marc Orr <morr@cs.wisc.edu>
Marco Balboni <Marco.Balboni@ARM.com>
Marco Elver <Marco.Elver@ARM.com> Marco Elver <marco.elver@ed.ac.uk>
Marc Orr <marc.orr@gmail.com> Marc Orr <morr@cs.wisc.edu>
Marjan Fariborz <mfariborz@ucdavis.edu> marjanfariborz <mfariborz@ucdavis.edu>
Mark Hildebrand <mhildebrand@ucdavis.edu>
Marton Erdos <marton.erdos@arm.com>
Maryam Babaie <mbabaie@ucdavis.edu>
Matt DeVuyst <mdevuyst@gmail.com>
Matt Evans <matt.evans@arm.com> Matt Evans <Matt.Evans@arm.com>
Matt Horsnell <matt.horsnell@arm.com>Matt Horsnell <Matt.Horsnell@ARM.com>
Matt Horsnell <matt.horsnell@arm.com> Matt Horsnell <Matt.Horsnell@arm.com>
Matt Horsnell <matt.horsnell@arm.com> Matt Horsnell <matt.horsnell@ARM.com>
Matt Poremba <matthew.poremba@amd.com> Matt Poremba <Matthew.Poremba@amd.com>
Matteo Andreozzi <matteo.andreozzi@arm.com> Matteo Andreozzi <Matteo.Andreozzi@arm.com>
Matteo M. Fusi <matteo.fusi@bsc.es>
Matt Evans <matt.evans@arm.com> Matt Evans <Matt.Evans@arm.com>
Matthew Poremba <matthew.poremba@amd.com> Matthew Poremba <Matthew.Poremba@amd.com>
Matt Sinclair <mattdsinclair@gmail.com> Matthew Sinclair <matthew.sinclair@amd.com>
Matthias Hille <matthiashille8@gmail.com>
Matthias Jung <jungma@eit.uni-kl.de>
Matthias Jung <matthias.jung@iese.fraunhofer.de>
Matt Horsnell <matt.horsnell@arm.com> Matt Horsnell <matt.horsnell@ARM.com>
Matt Horsnell <matt.horsnell@arm.com> Matt Horsnell <Matt.Horsnell@arm.com>
Matt Horsnell <matt.horsnell@arm.com>Matt Horsnell <Matt.Horsnell@ARM.com>
Matt Poremba <matthew.poremba@amd.com> Matt Poremba <Matthew.Poremba@amd.com>
Matt Sinclair <mattdsinclair@gmail.com> Matthew Sinclair <matthew.sinclair@amd.com>
Matt Sinclair <mattdsinclair.wisc@gmail.com> Matt Sinclair <Matthew.Sinclair@amd.com>
Maurice Becker <madnaurice@googlemail.com>
Maxime Martinasso <maxime.cscs@gmail.com>
Maximilian Stein <maximilian.stein@tu-dresden.de>
Maximilian Stein <maximilian.stein@tu-dresden.de>Maximilian Stein <m@steiny.biz>
Maximilien Breughe <maximilien.breughe@elis.ugent.be> Maximilien Breughe <Maximilien.Breughe@elis.ugent.be>
Melissa Jost <melissakjost@gmail.com>
Michael Adler <Michael.Adler@intel.com>
Michael Boyer <Michael.Boyer@amd.com>
Michael LeBeane <michael.lebeane@amd.com> Michael LeBeane <Michael.Lebeane@amd.com>
Michael LeBeane <michael.lebeane@amd.com> mlebeane <michael.lebeane@amd.com>
Michael Levenhagen <mjleven@sandia.gov>
Michiel Van Tol <michiel.vantol@arm.com> Michiel W. van Tol <Michiel.VanTol@arm.com>
Michiel Van Tol <michiel.vantol@arm.com> Michiel van Tol <Michiel.VanTol@arm.com>
Michiel Van Tol <michiel.vantol@arm.com> Michiel W. van Tol <Michiel.VanTol@arm.com>
Miguel Serrano <mserrano@umich.edu>
Mike Upton <michaelupton@gmail.com>
Miles Kaufmann <milesck@eecs.umich.edu>
Min Kyu Jeong <minkyu.jeong@arm.com> Min Kyu Jeong <MinKyu.Jeong@arm.com>
Mingyuan <xiang_my@outlook.com>
Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <Mitch.Hayenga@ARM.com>
Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <Mitch.Hayenga@arm.com>
Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <mitch.hayenga+gem5@gmail.com>
Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga ext:(%2C%20Amin%20Farmahini%20%3Caminfar%40gmail.com%3E) <mitch.hayenga+gem5@gmail.com>
Min Kyu Jeong <minkyu.jeong@arm.com> Min Kyu Jeong <MinKyu.Jeong@arm.com>
Mitch Hayenga <mitch.hayenga@arm.com> Mitchell Hayenga <Mitchell.Hayenga@ARM.com>
Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga ext:(%2C%20Amin%20Farmahini%20%3Caminfar%40gmail.com%3E) <mitch.hayenga+gem5@gmail.com>
Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <Mitch.Hayenga@arm.com>
Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <Mitch.Hayenga@ARM.com>
Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <mitch.hayenga+gem5@gmail.com>
Mohammad Alian <m.alian1369@gmail.com>
Monir Mozumder <monir.mozumder@amd.com>
Moyang Wang <mw828@cornell.edu>
Mrinmoy Ghosh <mrinmoy.ghosh@arm.com> Mrinmoy Ghosh <Mrinmoy.Ghosh@arm.com>
Nathan Binkert <nate@binkert.org> Nathan Binkert <binkertn@umich.edu>
Muhammad Sarmad Saeed <mssaeed@ucdavis.edu>
Nadia Etemadi <netemadi@ucdavis.edu>
Nathanael Premillieu <nathanael.premillieu@arm.com> Nathanael Premillieu <Nathanael.Premillieu@arm.com>
Nathanael Premillieu <nathanael.premillieu@arm.com> Nathanael Premillieu <nathanael.premillieu@huawei.com>
Nathanael Premillieu <nathanael.premillieu@arm.com> Nathanael Premillieu <nathanael.premillieu@irisa.fr>
Nathanael Premillieu <nathanael.premillieu@arm.com> Nathanael Premillieu <nathananel.premillieu@arm.com>
Nathanael Premillieu <nathanael.premillieu@arm.com> Nathanael Premillieu <npremill@irisa.fr>
Nathan Binkert <nate@binkert.org> Nathan Binkert <binkertn@umich.edu>
Nayan Deshmukh <nayan26deshmukh@gmail.com>
Neha Agarwal <neha.agarwal@arm.com>
Neil Natekar <nanatekar@ucdavis.edu>
Nicholas Lindsay <nicholas.lindsay@arm.com>
Nicolas Boichat <drinkcat@google.com>
Nicolas Derumigny <nderumigny@gmail.com>
Nicolas Zea <nicolas.zea@gmail.com>
Nikos Nikoleris <nikos.nikoleris@arm.com> Nikos Nikoleris <nikos.nikoleris@gmail.com>
Nilay Vaish ext:(%2C%20Timothy%20Jones%20%3Ctimothy.jones%40cl.cam.ac.uk%3E) <nilay@cs.wisc.edu>
Nils Asmussen <nils.asmussen@barkhauseninstitut.org> Nils Asmussen <nilsasmussen7@gmail.com>
Noah Katz <nkatz@rivosinc.com>
ntampouratzis <ntampouratzis@isc.tuc.gr>
Nuwan Jayasena <Nuwan.Jayasena@amd.com>
Ola Jeppsson <ola.jeppsson@gmail.com>
Omar Naji <Omar.Naji@arm.com>
Onur Kayiran <onur.kayiran@amd.com>
Pablo Prieto <pablo.prieto@unican.es>
paikunal <kunpai@ucdavis.edu>
Palle Lyckegaard <palle@lyckegaard.dk>
Pau Cabre <pau.cabre@metempsy.com>
Paul Rosenfeld <prosenfeld@micron.com> Paul Rosenfeld <dramninjas@gmail.com>
Paul Rosenfeld <prosenfeld@micron.com> Paul Rosenfeld <prosenfeld@micon.com>
Peter Enns <Peter.Enns@arm.com> Pierre-Yves Péneau <pierre-yves.peneau@lirmm.fr>
Peter <petery.hin@huawei.com>
Peter Yuen <ppeetteerrsx@gmail.com>
Philip Metzler <cpmetz@google.com>
Pierre Ayoub <pierre.ayoub.pro@tutanota.com>
Pin-Yen Lin <treapking@google.com>
Po-Hao Su <supohaosu@gmail.com>
Polina Dudnik <pdudnik@cs.wisc.edu> Polina Dudnik <pdudnik@gmail.com>
@@ -229,23 +311,26 @@ Pouya Fotouhi <pfotouhi@ucdavis.edu> Pouya Fotouhi <Pouya.Fotouhi@amd.com>
Prakash Ramrakhyani <prakash.ramrakhyani@arm.com> Prakash Ramrakhani <Prakash.Ramrakhani@arm.com>
Prakash Ramrakhyani <prakash.ramrakhyani@arm.com> Prakash Ramrakhyani <Prakash.Ramrakhyani@arm.com>
Pritha Ghoshal <pritha9987@tamu.edu>
Quentin Forcioli <quentin.forcioli@telecom-paris.fr>
Radhika Jagtap <radhika.jagtap@arm.com> Radhika Jagtap <radhika.jagtap@ARM.com>
Rahul Thakur <rjthakur@google.com>
Reiley Jeapaul <Reiley.Jeyapaul@arm.com>
Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai <Rekai.GonzalezAlberquilla@arm.com>
Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai Gonzalez Alberquilla <Rekai.GonzalezAlberquilla@arm.com>
Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai Gonzalez Alberquilla <rekai.gonzalezalberquilla@arm.com>
Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai Gonzalez Alberquilla <Rekai.GonzalezAlberquilla@arm.com>
Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai Gonzalez-Alberquilla <Rekai.GonzalezAlberquilla@arm.com>
Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai <Rekai.GonzalezAlberquilla@arm.com>
Rene de Jong <rene.dejong@arm.com>
Ricardo Alves <ricardo.alves@arm.com>
Richard Cooper <richard.cooper@arm.com>
Richard D. Strong <r.d.strong@gmail.com>
Richard Strong <rstrong@hp.com> Richard Strong <r.d.strong@gmail.com>
Richard Strong <rstrong@hp.com> Richard Strong <rstrong@cs.ucsd.edu>
Richard Strong <rstrong@hp.com> Rick Strong <rstrong@cs.ucsd.edu>
Rico Amslinger <rico.amslinger@informatik.uni-augsburg.de>
Riken Gohil <Riken.Gohil@arm.com>
Rizwana Begum <rb639@drexel.edu>
Robert Scheffel <robert.scheffel1@tu-dresden.de> Robert <robert.scheffel1@tu-dresden.de>
Robert Kovacsics <rmk35@cl.cam.ac.uk>
Robert Scheffel <robert.scheffel1@tu-dresden.de> Robert <robert.scheffel1@tu-dresden.de>
Rohit Kurup <rohit.kurup@arm.com>
Ron Dreslinski <rdreslin@umich.edu> Ronald Dreslinski <rdreslin@umich.edu>
Ruben Ayrapetyan <ruben.ayrapetyan@arm.com>
@@ -253,20 +338,27 @@ Rune Holm <rune.holm@arm.com>
Ruslan Bukin <br@bsdpad.com> Ruslan Bukin ext:(%2C%20Zhang%20Guoye) <br@bsdpad.com>
Rutuja Oza <roza@ucdavis.edu>
Ryan Gambord <gambordr@oregonstate.edu>
sacak32 <byrakocalan99@gmail.com>
Sampad Mohapatra <sampad.mohapatra@gmail.com>
Samuel Grayson <sam@samgrayson.me>
Sandipan Das <sandipan@linux.ibm.com>
Samuel Stark <samuel.stark2@arm.com>
Sandipan Das <31861871+sandip4n@users.noreply.github.com>
Sandipan Das <sandipan@linux.ibm.com> Sandipan Das <31861871+sandip4n@users.noreply.github.com>
Santi Galan <santi.galan@metempsy.com>
Sascha Bischoff <sascha.bischoff@arm.com> Sascha Bischoff <Sascha.Bischoff@ARM.com>
Sascha Bischoff <sascha.bischoff@arm.com> Sascha Bischoff <sascha.bischoff@ARM.com>
Sascha Bischoff <sascha.bischoff@arm.com> Sascha Bischoff <Sascha.Bischoff@ARM.com>
Sean McGoogan <Sean.McGoogan@arm.com>
Sean Wilson <spwilson2@wisc.edu>
Sergei Trofimov <sergei.trofimov@arm.com>
Severin Wischmann <wiseveri@student.ethz.ch> Severin Wischmann ext:(%2C%20Ioannis%20Ilkos%20%3Cioannis.ilkos09%40imperial.ac.uk%3E) <wiseveri@student.ethz.ch>
Shawn Rosti <shawn.rosti@gmail.com>
Sherif Elhabbal <elhabbalsherif@gmail.com>
Shivani Parekh <shparekh@ucdavis.edu>
Shivani <shparekh@ucdavis.edu>
Siddhesh Poyarekar <siddhesh.poyarekar@gmail.com>
Somayeh Sardashti <somayeh@cs.wisc.edu>
Sooraj Puthoor <puthoorsooraj@gmail.com>
Sooraj Puthoor <Sooraj.Puthoor@amd.com>
Sophiane Senni <sophiane.senni@gmail.com>
Soumyaroop Roy <sroy@cse.usf.edu>
Srikant Bharadwaj <srikant.bharadwaj@amd.com>
@@ -275,13 +367,14 @@ Stanislaw Czerniawski <stacze01@arm.com>
Stephan Diestelhorst <stephan.diestelhorst@arm.com> Stephan Diestelhorst <stephan.diestelhorst@ARM.com>
Stephen Hines <hines@cs.fsu.edu>
Steve Raasch <sraasch@umich.edu>
Steve Reinhardt <stever@gmail.com> Steve Reinhardt <Steve.Reinhardt@amd.com>
Steve Reinhardt <stever@gmail.com> Steve Reinhardt <steve.reinhardt@amd.com>
Steve Reinhardt <stever@gmail.com> Steve Reinhardt <stever@eecs.umich.edu>
Steve Reinhardt <stever@gmail.com> Steve Reinhardt ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E%2C%20Ali%20Saidi%20%3CAli.Saidi%40ARM.com%3E) <stever@gmail.com>
Steve Reinhardt <stever@gmail.com> Steve Reinhardt <stever@eecs.umich.edu>
Steve Reinhardt <stever@gmail.com> Steve Reinhardt <steve.reinhardt@amd.com>
Steve Reinhardt <stever@gmail.com> Steve Reinhardt <Steve.Reinhardt@amd.com>
Stian Hvatum <stian@dream-web.no>
Sudhanshu Jha <sudhanshu.jha@arm.com>
Sujay Phadke <electronicsguy123@gmail.com>
Sungkeun Kim <ksungkeun84@tamu.edu>
Swapnil Haria <swapnilster@gmail.com> Swapnil Haria <swapnilh@cs.wisc.edu>
Taeho Kgil <tkgil@umich.edu>
Tao Zhang <tao.zhang.0924@gmail.com>
@@ -290,45 +383,50 @@ Tiago Mück <tiago.muck@arm.com> Tiago Muck <tiago.muck@arm.com>
Tim Harris <tharris@microsoft.com>
Timothy Hayes <timothy.hayes@arm.com>
Timothy M. Jones <timothy.jones@arm.com> Timothy Jones <timothy.jones@cl.cam.ac.uk>
Timothy M. Jones <timothy.jones@arm.com> Nilay Vaish ext:(%2C%20Timothy%20Jones%20%3Ctimothy.jones%40cl.cam.ac.uk%3E) <nilay@cs.wisc.edu>
Timothy M. Jones <timothy.jones@arm.com> Timothy M. Jones <timothy.jones@cl.cam.ac.uk>
Timothy M. Jones <timothy.jones@arm.com> Timothy M. Jones <tjones1@inf.ed.ac.uk>
Tom Jablin <tjablin@gmail.com>
Tommaso Marinelli <tommarin@ucm.es>
Tom Rollet <tom.rollet@huawei.com>
Tong Shen <endlessroad@google.com>
Tony Gutierrez <anthony.gutierrez@amd.com> Anthony Gutierrez <atgutier@umich.edu>
Tuan Ta <qtt2@cornell.edu> Tuan Ta <taquangtuan1992@gmail.com>
Tushar Krishna <tushar@ece.gatech.edu> Tushar Krishna <Tushar.Krishna@amd.com>
Travis Boraten <travis.boraten@amd.com>
Trivikram Reddy <tvreddy@ucdavis.edu> tv-reddy <tvreddy@ucdavis.edu>
Tuan Ta <qtt2@cornell.edu> Tuan Ta <taquangtuan1992@gmail.com> Tuan Ta <tuan.ta@amd.com>
Tushar Krishna <tushar@ece.gatech.edu> Tushar Krishna <tushar@csail.mit.edu>
Tushar Krishna <tushar@ece.gatech.edu> Tushar Krishna <Tushar.Krishna@amd.com>
Umesh Bhaskar <umesh.b2006@gmail.com>
Uri Wiener <uri.wiener@arm.com>
Victor Garcia <victor.garcia@arm.com>
Vilas Sridharan <vilas.sridharan@gmail.com>
Vince Weaver <vince@csl.cornell.edu>
Vincentius Robby <acolyte@umich.edu>
Vince Weaver <vince@csl.cornell.edu>
vramadas95 <vramadas@wisc.edu>
vsoria <victor.soria@bsc.es>
Wade Walker <wade.walker@arm.com>
Wei-Han Chen <weihanchen@google.com>
Weiping Liao <weipingliao@google.com>
Wende Tan <twd2@163.com>
Wendy Elsasser <wendy.elsasser@arm.com>
William Wang <william.wang@arm.com> William Wang <William.Wang@ARM.com>
William Wang <william.wang@arm.com> William Wang <William.Wang@arm.com>
William Wang <william.wang@arm.com> William Wang <William.Wang@ARM.com>
Willy Wolff <willy.mh.wolff.ml@gmail.com>
Wing Li <wingers@google.com>
Xiangyu Dong <rioshering@gmail.com>
Xianwei Zhang <xianwei.zhang@amd.com>
Xianwei Zhang <xianwei.zhang.@amd.com> Xianwei Zhang <xianwei.zhang@amd.com>
Xiaoyu Ma <xiaoyuma@google.com>
Xin Ouyang <xin.ouyang@streamcomputing.com>
Xiongfei <xiongfei.liao@gmail.com>
Yasuko Eckert <yasuko.eckert@amd.com>
Yi Xiang <yix@colostate.edu>
Yen-lin Lai <yenlinlai@google.com>
Yifei Liu <liu.ad2039@gmail.com>
Yu-hsin Wang <yuhsingw@google.com>
yiwkd2 <yiwkd2@gmail.com>
Yi Xiang <yix@colostate.edu>
Yuan Yao <yuanyao@seas.harvard.edu>
Yuetsu Kodama <yuetsu.kodama@riken.jp> yuetsu.kodama <yuetsu.kodama@riken.jp>
Yu-hsin Wang <yuhsingw@google.com>
Zhang Zheng <perise@gmail.com>
Zhantong Qiu <ztqiu@ucdavis.edu>
Zhengrong Wang <seanzw@ucla.edu> seanzw <seanyukigeek@gmail.com>
zhongchengyong <zhongcy93@gmail.com>
Zicong Wang <wangzicong@nudt.edu.cn>
Éder F. Zulian <zulian@eit.uni-kl.de>
Gabe Loh <gabriel.loh@amd.com> gloh <none@none>
jiegec <noc@jiegec.ac.cn>
m5test <m5test@zizzer>
Marjan Fariborz <mfariborz@ucdavis.edu> marjanfariborz <mfariborz@ucdavis.edu>
Mike Upton <michaelupton@gmail.com>
seanzw <seanyukigeek@gmail.com>
Trivikram Reddy <tvreddy@ucdavis.edu> tv-reddy <tvreddy@ucdavis.edu>

89
.pre-commit-config.yaml Normal file
View File

@@ -0,0 +1,89 @@
# Copyright (c) 2022 Arm Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
minimum_pre_commit_version: "2.18"
default_language_version:
  python: python3
exclude: |
  (?x)^(
    ext/.*|
    build/.*|
    src/systemc/ext/.*|
    src/systemc/tests/.*/.*|
    src/python/m5/ext/pyfdt/.*|
    tests/.*/ref/.*
  )$
default_stages: [commit]
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
  rev: v4.3.0
  hooks:
  - id: trailing-whitespace
  - id: end-of-file-fixer
  - id: check-json
  - id: check-yaml
  - id: check-added-large-files
  - id: mixed-line-ending
    args: [--fix=lf]
  - id: check-case-conflict
- repo: https://github.com/psf/black
  rev: 22.6.0
  hooks:
  - id: black
- repo: local
  hooks:
  - id: gem5-style-checker
    name: gem5 style checker
    entry: util/git-pre-commit.py
    always_run: true
    exclude: ".*"
    language: system
    description: 'The gem5 style checker hook.'
  - id: gem5-commit-msg-checker
    name: gem5 commit msg checker
    entry: ext/git-commit-msg
    language: system
    stages: [commit-msg]
    description: 'The gem5 commit message checker hook.'
  - id: gerrit-commit-msg-job
    name: gerrit commit message job
    entry: util/gerrit-commit-msg-hook
    language: system
    stages: [commit-msg]
    description: 'Adds Change-ID to the commit message. Needed by Gerrit.'

View File

@@ -136,6 +136,37 @@ rebasing and git, see the [git book].
[git book]: https://git-scm.com/book/en/v2/Git-Branching-Rebasing
Setting up pre-commit
---------------------
To help ensure the gem5 style guide is maintained, we use [pre-commit](
https://pre-commit.com) to run checks on changes to be contributed.
To set up pre-commit, run the following in your gem5 directory to install the
pre-commit and commit message hooks.
```sh
pip install pre-commit
pre-commit install -t pre-commit -t commit-msg
```
The hooks are also automatically installed when gem5 is compiled.
When you run a `git commit` command the pre-commit hook will run checks on your
committed code. The commit will be blocked if a check fails.
The same checks are run as part of Gerrit's CI tests (those required to obtain
a Verified label, necessary for a change to be accepted to the develop branch).
Therefore setting up pre-commit in your local gem5 development environment is
recommended.
You can automatically format files to pass the pre-commit tests by running:
```sh
pre-commit run --files <files to format>
```
Requirements for change descriptions
------------------------------------
To help reviewers and future contributors more easily understand and track

View File

@@ -1,3 +1,121 @@
# Version 22.1.0.0
This release has 500 contributions from 48 unique contributors and marks our second major release of 2022.
This release incorporates several new features, improvements, and bug fixes for the computer architecture research community.
See below for more details!
## New features and improvements
- The gem5 binary can now be compiled to include multiple ISA targets.
A compilation of gem5 which includes all gem5 ISAs can be created using: `scons build/ALL/gem5.opt`.
This will use the Ruby `MESI_Two_Level` cache coherence protocol by default, to use other protocols: `scons build/ALL/gem5.opt PROTOCOL=<other protocol>`.
The classic cache system may continue to be used regardless as to which Ruby cache coherence protocol is compiled.
- The `m5` Python module now includes functions to set exit events at particular simulation ticks:
- *setMaxTick(tick)* : Used to specify the maximum simulation tick.
- *getMaxTick()* : Used to obtain the maximum simulation tick value.
- *getTicksUntilMax()*: Used to get the number of ticks remaining until the maximum tick is reached.
- *scheduleTickExitFromCurrent(tick)* : Used to schedule an exit event a specified number of ticks in the future.
- *scheduleTickExitAbsolute(tick)* : Used to schedule an exit event at a specified tick.
- We now include the `RiscvMatched` board as part of the gem5 stdlib.
This board is modeled after the [HiFive Unmatched board](https://www.sifive.com/boards/hifive-unmatched) and may be used to emulate its behavior.
See "configs/example/gem5_library/riscv-matched-fs.py" and "configs/example/gem5_library/riscv-matched-hello.py" for examples using this board.
- An API for [SimPoints](https://doi.org/10.1145/885651.781076) has been added.
SimPoints can substantially improve gem5 Simulation time by only simulating representative parts of a simulation then extrapolating statistical data accordingly.
Examples of using SimPoints with gem5 can be found in "configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py" and "configs/example/gem5_library/checkpoints/simpoints-se-restore.py".
- "Workloads" have been introduced to gem5.
Workloads have been incorporated into the gem5 Standard library.
They can be used to specify the software to be run on a simulated system, and come complete with input parameters and any other dependencies necessary to run a simulation on the target hardware.
At the level of the gem5 configuration script a user may specify a workload via a board's `set_workload` function.
For example, `set_workload(Workload("x86-ubuntu-18.04-boot"))` sets the board to use the "x86-ubuntu-18.04-boot" workload.
This workload specifies a boot consisting of the Linux 5.4.49 kernel then booting an Ubuntu 18.04 disk image, to exit upon booting.
Workloads are agnostic to underlying gem5 design and, via the gem5-resources infrastructure, will automatically retrieve all necessary kernels, disk-images, etc., necessary to execute.
Examples of using gem5 Workloads can be found in "configs/example/gem5_library/x86-ubuntu-ruby.py" and "configs/example/gem5_library/riscv-ubuntu-run.py".
- To aid gem5 developers, we have incorporated [pre-commit](https://pre-commit.com) checks into gem5.
These checks automatically enforce the gem5 style guide on Python files and a subset of other requirements (such as line length) on altered code prior to a `git commit`.
Users may install pre-commit by running `./util/pre-commit-install.sh`.
Passing these checks is a requirement to submit code to gem5 so installation is strongly advised.
- A multiprocessing module has been added.
This allows for multiple simulations to be run from a single gem5 execution via a single gem5 configuration script.
Example of usage found [in this commit message](https://gem5-review.googlesource.com/c/public/gem5/+/63432).
**Note: This feature is still in development.
While functional, it'll be subject to substantial changes in future releases of gem5**.
- The stdlib's `ArmBoard` now supports Ruby caches.
- Due to numerous fixes and improvements, Ubuntu 22.04 can be booted as a gem5 workload, both in FS and SE mode.
- Substantial improvements have been made to gem5's GDB capabilities.
- The `HBM2Stack` has been added to the gem5 stdlib as a memory component.
- The `MinorCPU` has been fully incorporated into the gem5 Standard Library.
- We now allow for full-system simulation of GPU applications.
The introduction of GPU FS mode allows for the same use-cases as SE mode but reduces the requirement of specific host environments or usage of a Docker container.
The GPU FS mode also has improved simulated speed by functionally simulating memory copies, and provides an easier update path for gem5 developers.
An X86 host and KVM are required to run GPU FS mode.
## API (user facing) changes
- The default CPU Vendor String has been updated to `HygonGenuine`.
This is due to newer versions of GLIBC being more strict about checking current system's supported features.
The previous value, `M5 Simulator`, is not recognized as a valid vendor string and therefore GLIBC returns an error.
- [The stdlib's `_connect_things` function call has been moved from the `AbstractBoard`'s constructor to be run as board pre-instantiation process](https://gem5-review.googlesource.com/c/public/gem5/+/65051).
This is to overcome instances where stdlib components (memory, processor, and cache hierarchy) require Board information known only after its construction.
**This change breaks cases where a user utilizes the stdlib `AbstractBoard` but does not use the stdlib `Simulator` module. This can be fixed by adding the `_pre_instantiate` function before `m5.instantiate`**.
An exception has been added which explains this fix, if this error occurs.
- The setting of checkpoints has been moved from the stdlib's "set_workload" functions to the `Simulator` module.
Setting of checkpoints via the stdlib's "set_workload" functions is now deprecated and will be removed in future releases of gem5.
- The gem5 namespace `Trace` has been renamed `trace` to conform to the gem5 style guide.
- Due to the allowing of multiple ISAs per gem5 build, the `TARGET_ISA` variable has been replaced with `USE_$(ISA)` variables.
For example, if a build contains both the X86 and ARM ISAs the `USE_X86` and `USE_ARM` variables will be set.
## Bug Fixes
- Several compounding bugs were causing issues with floating point operations within gem5 simulations.
These have been fixed.
- Certain emulated syscalls were behaving incorrectly when using RISC-V due to incorrect `open(2)` flag values.
These values have been fixed.
- The GICv3 List register mapping has been fixed.
- Access permissions for GICv3 cpu registers have been fixed.
- In previous releases of gem5 the `sim_quantum` value was set for all cores when using the Standard Library.
This caused issues when setting exit events at a particular tick as it resulted in the exit being off by `sim_quantum`.
As such, the `sim_quantum` value is now only set when using KVM cores.
- PCI ranges in `VExpress_GEM5_Foundation` fixed.
- The `SwitchableProcessor` processor has been fixed to allow switching to a KVM core.
Previously the `SwitchableProcessor` only allowed a user to switch from a KVM core to a non-KVM core.
- The Standard Library has been fixed to permit multicore simulations in SE mode.
- [A bug was fixed in the rcr X86 instruction](https://gem5.atlassian.net/browse/GEM5-1265).
## Build related changes
- gem5 can now be compiled with the SCons 4 build system.
- gem5 can now be compiled with Clang version 14 (minimum Clang version 6).
- gem5 can now be compiled with GCC Version 12 (minimum GCC version 7).
## Other minor updates
- The gem5 stdlib examples in "configs/example/gem5_library" have been updated to, where appropriate, use the stdlib's Simulator module.
These example configurations can be used for reference as to how the `Simulator` module may be utilized in gem5.
- Granulated SGPR computation has been added for gfx9 gpu-compute.
- The stdlib statistics have been improved:
- A `get_simstats` function has been added to access statistics from the `Simulator` module.
- Statistics can be printed: `print(simstats.board.core.some_integer)`.
- GDB ports are now specified for each workload, as opposed to per-simulation run.
- The `m5` utility has been expanded to include "workbegin" and "workend" annotations.
These can be added with `m5 workbegin` and `m5 workend`.
- A `PrivateL1SharedL2CacheHierarchy` has been added to the Standard Library.
- A `GEM5_USE_PROXY` environment variable has been added.
This allows users to specify a socks5 proxy server to use when obtaining gem5 resources and the resources.json file.
It uses the format `<host>:<port>`.
- The fastmodel support has been improved to function with Linux Kernel 5.x.
- The `set_se_binary_workload` function now allows for the passing of input parameters to a binary workload.
- A functional CHI cache hierarchy has been added to the gem5 Standard Library: "src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py".
- The RISC-V K extension has been added.
It includes the following instructions:
- Zbkx: xperm8, xperm4
- Zknd: aes64ds, aes64dsm, aes64im, aes64ks1i, aes64ks2
- Zkne: aes64es, aes64esm, aes64ks1i, aes64ks2
- Zknh: sha256sig0, sha256sig1, sha256sum0, sha256sum1, sha512sig0, sha512sig1, sha512sum0, sha512sum1
- Zksed: sm4ed, sm4ks
- Zksh: sm3p0, sm3p1
# Version 22.0.0.2
**[HOTFIX]** This hotfix contains a set of critical fixes to be applied to gem5 v22.0.

View File

@@ -319,7 +319,10 @@ def config_embedded_python(env):
if conf.TryAction(f'@{python_config} --embed')[0]:
cmd.append('--embed')
def flag_filter(env, cmd_output):
def flag_filter(env, cmd_output, unique=True):
# Since this function does not use the `unique` param, one should not
# pass any value to this param.
assert(unique==True)
flags = cmd_output.split()
prefixes = ('-l', '-L', '-I')
is_useful = lambda x: any(x.startswith(prefix) for prefix in prefixes)
@@ -417,7 +420,6 @@ for variant_path in variant_paths:
conf.CheckLinkFlag('-Wl,--threads')
conf.CheckLinkFlag(
'-Wl,--thread-count=%d' % GetOption('num_jobs'))
else:
error('\n'.join((
"Don't know what compiler options to use for your compiler.",

View File

@@ -15,7 +15,7 @@ be built through SCons.
To build and run all the unit tests:
```shell
scons build/NULL/unittests.opt
scons build/ALL/unittests.opt
```
All unit tests should be run prior to posting a patch to
@@ -25,20 +25,20 @@ To compile and run just one set of tests (e.g. those declared within
`src/base/bitunion.test.cc`):
```shell
scons build/NULL/base/bitunion.test.opt
./build/NULL/base/bitunion.test.opt
scons build/ALL/base/bitunion.test.opt
./build/ALL/base/bitunion.test.opt
```
To list the available test functions from a test file:
```shell
./build/NULL/base/bitunion.test.opt --gtest_list_tests
./build/ALL/base/bitunion.test.opt --gtest_list_tests
```
To run a specific test function (e.g., BitUnionData.NormalBitfield):
```shell
./build/NULL/base/bitunion.test.opt --gtest_filter=BitUnionData.NormalBitfield
./build/ALL/base/bitunion.test.opt --gtest_filter=BitUnionData.NormalBitfield
```
# Running system-level tests
@@ -246,10 +246,9 @@ maintainer (see MAINTAINERS).*
## Running Tests in Parallel
Whimsy has support for parallel testing baked in. This system supports
running multiple suites at the same time on the same computer. To run
running multiple suites at the same time on the same computer. To run
suites in parallel, supply the `-t <number-tests>` flag to the run command.
For example, to run up to three test suites at the same time::
./main.py run --skip-build -t 3

7
build_opts/ALL Normal file
View File

@@ -0,0 +1,7 @@
# Build options for the "ALL" variant: every ISA switch below is enabled so
# that a single build includes all of these ISAs.
USE_ARM_ISA = True
USE_MIPS_ISA = True
USE_POWER_ISA = True
USE_RISCV_ISA = True
USE_SPARC_ISA = True
USE_X86_ISA = True
# Ruby cache coherence protocol compiled into this variant.
PROTOCOL = 'MESI_Two_Level'

View File

@@ -1,2 +1,2 @@
TARGET_ISA = 'arm'
USE_ARM_ISA = True
PROTOCOL = 'CHI'

View File

@@ -1,5 +1,5 @@
# Copyright (c) 2019 ARM Limited
# All rights reserved.
TARGET_ISA = 'arm'
USE_ARM_ISA = True
PROTOCOL = 'MESI_Three_Level'

View File

@@ -1,5 +1,5 @@
# Copyright (c) 2019 ARM Limited
# All rights reserved.
TARGET_ISA = 'arm'
USE_ARM_ISA = True
PROTOCOL = 'MESI_Three_Level_HTM'

View File

@@ -1,5 +1,5 @@
# Copyright (c) 2019 ARM Limited
# All rights reserved.
TARGET_ISA = 'arm'
USE_ARM_ISA = True
PROTOCOL = 'MOESI_hammer'

View File

@@ -1,4 +1,4 @@
PROTOCOL = 'GPU_VIPER'
TARGET_ISA = 'x86'
USE_X86_ISA = True
TARGET_GPU_ISA = 'gcn3'
BUILD_GPU = True

View File

@@ -1,2 +1,2 @@
TARGET_ISA = 'null'
USE_NULL_ISA = True
PROTOCOL = 'Garnet_standalone'

View File

@@ -1,2 +1,2 @@
TARGET_ISA = 'mips'
USE_MIPS_ISA = True
PROTOCOL = 'MI_example'

View File

@@ -1,2 +1,2 @@
TARGET_ISA = 'null'
USE_NULL_ISA = True
PROTOCOL='MI_example'

View File

@@ -1,2 +1,2 @@
TARGET_ISA = 'null'
USE_NULL_ISA = True
PROTOCOL = 'MESI_Two_Level'

View File

@@ -1,2 +1,2 @@
TARGET_ISA = 'null'
USE_NULL_ISA = True
PROTOCOL='MOESI_CMP_directory'

View File

@@ -1,2 +1,2 @@
TARGET_ISA = 'null'
USE_NULL_ISA = True
PROTOCOL='MOESI_CMP_token'

View File

@@ -1,2 +1,2 @@
TARGET_ISA = 'null'
USE_NULL_ISA = True
PROTOCOL='MOESI_hammer'

View File

@@ -1,2 +1,2 @@
TARGET_ISA = 'power'
USE_POWER_ISA = True
PROTOCOL = 'MI_example'

View File

@@ -1,2 +1,2 @@
TARGET_ISA = 'riscv'
USE_RISCV_ISA = True
PROTOCOL = 'MI_example'

View File

@@ -1,2 +1,2 @@
TARGET_ISA = 'sparc'
USE_SPARC_ISA = True
PROTOCOL = 'MI_example'

View File

@@ -1,4 +1,4 @@
PROTOCOL = 'GPU_VIPER'
TARGET_ISA = 'x86'
USE_X86_ISA = True
TARGET_GPU_ISA = 'vega'
BUILD_GPU = True

View File

@@ -1,3 +1,3 @@
TARGET_ISA = 'x86'
USE_X86_ISA = True
PROTOCOL = 'MESI_Two_Level'
NUMBER_BITS_PER_SET = '128'

View File

@@ -1,3 +1,3 @@
TARGET_ISA = 'x86'
USE_X86_ISA = True
PROTOCOL = 'MESI_Two_Level'
NUMBER_BITS_PER_SET = '128'

View File

@@ -1,2 +1,2 @@
TARGET_ISA = 'x86'
USE_X86_ISA = True
PROTOCOL = 'MI_example'

View File

@@ -1,2 +1,2 @@
PROTOCOL = 'MOESI_AMD_Base'
TARGET_ISA = 'x86'
USE_X86_ISA = True

View File

@@ -26,16 +26,17 @@
import array
import functools
def bytesToCppArray(code, symbol, data):
'''
"""
Output an array of bytes to a code formatter as a c++ array declaration.
'''
code('const std::uint8_t ${symbol}[] = {')
"""
code("const std::uint8_t ${symbol}[] = {")
code.indent()
step = 16
for i in range(0, len(data), step):
x = array.array('B', data[i:i+step])
strs = map(lambda i: f'{i},', x)
x = array.array("B", data[i : i + step])
strs = map(lambda i: f"{i},", x)
code(functools.reduce(lambda x, y: x + y, strs))
code.dedent()
code('};')
code("};")

View File

@@ -1,3 +1,15 @@
# Copyright (c) 2022 Arm Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Copyright (c) 2006-2009 Nathan Binkert <nate@binkert.org>
# All rights reserved.
#
@@ -33,6 +45,7 @@ import inspect
import os
import re
class lookup(object):
def __init__(self, formatter, frame, *args, **kwargs):
self.frame = frame
@@ -52,10 +65,10 @@ class lookup(object):
if item in self.kwargs:
return self.kwargs[item]
if item == '__file__':
if item == "__file__":
return self.frame.f_code.co_filename
if item == '__line__':
if item == "__line__":
return self.frame.f_lineno
if self.formatter.locals and item in self.frame.f_locals:
@@ -77,6 +90,7 @@ class lookup(object):
pass
raise IndexError("Could not find '%s'" % item)
class code_formatter_meta(type):
pattern = r"""
(?:
@@ -90,44 +104,48 @@ class code_formatter_meta(type):
%(delim)s(?P<invalid>) # ill-formed delimiter exprs
)
"""
def __init__(cls, name, bases, dct):
super(code_formatter_meta, cls).__init__(name, bases, dct)
if 'pattern' in dct:
if "pattern" in dct:
pat = cls.pattern
else:
# tuple expansion to ensure strings are proper length
lb,rb = cls.braced
lb1,lb2,rb2,rb1 = cls.double_braced
lb, rb = cls.braced
lb1, lb2, rb2, rb1 = cls.double_braced
pat = code_formatter_meta.pattern % {
'delim' : re.escape(cls.delim),
'ident' : cls.ident,
'pos' : cls.pos,
'lb' : re.escape(lb),
'rb' : re.escape(rb),
'ldb' : re.escape(lb1+lb2),
'rdb' : re.escape(rb2+rb1),
}
"delim": re.escape(cls.delim),
"ident": cls.ident,
"pos": cls.pos,
"lb": re.escape(lb),
"rb": re.escape(rb),
"ldb": re.escape(lb1 + lb2),
"rdb": re.escape(rb2 + rb1),
}
cls.pattern = re.compile(pat, re.VERBOSE | re.DOTALL | re.MULTILINE)
class code_formatter(object, metaclass=code_formatter_meta):
delim = r'$'
ident = r'[_A-z]\w*'
pos = r'[0-9]+'
braced = r'{}'
double_braced = r'{{}}'
delim = r"$"
ident = r"[_A-z]\w*"
pos = r"[0-9]+"
braced = r"{}"
double_braced = r"{{}}"
globals = True
locals = True
fix_newlines = True
def __init__(self, *args, **kwargs):
self._data = []
self._dict = {}
self._indent_level = 0
self._indent_spaces = 4
self.globals = kwargs.pop('globals', type(self).globals)
self.locals = kwargs.pop('locals', type(self).locals)
self._fix_newlines = \
kwargs.pop('fix_newlines', type(self).fix_newlines)
self.globals = kwargs.pop("globals", type(self).globals)
self.locals = kwargs.pop("locals", type(self).locals)
self._fix_newlines = kwargs.pop(
"fix_newlines", type(self).fix_newlines
)
if args:
self.__call__(args)
@@ -159,38 +177,44 @@ class code_formatter(object, metaclass=code_formatter_meta):
# Add a comment to inform which file generated the generated file
# to make it easier to backtrack and modify generated code
frame = inspect.currentframe().f_back
if re.match('\.(cc|hh|c|h)', extension) is not None:
f.write(f'''/**
if re.match(r"^\.(cc|hh|c|h)$", extension) is not None:
f.write(
f"""/**
* DO NOT EDIT THIS FILE!
* File automatically generated by
* {frame.f_code.co_filename}:{frame.f_lineno}
*/
''')
elif re.match('\.py', extension) is not None:
f.write(f'''#
"""
)
elif re.match(r"^\.py$", extension) is not None:
f.write(
f"""#
# DO NOT EDIT THIS FILE!
# File automatically generated by
# {frame.f_code.co_filename}:{frame.f_lineno}
#
''')
elif re.match('\.html', extension) is not None:
f.write(f'''<!--
"""
)
elif re.match(r"^\.html$", extension) is not None:
f.write(
f"""<!--
DO NOT EDIT THIS FILE!
File automatically generated by
{frame.f_code.co_filename}:{frame.f_lineno}
-->
''')
"""
)
for data in self._data:
f.write(data)
f.close()
def __str__(self):
data = ''.join(self._data)
self._data = [ data ]
data = "".join(self._data)
self._data = [data]
return data
def __getitem__(self, item):
@@ -219,21 +243,21 @@ class code_formatter(object, metaclass=code_formatter_meta):
self._data.append(data)
return
initial_newline = not self._data or self._data[-1] == '\n'
initial_newline = not self._data or self._data[-1] == "\n"
for line in data.splitlines():
if line:
if self._indent_level:
self._data.append(' ' * self._indent_level)
self._data.append(" " * self._indent_level)
self._data.append(line)
if line or not initial_newline:
self._data.append('\n')
self._data.append("\n")
initial_newline = False
def __call__(self, *args, **kwargs):
if not args:
self._data.append('\n')
self._data.append("\n")
return
format = args[0]
@@ -242,51 +266,56 @@ class code_formatter(object, metaclass=code_formatter_meta):
frame = inspect.currentframe().f_back
l = lookup(self, frame, *args, **kwargs)
def convert(match):
ident = match.group('lone')
ident = match.group("lone")
# check for a lone identifier
if ident:
indent = match.group('indent') # must be spaces
lone = '%s' % (l[ident], )
indent = match.group("indent") # must be spaces
lone = "%s" % (l[ident],)
def indent_lines(gen):
for line in gen:
yield indent
yield line
return ''.join(indent_lines(lone.splitlines(True)))
return "".join(indent_lines(lone.splitlines(True)))
# check for an identifier, braced or not
ident = match.group('ident') or match.group('b_ident')
ident = match.group("ident") or match.group("b_ident")
if ident is not None:
return '%s' % (l[ident], )
return "%s" % (l[ident],)
# check for a positional parameter, braced or not
pos = match.group('pos') or match.group('b_pos')
pos = match.group("pos") or match.group("b_pos")
if pos is not None:
pos = int(pos)
if pos > len(args):
raise ValueError \
('Positional parameter #%d not found in pattern' % pos,
code_formatter.pattern)
return '%s' % (args[int(pos)], )
raise ValueError(
"Positional parameter #%d not found in pattern" % pos,
code_formatter.pattern,
)
return "%s" % (args[int(pos)],)
# check for a double braced expression
eval_expr = match.group('eval')
eval_expr = match.group("eval")
if eval_expr is not None:
result = eval(eval_expr, {}, l)
return '%s' % (result, )
return "%s" % (result,)
# check for an escaped delimiter
if match.group('escaped') is not None:
return '$'
if match.group("escaped") is not None:
return "$"
# At this point, we have to match invalid
if match.group('invalid') is None:
if match.group("invalid") is None:
# didn't match invalid!
raise ValueError('Unrecognized named group in pattern',
code_formatter.pattern)
raise ValueError(
"Unrecognized named group in pattern",
code_formatter.pattern,
)
i = match.start('invalid')
i = match.start("invalid")
if i == 0:
colno = 1
lineno = 1
@@ -295,52 +324,64 @@ class code_formatter(object, metaclass=code_formatter_meta):
colno = i - sum(len(z) for z in lines)
lineno = len(lines)
raise ValueError('Invalid format string: line %d, col %d' %
(lineno, colno))
raise ValueError(
"Invalid format string: line %d, col %d" % (lineno, colno)
)
d = code_formatter.pattern.sub(convert, format)
self._append(d)
__all__ = [ "code_formatter" ]
if __name__ == '__main__':
__all__ = ["code_formatter"]
if __name__ == "__main__":
from .code_formatter import code_formatter
f = code_formatter()
class Foo(dict):
def __init__(self, **kwargs):
self.update(kwargs)
def __getattr__(self, attr):
return self[attr]
x = "this is a test"
l = [ [Foo(x=[Foo(y=9)])] ]
l = [[Foo(x=[Foo(y=9)])]]
y = code_formatter()
y('''
y(
"""
{
this_is_a_test();
}
''')
f(' $y')
f('''$__file__:$__line__
{''')
"""
)
f(" $y")
f(
"""$__file__:$__line__
{"""
)
f("${{', '.join(str(x) for x in range(4))}}")
f('${x}')
f('$x')
f("${x}")
f("$x")
f.indent()
for i in range(5):
f('$x')
f('$i')
f('$0', "zero")
f('$1 $0', "zero", "one")
f('${0}', "he went")
f('${0}asdf', "he went")
f("$x")
f("$i")
f("$0", "zero")
f("$1 $0", "zero", "one")
f("${0}", "he went")
f("${0}asdf", "he went")
f.dedent()
f('''
f(
"""
${{l[0][0]["x"][0].y}}
}
''', 1, 9)
""",
1,
9,
)
print(f, end=' ')
print(f, end=" ")

View File

@@ -46,8 +46,8 @@ import importer
from code_formatter import code_formatter
parser = argparse.ArgumentParser()
parser.add_argument('modpath', help='module the simobject belongs to')
parser.add_argument('cxx_config_cc', help='cxx config cc file to generate')
parser.add_argument("modpath", help="module the simobject belongs to")
parser.add_argument("cxx_config_cc", help="cxx config cc file to generate")
args = parser.parse_args()
@@ -63,22 +63,25 @@ import m5.params
code = code_formatter()
entry_class = 'CxxConfigDirectoryEntry_%s' % sim_object_name
param_class = '%sCxxConfigParams' % sim_object_name
entry_class = "CxxConfigDirectoryEntry_%s" % sim_object_name
param_class = "%sCxxConfigParams" % sim_object_name
def cxx_bool(b):
return 'true' if b else 'false'
return "true" if b else "false"
code('#include "params/%s.hh"' % sim_object_name)
for param in sim_object._params.values():
if isSimObjectClass(param.ptype):
code('#include "%s"' % param.ptype._value_dict['cxx_header'])
code('#include "%s"' % param.ptype._value_dict["cxx_header"])
code('#include "params/%s.hh"' % param.ptype.__name__)
else:
param.ptype.cxx_ini_predecls(code)
code('''#include "${{sim_object._value_dict['cxx_header']}}"
code(
"""#include "${{sim_object._value_dict['cxx_header']}}"
#include "base/str.hh"
#include "cxx_config/${sim_object_name}.hh"
@@ -87,34 +90,39 @@ namespace gem5
${param_class}::DirectoryEntry::DirectoryEntry()
{
''')
"""
)
code.indent()
for param in sim_object._params.values():
is_vector = isinstance(param, m5.params.VectorParamDesc)
is_simobj = issubclass(param.ptype, m5.SimObject.SimObject)
code('parameters["%s"] = new ParamDesc("%s", %s, %s);' %
(param.name, param.name, cxx_bool(is_vector),
cxx_bool(is_simobj)));
code(
'parameters["%s"] = new ParamDesc("%s", %s, %s);'
% (param.name, param.name, cxx_bool(is_vector), cxx_bool(is_simobj))
)
for port in sim_object._ports.values():
is_vector = isinstance(port, m5.params.VectorPort)
is_requestor = port.role == 'GEM5 REQUESTOR'
is_requestor = port.role == "GEM5 REQUESTOR"
code('ports["%s"] = new PortDesc("%s", %s, %s);' %
(port.name, port.name, cxx_bool(is_vector),
cxx_bool(is_requestor)))
code(
'ports["%s"] = new PortDesc("%s", %s, %s);'
% (port.name, port.name, cxx_bool(is_vector), cxx_bool(is_requestor))
)
code.dedent()
code('''}
code(
"""}
bool
${param_class}::setSimObject(const std::string &name, SimObject *simObject)
{
bool ret = true;
if (false) {
''')
"""
)
code.indent()
for param in sim_object._params.values():
@@ -124,14 +132,17 @@ for param in sim_object._params.values():
if is_simobj and not is_vector:
code('} else if (name == "${{param.name}}") {')
code.indent()
code('this->${{param.name}} = '
'dynamic_cast<${{param.ptype.cxx_type}}>(simObject);')
code('if (simObject && !this->${{param.name}})')
code(' ret = false;')
code(
"this->${{param.name}} = "
"dynamic_cast<${{param.ptype.cxx_type}}>(simObject);"
)
code("if (simObject && !this->${{param.name}})")
code(" ret = false;")
code.dedent()
code.dedent()
code('''
code(
"""
} else {
ret = false;
}
@@ -146,7 +157,8 @@ ${param_class}::setSimObjectVector(const std::string &name,
bool ret = true;
if (false) {
''')
"""
)
code.indent()
for param in sim_object._params.values():
@@ -156,23 +168,28 @@ for param in sim_object._params.values():
if is_simobj and is_vector:
code('} else if (name == "${{param.name}}") {')
code.indent()
code('this->${{param.name}}.clear();')
code('for (auto i = simObjects.begin(); '
'ret && i != simObjects.end(); i ++)')
code('{')
code("this->${{param.name}}.clear();")
code(
"for (auto i = simObjects.begin(); "
"ret && i != simObjects.end(); i ++)"
)
code("{")
code.indent()
code('${{param.ptype.cxx_type}} object = '
'dynamic_cast<${{param.ptype.cxx_type}}>(*i);')
code('if (*i && !object)')
code(' ret = false;')
code('else')
code(' this->${{param.name}}.push_back(object);')
code(
"${{param.ptype.cxx_type}} object = "
"dynamic_cast<${{param.ptype.cxx_type}}>(*i);"
)
code("if (*i && !object)")
code(" ret = false;")
code("else")
code(" this->${{param.name}}.push_back(object);")
code.dedent()
code('}')
code("}")
code.dedent()
code.dedent()
code('''
code(
"""
} else {
ret = false;
}
@@ -193,7 +210,8 @@ ${param_class}::setParam(const std::string &name,
bool ret = true;
if (false) {
''')
"""
)
code.indent()
for param in sim_object._params.values():
@@ -203,12 +221,14 @@ for param in sim_object._params.values():
if not is_simobj and not is_vector:
code('} else if (name == "${{param.name}}") {')
code.indent()
param.ptype.cxx_ini_parse(code,
'value', 'this->%s' % param.name, 'ret =')
param.ptype.cxx_ini_parse(
code, "value", "this->%s" % param.name, "ret ="
)
code.dedent()
code.dedent()
code('''
code(
"""
} else {
ret = false;
}
@@ -223,7 +243,8 @@ ${param_class}::setParamVector(const std::string &name,
bool ret = true;
if (false) {
''')
"""
)
code.indent()
for param in sim_object._params.values():
@@ -233,22 +254,23 @@ for param in sim_object._params.values():
if not is_simobj and is_vector:
code('} else if (name == "${{param.name}}") {')
code.indent()
code('${{param.name}}.clear();')
code('for (auto i = values.begin(); '
'ret && i != values.end(); i ++)')
code('{')
code("${{param.name}}.clear();")
code(
"for (auto i = values.begin(); " "ret && i != values.end(); i ++)"
)
code("{")
code.indent()
code('${{param.ptype.cxx_type}} elem;')
param.ptype.cxx_ini_parse(code,
'*i', 'elem', 'ret =')
code('if (ret)')
code(' this->${{param.name}}.push_back(elem);')
code("${{param.ptype.cxx_type}} elem;")
param.ptype.cxx_ini_parse(code, "*i", "elem", "ret =")
code("if (ret)")
code(" this->${{param.name}}.push_back(elem);")
code.dedent()
code('}')
code("}")
code.dedent()
code.dedent()
code('''
code(
"""
} else {
ret = false;
}
@@ -263,15 +285,17 @@ ${param_class}::setPortConnectionCount(const std::string &name,
bool ret = true;
if (false) {
''')
"""
)
code.indent()
for port in sim_object._ports.values():
code('} else if (name == "${{port.name}}") {')
code(' this->port_${{port.name}}_connection_count = count;')
code(" this->port_${{port.name}}_connection_count = count;")
code.dedent()
code('''
code(
"""
} else {
ret = false;
}
@@ -282,18 +306,21 @@ code('''
SimObject *
${param_class}::simObjectCreate()
{
''')
"""
)
code.indent()
if hasattr(sim_object, 'abstract') and sim_object.abstract:
code('return nullptr;')
if hasattr(sim_object, "abstract") and sim_object.abstract:
code("return nullptr;")
else:
code('return this->create();')
code("return this->create();")
code.dedent()
code('''}
code(
"""}
} // namespace gem5
''')
"""
)
code.write(args.cxx_config_cc)

View File

@@ -46,8 +46,8 @@ import importer
from code_formatter import code_formatter
parser = argparse.ArgumentParser()
parser.add_argument('modpath', help='module the simobject belongs to')
parser.add_argument('cxx_config_hh', help='cxx config header file to generate')
parser.add_argument("modpath", help="module the simobject belongs to")
parser.add_argument("cxx_config_hh", help="cxx config header file to generate")
args = parser.parse_args()
@@ -60,10 +60,11 @@ sim_object = getattr(module, sim_object_name)
code = code_formatter()
entry_class = 'CxxConfigDirectoryEntry_%s' % sim_object_name
param_class = '%sCxxConfigParams' % sim_object_name
entry_class = "CxxConfigDirectoryEntry_%s" % sim_object_name
param_class = "%sCxxConfigParams" % sim_object_name
code('''#include "params/${sim_object_name}.hh"
code(
"""#include "params/${sim_object_name}.hh"
#include "sim/cxx_config.hh"
@@ -110,6 +111,7 @@ class ${param_class} : public CxxConfigParams, public ${sim_object_name}Params
};
} // namespace gem5
''')
"""
)
code.write(args.cxx_config_hh)

View File

@@ -44,35 +44,41 @@ parser = argparse.ArgumentParser()
parser.add_argument("hh", help="the path of the debug flag header file")
parser.add_argument("name", help="the name of the debug flag")
parser.add_argument("desc", help="a description of the debug flag")
parser.add_argument("fmt",
help="whether the flag is a format flag (True or False)")
parser.add_argument("components",
help="components of a compound flag, if applicable, joined with :")
parser.add_argument(
"fmt", help="whether the flag is a format flag (True or False)"
)
parser.add_argument(
"components",
help="components of a compound flag, if applicable, joined with :",
)
args = parser.parse_args()
fmt = args.fmt.lower()
if fmt == 'true':
if fmt == "true":
fmt = True
elif fmt == 'false':
elif fmt == "false":
fmt = False
else:
print(f'Unrecognized "FMT" value {fmt}', file=sys.stderr)
sys.exit(1)
components = args.components.split(':') if args.components else []
components = args.components.split(":") if args.components else []
code = code_formatter()
code('''
code(
"""
#ifndef __DEBUG_${{args.name}}_HH__
#define __DEBUG_${{args.name}}_HH__
#include "base/compiler.hh" // For namespace deprecation
#include "base/debug.hh"
''')
"""
)
for flag in components:
code('#include "debug/${flag}.hh"')
code('''
code(
"""
namespace gem5
{
@@ -82,14 +88,16 @@ namespace debug
namespace unions
{
''')
"""
)
# Use unions to prevent debug flags from being destructed. It's the
# responsibility of the programmer to handle object destruction for members
# of the union. We purposefully leave that destructor empty so that we can
# use debug flags even in the destructors of other objects.
if components:
code('''
code(
"""
inline union ${{args.name}}
{
~${{args.name}}() {}
@@ -100,9 +108,11 @@ inline union ${{args.name}}
}
};
} ${{args.name}};
''')
"""
)
else:
code('''
code(
"""
inline union ${{args.name}}
{
~${{args.name}}() {}
@@ -110,18 +120,21 @@ inline union ${{args.name}}
"${{args.name}}", "${{args.desc}}", ${{"true" if fmt else "false"}}
};
} ${{args.name}};
''')
"""
)
code('''
code(
"""
} // namespace unions
inline constexpr const auto& ${{args.name}} =
inline constexpr const auto& ${{args.name}} =
::gem5::debug::unions::${{args.name}}.${{args.name}};
} // namespace debug
} // namespace gem5
#endif // __DEBUG_${{args.name}}_HH__
''')
"""
)
code.write(args.hh)

View File

@@ -46,17 +46,18 @@ import importer
from code_formatter import code_formatter
parser = argparse.ArgumentParser()
parser.add_argument('modpath', help='module the enum belongs to')
parser.add_argument('enum_cc', help='enum cc file to generate')
parser.add_argument('use_python',
help='whether python is enabled in gem5 (True or False)')
parser.add_argument("modpath", help="module the enum belongs to")
parser.add_argument("enum_cc", help="enum cc file to generate")
parser.add_argument(
"use_python", help="whether python is enabled in gem5 (True or False)"
)
args = parser.parse_args()
use_python = args.use_python.lower()
if use_python == 'true':
if use_python == "true":
use_python = True
elif use_python == 'false':
elif use_python == "false":
use_python = False
else:
print(f'Unrecognized "use_python" value {use_python}', file=sys.stderr)
@@ -75,41 +76,46 @@ wrapper_name = enum.wrapper_name
file_name = enum.__name__
name = enum.__name__ if enum.enum_name is None else enum.enum_name
code('''#include "base/compiler.hh"
code(
"""#include "base/compiler.hh"
#include "enums/$file_name.hh"
namespace gem5
{
''')
"""
)
if enum.wrapper_is_struct:
code('const char *${wrapper_name}::${name}Strings'
'[Num_${name}] =')
code("const char *${wrapper_name}::${name}Strings" "[Num_${name}] =")
else:
if enum.is_class:
code('''\
code(
"""\
const char *${name}Strings[static_cast<int>(${name}::Num_${name})] =
''')
"""
)
else:
code('''GEM5_DEPRECATED_NAMESPACE(Enums, enums);
code(
"""GEM5_DEPRECATED_NAMESPACE(Enums, enums);
namespace enums
{''')
{"""
)
code.indent(1)
code('const char *${name}Strings[Num_${name}] =')
code("const char *${name}Strings[Num_${name}] =")
code('{')
code("{")
code.indent(1)
for val in enum.vals:
code('"$val",')
code.dedent(1)
code('};')
code("};")
if not enum.wrapper_is_struct and not enum.is_class:
code.dedent(1)
code('} // namespace enums')
code("} // namespace enums")
code('} // namespace gem5')
code("} // namespace gem5")
if use_python:
@@ -118,7 +124,8 @@ if use_python:
enum_name = enum.__name__ if enum.enum_name is None else enum.enum_name
wrapper_name = enum_name if enum.is_class else enum.wrapper_name
code('''#include "pybind11/pybind11.h"
code(
"""#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
#include <sim/init.hh>
@@ -133,7 +140,8 @@ module_init(py::module_ &m_internal)
{
py::module_ m = m_internal.def_submodule("enum_${name}");
''')
"""
)
if enum.is_class:
code('py::enum_<${enum_name}>(m, "enum_${name}")')
else:
@@ -145,16 +153,18 @@ module_init(py::module_ &m_internal)
code('.value("${val}", ${wrapper_name}::${val})')
code('.value("Num_${name}", ${wrapper_name}::Num_${enum_name})')
if not enum.is_class:
code('.export_values()')
code(';')
code(".export_values()")
code(";")
code.dedent()
code('}')
code("}")
code.dedent()
code('''
code(
"""
static EmbeddedPyBind embed_enum("enum_${name}", module_init);
} // namespace gem5
''')
"""
)
code.write(args.enum_cc)

View File

@@ -46,8 +46,8 @@ import importer
from code_formatter import code_formatter
parser = argparse.ArgumentParser()
parser.add_argument('modpath', help='module the enum belongs to')
parser.add_argument('enum_hh', help='enum header file to generate')
parser.add_argument("modpath", help="module the enum belongs to")
parser.add_argument("enum_hh", help="enum header file to generate")
args = parser.parse_args()
@@ -64,53 +64,61 @@ code = code_formatter()
# Note that we wrap the enum in a class/struct to act as a namespace,
# so that the enum strings can be brief w/o worrying about collisions.
wrapper_name = enum.wrapper_name
wrapper = 'struct' if enum.wrapper_is_struct else 'namespace'
wrapper = "struct" if enum.wrapper_is_struct else "namespace"
name = enum.__name__ if enum.enum_name is None else enum.enum_name
idem_macro = '__ENUM__%s__%s__' % (wrapper_name, name)
idem_macro = "__ENUM__%s__%s__" % (wrapper_name, name)
code('''\
code(
"""\
#ifndef $idem_macro
#define $idem_macro
namespace gem5
{
''')
"""
)
if enum.is_class:
code('''\
code(
"""\
enum class $name
{
''')
"""
)
else:
code('''\
code(
"""\
$wrapper $wrapper_name {
enum $name
{
''')
"""
)
code.indent(1)
code.indent(1)
for val in enum.vals:
code('$val = ${{enum.map[val]}},')
code('Num_$name = ${{len(enum.vals)}}')
code("$val = ${{enum.map[val]}},")
code("Num_$name = ${{len(enum.vals)}}")
code.dedent(1)
code('};')
code("};")
if enum.is_class:
code('''\
code(
"""\
extern const char *${name}Strings[static_cast<int>(${name}::Num_${name})];
''')
"""
)
elif enum.wrapper_is_struct:
code('static const char *${name}Strings[Num_${name}];')
code("static const char *${name}Strings[Num_${name}];")
else:
code('extern const char *${name}Strings[Num_${name}];')
code("extern const char *${name}Strings[Num_${name}];")
if not enum.is_class:
code.dedent(1)
code('}; // $wrapper_name')
code("}; // $wrapper_name")
code()
code('} // namespace gem5')
code("} // namespace gem5")
code()
code('#endif // $idem_macro')
code("#endif // $idem_macro")
code.write(args.enum_hh)

View File

@@ -29,73 +29,77 @@ import os
import ply.lex
import ply.yacc
class ParseError(Exception):
    """Exception raised by the grammar's lexer/parser error hooks.

    The token that triggered the error (or ``None``) is kept on the
    ``token`` attribute; the message is stored as the exception argument.
    """

    def __init__(self, message, token=None):
        super().__init__(message)
        self.token = token
class Grammar(object):
def setupLexerFactory(self, **kwargs):
    """Record the keyword arguments used when the lexer is built.

    The stored dict is splatted into ``ply.lex.lex`` alongside
    ``module=self``, so callers may not supply ``module`` themselves.

    Raises:
        AttributeError: if ``module`` is among the keyword arguments.
    """
    reserved = "module"
    if reserved in kwargs:
        raise AttributeError("module is an illegal attribute")

    self.lex_kwargs = kwargs
def setupParserFactory(self, **kwargs):
    """Record the keyword arguments used when the parser is built.

    The stored dict is splatted into ``ply.yacc.yacc`` alongside
    ``module=self``.  As a convenience, an ``output`` keyword naming a
    ``.py`` file is translated into the ``outputdir`` / ``tabmodule``
    pair.

    Raises:
        AttributeError: if ``module`` is supplied, or if ``output`` does
            not end with ``.py``.
    """
    if "module" in kwargs:
        raise AttributeError("module is an illegal attribute")

    if "output" in kwargs:
        # The previous code read an undefined local named ``output``
        # (NameError).  Take the value from kwargs, and remove the key so
        # it is not forwarded to the parser constructor together with the
        # two keys derived from it.
        out_dir, tab = os.path.split(kwargs.pop("output"))
        if not tab.endswith(".py"):
            raise AttributeError("The output file must end with .py")
        kwargs["outputdir"] = out_dir
        kwargs["tabmodule"] = tab[:-3]  # strip the ".py" suffix

    self.yacc_kwargs = kwargs
# Lazy attribute hook.  Python only calls __getattr__ when normal attribute
# lookup fails, so every branch below that assigns to self.<attr> runs once:
# afterwards the instance attribute is found directly and this hook is
# bypassed for that name.
def __getattr__(self, attr):
# Stack of lexers; entries are indexed below as [0] (lexer) and [1] (source).
if attr == 'lexers':
if attr == "lexers":
self.lexers = []
return self.lexers
# Default lexer kwargs: calling the setup method with no arguments stores
# an empty dict on self.lex_kwargs.
if attr == 'lex_kwargs':
if attr == "lex_kwargs":
self.setupLexerFactory()
return self.lex_kwargs
# Default parser kwargs, same pattern as lex_kwargs.
if attr == 'yacc_kwargs':
if attr == "yacc_kwargs":
self.setupParserFactory()
return self.yacc_kwargs
# Build the lexer on first use (reading self.lex_kwargs may itself re-enter
# this hook) and cache it on the instance.
if attr == 'lex':
if attr == "lex":
self.lex = ply.lex.lex(module=self, **self.lex_kwargs)
return self.lex
# Build the parser on first use and cache it on the instance.
if attr == 'yacc':
if attr == "yacc":
self.yacc = ply.yacc.yacc(module=self, **self.yacc_kwargs)
return self.yacc
# The current_* names are never assigned, so they are recomputed from the
# top of the lexers stack on every access.
if attr == 'current_lexer':
if attr == "current_lexer":
if not self.lexers:
return None
return self.lexers[-1][0]
if attr == 'current_source':
if attr == "current_source":
if not self.lexers:
return '<none>'
return "<none>"
return self.lexers[-1][1]
if attr == 'current_line':
if attr == "current_line":
if not self.lexers:
return -1
# Re-enters this hook through the current_lexer branch above.
return self.current_lexer.lineno
# Anything else is an ordinary missing attribute.
raise AttributeError(
"'%s' object has no attribute '%s'" % (type(self), attr))
"'%s' object has no attribute '%s'" % (type(self), attr)
)
def parse_string(self, data, source='<string>', debug=None, tracking=0):
def parse_string(self, data, source="<string>", debug=None, tracking=0):
if not isinstance(data, str):
raise AttributeError(
"argument must be a string, was '%s'" % type(f))
"argument must be a string, was '%s'" % type(f)
)
lexer = self.lex.clone()
lexer.input(data)
@@ -114,24 +118,32 @@ class Grammar(object):
def parse_file(self, f, **kwargs):
    """Parse the contents of a file and return ``parse_string``'s result.

    Args:
        f: either a path (``str``), which is opened for reading, or an
            already-open file-like object exposing ``read()``.
        **kwargs: forwarded unchanged to ``parse_string``.

    Raises:
        AttributeError: if ``f`` is neither a string nor file-like.
    """
    if isinstance(f, str):
        source = f
        # Read eagerly inside a context manager so the handle opened here
        # is always closed (it used to be leaked).
        with open(f, "r") as handle:
            data = handle.read()
    elif hasattr(f, "read"):
        # The Python 2 builtin ``file`` no longer exists, so the old
        # isinstance(f, file) check raised NameError; duck-type instead.
        # In-memory streams may lack ``name``, hence the fallback label.
        source = getattr(f, "name", "<file>")
        data = f.read()
    else:
        raise AttributeError(
            "argument must be either a string or file, was '%s'" % type(f)
        )

    return self.parse_string(data, source, **kwargs)
def p_error(self, t):
    """Parser error hook: always raises ``ParseError``.

    With a token, the message carries the current source, the token's line,
    its lexpos plus one, and its value; without one it reports an error at
    the end of the current source.
    """
    if not t:
        raise ParseError(
            "Syntax error at end of %s" % (self.current_source,), t
        )

    details = (self.current_source, t.lineno, t.lexpos + 1, t.value)
    raise ParseError("Syntax error at %s:%d:%d\n>>%s<<" % details, t)
def t_error(self, t):
    """Lexer error hook: always raises ``ParseError``.

    The message shows the repr of the first character of the token's value
    together with the token's line number and lexpos.
    """
    bad_char = repr(t.value[0])
    raise ParseError(
        "Illegal character %s @ %d:%d" % (bad_char, t.lineno, t.lexpos), t
    )

View File

@@ -42,8 +42,8 @@ import sys
from code_formatter import code_formatter
parser = argparse.ArgumentParser()
parser.add_argument('info_py', help='info.py file path')
parser.add_argument('files', help='file to include in info.py', nargs='*')
parser.add_argument("info_py", help="info.py file path")
parser.add_argument("files", help="file to include in info.py", nargs="*")
args = parser.parse_args()
@@ -52,8 +52,8 @@ code = code_formatter()
for source in args.files:
src = os.path.basename(source)
with open(source, 'r') as f:
data = ''.join(f)
code('${src} = ${{repr(data)}}')
with open(source, "r") as f:
data = "".join(f)
code("${src} = ${{repr(data)}}")
code.write(args.info_py)

View File

@@ -67,16 +67,17 @@ if len(sys.argv) < 4:
_, cpp, python, modpath, abspath = sys.argv
with open(python, 'r') as f:
with open(python, "r") as f:
src = f.read()
compiled = compile(src, python, 'exec')
compiled = compile(src, python, "exec")
marshalled = marshal.dumps(compiled)
compressed = zlib.compress(marshalled)
code = code_formatter()
code('''\
code(
"""\
#include "python/embedded.hh"
namespace gem5
@@ -84,14 +85,16 @@ namespace gem5
namespace
{
''')
"""
)
bytesToCppArray(code, 'embedded_module_data', compressed)
bytesToCppArray(code, "embedded_module_data", compressed)
# The name of the EmbeddedPython object doesn't matter since it's in an
# anonymous namespace, and its constructor takes care of installing it into a
# global list.
code('''
code(
"""
EmbeddedPython embedded_module_info(
"${abspath}",
"${modpath}",
@@ -101,6 +104,7 @@ EmbeddedPython embedded_module_info(
} // anonymous namespace
} // namespace gem5
''')
"""
)
code.write(cpp)

View File

@@ -46,17 +46,18 @@ import importer
from code_formatter import code_formatter
parser = argparse.ArgumentParser()
parser.add_argument('modpath', help='module the simobject belongs to')
parser.add_argument('param_cc', help='parameter cc file to generate')
parser.add_argument('use_python',
help='whether python is enabled in gem5 (True or False)')
parser.add_argument("modpath", help="module the simobject belongs to")
parser.add_argument("param_cc", help="parameter cc file to generate")
parser.add_argument(
"use_python", help="whether python is enabled in gem5 (True or False)"
)
args = parser.parse_args()
use_python = args.use_python.lower()
if use_python == 'true':
if use_python == "true":
use_python = True
elif use_python == 'false':
elif use_python == "false":
use_python = False
else:
print(f'Unrecognized "use_python" value {use_python}', file=sys.stderr)
@@ -64,7 +65,7 @@ else:
basename = os.path.basename(args.param_cc)
no_ext = os.path.splitext(basename)[0]
sim_object_name = '_'.join(no_ext.split('_')[1:])
sim_object_name = "_".join(no_ext.split("_")[1:])
importer.install()
module = importlib.import_module(args.modpath)
@@ -80,14 +81,16 @@ py_class_name = sim_object.pybind_class
# the object itself, not including inherited params (which
# will also be inherited from the base class's param struct
# here). Sort the params based on their key
params = list(map(lambda k_v: k_v[1],
sorted(sim_object._params.local.items())))
params = list(
map(lambda k_v: k_v[1], sorted(sim_object._params.local.items()))
)
ports = sim_object._ports.local
# only include pybind if python is enabled in the build
if use_python:
code('''#include "pybind11/pybind11.h"
code(
"""#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
#include <type_traits>
@@ -99,9 +102,11 @@ if use_python:
#include "${{sim_object.cxx_header}}"
''')
"""
)
else:
code('''
code(
"""
#include <type_traits>
#include "base/compiler.hh"
@@ -109,13 +114,15 @@ else:
#include "${{sim_object.cxx_header}}"
''')
"""
)
# only include the python params code if python is enabled.
if use_python:
for param in params:
param.pybind_predecls(code)
code('''namespace py = pybind11;
code(
"""namespace py = pybind11;
namespace gem5
{
@@ -124,39 +131,48 @@ static void
module_init(py::module_ &m_internal)
{
py::module_ m = m_internal.def_submodule("param_${sim_object}");
''')
"""
)
code.indent()
if sim_object._base:
code('py::class_<${sim_object}Params, ' \
'${{sim_object._base.type}}Params, ' \
'std::unique_ptr<${{sim_object}}Params, py::nodelete>>(' \
'm, "${sim_object}Params")')
code(
"py::class_<${sim_object}Params, "
"${{sim_object._base.type}}Params, "
"std::unique_ptr<${{sim_object}}Params, py::nodelete>>("
'm, "${sim_object}Params")'
)
else:
code('py::class_<${sim_object}Params, ' \
'std::unique_ptr<${sim_object}Params, py::nodelete>>(' \
'm, "${sim_object}Params")')
code(
"py::class_<${sim_object}Params, "
"std::unique_ptr<${sim_object}Params, py::nodelete>>("
'm, "${sim_object}Params")'
)
code.indent()
if not hasattr(sim_object, 'abstract') or not sim_object.abstract:
code('.def(py::init<>())')
if not hasattr(sim_object, "abstract") or not sim_object.abstract:
code(".def(py::init<>())")
code('.def("create", &${sim_object}Params::create)')
param_exports = sim_object.cxx_param_exports + [
PyBindProperty(k)
for k, v in sorted(sim_object._params.local.items())
] + [
PyBindProperty(f"port_{port.name}_connection_count")
for port in ports.values()
]
param_exports = (
sim_object.cxx_param_exports
+ [
PyBindProperty(k)
for k, v in sorted(sim_object._params.local.items())
]
+ [
PyBindProperty(f"port_{port.name}_connection_count")
for port in ports.values()
]
)
for exp in param_exports:
exp.export(code, f"{sim_object}Params")
code(';')
code(";")
code()
code.dedent()
bases = []
if 'cxx_base' in sim_object._value_dict:
if "cxx_base" in sim_object._value_dict:
# If the c++ base class implied by python inheritance was
# overridden, use that value.
if sim_object.cxx_base:
@@ -170,32 +186,39 @@ py::module_ m = m_internal.def_submodule("param_${sim_object}");
if bases:
base_str = ", ".join(bases)
code('py::class_<${{sim_object.cxx_class}}, ${base_str}, ' \
'std::unique_ptr<${{sim_object.cxx_class}}, py::nodelete>>(' \
'm, "${py_class_name}")')
code(
"py::class_<${{sim_object.cxx_class}}, ${base_str}, "
"std::unique_ptr<${{sim_object.cxx_class}}, py::nodelete>>("
'm, "${py_class_name}")'
)
else:
code('py::class_<${{sim_object.cxx_class}}, ' \
'std::unique_ptr<${{sim_object.cxx_class}}, py::nodelete>>(' \
'm, "${py_class_name}")')
code(
"py::class_<${{sim_object.cxx_class}}, "
"std::unique_ptr<${{sim_object.cxx_class}}, py::nodelete>>("
'm, "${py_class_name}")'
)
code.indent()
for exp in sim_object.cxx_exports:
exp.export(code, sim_object.cxx_class)
code(';')
code(";")
code.dedent()
code()
code.dedent()
code('}')
code("}")
code()
code('static EmbeddedPyBind '
'embed_obj("${0}", module_init, "${1}");',
sim_object, sim_object._base.type if sim_object._base else "")
code(
"static EmbeddedPyBind " 'embed_obj("${0}", module_init, "${1}");',
sim_object,
sim_object._base.type if sim_object._base else "",
)
code()
code('} // namespace gem5')
code("} // namespace gem5")
# include the create() methods whether or not python is enabled.
if not hasattr(sim_object, 'abstract') or not sim_object.abstract:
if 'type' in sim_object.__dict__:
code('''
if not hasattr(sim_object, "abstract") or not sim_object.abstract:
if "type" in sim_object.__dict__:
code(
"""
namespace gem5
{
@@ -268,6 +291,7 @@ Dummy${sim_object}Shunt<${{sim_object.cxx_class}}>::Params::create() const
}
} // namespace gem5
''')
"""
)
code.write(args.param_cc)

View File

@@ -46,8 +46,8 @@ import importer
from code_formatter import code_formatter
parser = argparse.ArgumentParser()
parser.add_argument('modpath', help='module the simobject belongs to')
parser.add_argument('param_hh', help='parameter header file to generate')
parser.add_argument("modpath", help="module the simobject belongs to")
parser.add_argument("param_hh", help="parameter header file to generate")
args = parser.parse_args()
@@ -67,8 +67,9 @@ code = code_formatter()
# the object itself, not including inherited params (which
# will also be inherited from the base class's param struct
# here). Sort the params based on their key
params = list(map(lambda k_v: k_v[1],
sorted(sim_object._params.local.items())))
params = list(
map(lambda k_v: k_v[1], sorted(sim_object._params.local.items()))
)
ports = sim_object._ports.local
try:
ptypes = [p.ptype for p in params]
@@ -79,41 +80,44 @@ except:
warned_about_nested_templates = False
class CxxClass(object):
def __init__(self, sig, template_params=[]):
# Split the signature into its constituent parts. This could
# potentially be done with regular expressions, but
# it's simple enough to pick apart a class signature
# manually.
parts = sig.split('<', 1)
parts = sig.split("<", 1)
base = parts[0]
t_args = []
if len(parts) > 1:
# The signature had template arguments.
text = parts[1].rstrip(' \t\n>')
arg = ''
text = parts[1].rstrip(" \t\n>")
arg = ""
# Keep track of nesting to avoid splitting on ","s embedded
# in the arguments themselves.
depth = 0
for c in text:
if c == '<':
if c == "<":
depth = depth + 1
if depth > 0 and not warned_about_nested_templates:
warned_about_nested_templates = True
print('Nested template argument in cxx_class.'
' This feature is largely untested and '
' may not work.')
elif c == '>':
print(
"Nested template argument in cxx_class."
" This feature is largely untested and "
" may not work."
)
elif c == ">":
depth = depth - 1
elif c == ',' and depth == 0:
elif c == "," and depth == 0:
t_args.append(arg.strip())
arg = ''
arg = ""
else:
arg = arg + c
if arg:
t_args.append(arg.strip())
# Split the non-template part on :: boundaries.
class_path = base.split('::')
class_path = base.split("::")
# The namespaces are everything except the last part of the class path.
self.namespaces = class_path[:-1]
@@ -125,7 +129,7 @@ class CxxClass(object):
# Iterate through the template arguments and their values. This
# will likely break if parameter packs are used.
for arg, param in zip(t_args, template_params):
type_keys = ('class', 'typename')
type_keys = ("class", "typename")
# If a parameter is a type, parse it recursively. Otherwise
# assume it's a constant, and store it verbatim.
if any(param.strip().startswith(kw) for kw in type_keys):
@@ -140,21 +144,24 @@ class CxxClass(object):
arg.declare(code)
# Re-open the target namespace.
for ns in self.namespaces:
code('namespace $ns {')
code("namespace $ns {")
# If this is a class template...
if self.template_params:
code('template <${{", ".join(self.template_params)}}>')
# The actual class declaration.
code('class ${{self.name}};')
code("class ${{self.name}};")
# Close the target namespaces.
for ns in reversed(self.namespaces):
code('} // namespace $ns')
code("} // namespace $ns")
code('''\
code(
"""\
#ifndef __PARAMS__${sim_object}__
#define __PARAMS__${sim_object}__
''')
"""
)
# The base SimObject has a couple of params that get
@@ -162,10 +169,12 @@ code('''\
# the normal Param mechanism; we slip them in here (needed
# predecls now, actual declarations below)
if sim_object == SimObject:
code('''#include <string>''')
code("""#include <string>""")
cxx_class = CxxClass(sim_object._value_dict['cxx_class'],
sim_object._value_dict['cxx_template_params'])
cxx_class = CxxClass(
sim_object._value_dict["cxx_class"],
sim_object._value_dict["cxx_template_params"],
)
# A forward class declaration is sufficient since we are just
# declaring a pointer.
@@ -186,27 +195,29 @@ for ptype in ptypes:
code('#include "enums/${{ptype.__name__}}.hh"')
code()
code('namespace gem5')
code('{')
code('')
code("namespace gem5")
code("{")
code("")
# now generate the actual param struct
code("struct ${sim_object}Params")
if sim_object._base:
code(" : public ${{sim_object._base.type}}Params")
code("{")
if not hasattr(sim_object, 'abstract') or not sim_object.abstract:
if 'type' in sim_object.__dict__:
if not hasattr(sim_object, "abstract") or not sim_object.abstract:
if "type" in sim_object.__dict__:
code(" ${{sim_object.cxx_type}} create() const;")
code.indent()
if sim_object == SimObject:
code('''
code(
"""
SimObjectParams() {}
virtual ~SimObjectParams() {}
std::string name;
''')
"""
)
for param in params:
param.cxx_decl(code)
@@ -214,11 +225,11 @@ for port in ports.values():
port.cxx_decl(code)
code.dedent()
code('};')
code("};")
code()
code('} // namespace gem5')
code("} // namespace gem5")
code()
code('#endif // __PARAMS__${sim_object}__')
code("#endif // __PARAMS__${sim_object}__")
code.write(args.param_hh)

View File

@@ -28,9 +28,11 @@ from common.SysPaths import script, disk, binary
from os import environ as env
from m5.defines import buildEnv
class SysConfig:
def __init__(self, script=None, mem=None, disks=None, rootdev=None,
os_type='linux'):
def __init__(
self, script=None, mem=None, disks=None, rootdev=None, os_type="linux"
):
self.scriptname = script
self.disknames = disks
self.memsize = mem
@@ -41,13 +43,13 @@ class SysConfig:
if self.scriptname:
return script(self.scriptname)
else:
return ''
return ""
def mem(self):
    """Return the configured memory size, or "128MB" when none is set."""
    return self.memsize if self.memsize else "128MB"
def disks(self):
if self.disknames:
@@ -59,72 +61,117 @@ class SysConfig:
if self.root:
return self.root
else:
return '/dev/sda1'
return "/dev/sda1"
# Accessor: returns the value stored on self.ostype unchanged.
def os_type(self):
return self.ostype
# Each benchmark name maps to a list of SysConfigs.  The first entry in a
# list is the test system; any further entries are driving systems.
Benchmarks = {
    "PovrayBench": [SysConfig("povray-bench.rcS", "512MB", ["povray.img"])],
    "PovrayAutumn": [SysConfig("povray-autumn.rcS", "512MB", ["povray.img"])],
    "NetperfStream": [
        SysConfig("netperf-stream-client.rcS"),
        SysConfig("netperf-server.rcS"),
    ],
    "NetperfStreamUdp": [
        SysConfig("netperf-stream-udp-client.rcS"),
        SysConfig("netperf-server.rcS"),
    ],
    "NetperfUdpLocal": [SysConfig("netperf-stream-udp-local.rcS")],
    "NetperfStreamNT": [
        SysConfig("netperf-stream-nt-client.rcS"),
        SysConfig("netperf-server.rcS"),
    ],
    "NetperfMaerts": [
        SysConfig("netperf-maerts-client.rcS"),
        SysConfig("netperf-server.rcS"),
    ],
    "SurgeStandard": [
        SysConfig("surge-server.rcS", "512MB"),
        SysConfig("surge-client.rcS", "256MB"),
    ],
    "SurgeSpecweb": [
        SysConfig("spec-surge-server.rcS", "512MB"),
        SysConfig("spec-surge-client.rcS", "256MB"),
    ],
    "Nhfsstone": [
        SysConfig("nfs-server-nhfsstone.rcS", "512MB"),
        SysConfig("nfs-client-nhfsstone.rcS"),
    ],
    "Nfs": [
        SysConfig("nfs-server.rcS", "900MB"),
        SysConfig("nfs-client-dbench.rcS"),
    ],
    "NfsTcp": [
        SysConfig("nfs-server.rcS", "900MB"),
        SysConfig("nfs-client-tcp.rcS"),
    ],
    "IScsiInitiator": [
        SysConfig("iscsi-client.rcS", "512MB"),
        SysConfig("iscsi-server.rcS", "512MB"),
    ],
    "IScsiTarget": [
        SysConfig("iscsi-server.rcS", "512MB"),
        SysConfig("iscsi-client.rcS", "512MB"),
    ],
    "Validation": [
        SysConfig("iscsi-server.rcS", "512MB"),
        SysConfig("iscsi-client.rcS", "512MB"),
    ],
    "Ping": [SysConfig("ping-server.rcS"), SysConfig("ping-client.rcS")],
    "ValAccDelay": [SysConfig("devtime.rcS", "512MB")],
    "ValAccDelay2": [SysConfig("devtimewmr.rcS", "512MB")],
    # NOTE(review): this literal used to list "ValMemLat" twice -- first with
    # micro_memlat.rcS here, then with micro_memlat8.rcS after "ValMemLat8MB".
    # A repeated key in a dict display keeps its first position and its last
    # value, so the micro_memlat.rcS entry was never reachable.  The single
    # entry below reproduces that effective result.  The second occurrence
    # was presumably meant to be a distinct key -- TODO confirm and restore
    # micro_memlat.rcS under "ValMemLat" if so.
    "ValMemLat": [SysConfig("micro_memlat8.rcS", "512MB")],
    "ValMemLat2MB": [SysConfig("micro_memlat2mb.rcS", "512MB")],
    "ValMemLat8MB": [SysConfig("micro_memlat8mb.rcS", "512MB")],
    "ValTlbLat": [SysConfig("micro_tlblat.rcS", "512MB")],
    "ValSysLat": [SysConfig("micro_syscall.rcS", "512MB")],
    "ValCtxLat": [SysConfig("micro_ctx.rcS", "512MB")],
    "ValStream": [SysConfig("micro_stream.rcS", "512MB")],
    "ValStreamScale": [SysConfig("micro_streamscale.rcS", "512MB")],
    "ValStreamCopy": [SysConfig("micro_streamcopy.rcS", "512MB")],
    "MutexTest": [SysConfig("mutex-test.rcS", "128MB")],
    "ArmAndroid-GB": [
        SysConfig(
            "null.rcS",
            "256MB",
            ["ARMv7a-Gingerbread-Android.SMP.mouse.nolock.clean.img"],
            None,
            "android-gingerbread",
        )
    ],
    "bbench-gb": [
        SysConfig(
            "bbench-gb.rcS",
            "256MB",
            ["ARMv7a-Gingerbread-Android.SMP.mouse.nolock.img"],
            None,
            "android-gingerbread",
        )
    ],
    "ArmAndroid-ICS": [
        SysConfig(
            "null.rcS",
            "256MB",
            ["ARMv7a-ICS-Android.SMP.nolock.clean.img"],
            None,
            "android-ics",
        )
    ],
    "bbench-ics": [
        SysConfig(
            "bbench-ics.rcS",
            "256MB",
            ["ARMv7a-ICS-Android.SMP.nolock.img"],
            None,
            "android-ics",
        )
    ],
}
benchs = list(Benchmarks.keys())

View File

@@ -42,9 +42,13 @@
import m5
from m5.objects import *
from gem5.isas import ISA
from gem5.runtime import get_runtime_isa
from common.Caches import *
from common import ObjectList
def _get_hwp(hwp_option):
if hwp_option == None:
return NULL
@@ -52,23 +56,25 @@ def _get_hwp(hwp_option):
hwpClass = ObjectList.hwp_list.get(hwp_option)
return hwpClass()
def _get_cache_opts(level, options):
opts = {}
size_attr = '{}_size'.format(level)
size_attr = "{}_size".format(level)
if hasattr(options, size_attr):
opts['size'] = getattr(options, size_attr)
opts["size"] = getattr(options, size_attr)
assoc_attr = '{}_assoc'.format(level)
assoc_attr = "{}_assoc".format(level)
if hasattr(options, assoc_attr):
opts['assoc'] = getattr(options, assoc_attr)
opts["assoc"] = getattr(options, assoc_attr)
prefetcher_attr = '{}_hwp_type'.format(level)
prefetcher_attr = "{}_hwp_type".format(level)
if hasattr(options, prefetcher_attr):
opts['prefetcher'] = _get_hwp(getattr(options, prefetcher_attr))
opts["prefetcher"] = _get_hwp(getattr(options, prefetcher_attr))
return opts
def config_cache(options, system):
if options.external_memory_system and (options.caches or options.l2cache):
print("External caches and internal caches are exclusive options.\n")
@@ -84,10 +90,12 @@ def config_cache(options, system):
print("O3_ARM_v7a_3 is unavailable. Did you compile the O3 model?")
sys.exit(1)
dcache_class, icache_class, l2_cache_class, walk_cache_class = \
core.O3_ARM_v7a_DCache, core.O3_ARM_v7a_ICache, \
core.O3_ARM_v7aL2, \
None
dcache_class, icache_class, l2_cache_class, walk_cache_class = (
core.O3_ARM_v7a_DCache,
core.O3_ARM_v7a_ICache,
core.O3_ARM_v7aL2,
None,
)
elif options.cpu_type == "HPI":
try:
import cores.arm.HPI as core
@@ -95,13 +103,21 @@ def config_cache(options, system):
print("HPI is unavailable.")
sys.exit(1)
dcache_class, icache_class, l2_cache_class, walk_cache_class = \
core.HPI_DCache, core.HPI_ICache, core.HPI_L2, None
dcache_class, icache_class, l2_cache_class, walk_cache_class = (
core.HPI_DCache,
core.HPI_ICache,
core.HPI_L2,
None,
)
else:
dcache_class, icache_class, l2_cache_class, walk_cache_class = \
L1_DCache, L1_ICache, L2Cache, None
dcache_class, icache_class, l2_cache_class, walk_cache_class = (
L1_DCache,
L1_ICache,
L2Cache,
None,
)
if buildEnv['TARGET_ISA'] in ['x86', 'riscv']:
if get_runtime_isa() in [ISA.X86, ISA.RISCV]:
walk_cache_class = PageTableWalkerCache
# Set the cache line size of the system
@@ -118,10 +134,11 @@ def config_cache(options, system):
# Provide a clock for the L2 and the L1-to-L2 bus here as they
# are not connected using addTwoLevelCacheHierarchy. Use the
# same clock as the CPUs.
system.l2 = l2_cache_class(clk_domain=system.cpu_clk_domain,
**_get_cache_opts('l2', options))
system.l2 = l2_cache_class(
clk_domain=system.cpu_clk_domain, **_get_cache_opts("l2", options)
)
system.tol2bus = L2XBar(clk_domain = system.cpu_clk_domain)
system.tol2bus = L2XBar(clk_domain=system.cpu_clk_domain)
system.l2.cpu_side = system.tol2bus.mem_side_ports
system.l2.mem_side = system.membus.cpu_side_ports
@@ -130,8 +147,8 @@ def config_cache(options, system):
for i in range(options.num_cpus):
if options.caches:
icache = icache_class(**_get_cache_opts('l1i', options))
dcache = dcache_class(**_get_cache_opts('l1d', options))
icache = icache_class(**_get_cache_opts("l1i", options))
dcache = dcache_class(**_get_cache_opts("l1d", options))
# If we have a walker cache specified, instantiate two
# instances here
@@ -159,8 +176,9 @@ def config_cache(options, system):
# When connecting the caches, the clock is also inherited
# from the CPU in question
system.cpu[i].addPrivateSplitL1Caches(icache, dcache,
iwalkcache, dwalkcache)
system.cpu[i].addPrivateSplitL1Caches(
icache, dcache, iwalkcache, dwalkcache
)
if options.memchecker:
# The mem_side ports of the caches haven't been connected yet.
@@ -174,47 +192,56 @@ def config_cache(options, system):
# on these names. For simplicity, we would advise configuring
# it to use this naming scheme; if this isn't possible, change
# the names below.
if buildEnv['TARGET_ISA'] in ['x86', 'arm', 'riscv']:
if get_runtime_isa() in [ISA.X86, ISA.ARM, ISA.RISCV]:
system.cpu[i].addPrivateSplitL1Caches(
ExternalCache("cpu%d.icache" % i),
ExternalCache("cpu%d.dcache" % i),
ExternalCache("cpu%d.itb_walker_cache" % i),
ExternalCache("cpu%d.dtb_walker_cache" % i))
ExternalCache("cpu%d.icache" % i),
ExternalCache("cpu%d.dcache" % i),
ExternalCache("cpu%d.itb_walker_cache" % i),
ExternalCache("cpu%d.dtb_walker_cache" % i),
)
else:
system.cpu[i].addPrivateSplitL1Caches(
ExternalCache("cpu%d.icache" % i),
ExternalCache("cpu%d.dcache" % i))
ExternalCache("cpu%d.icache" % i),
ExternalCache("cpu%d.dcache" % i),
)
system.cpu[i].createInterruptController()
if options.l2cache:
system.cpu[i].connectAllPorts(
system.tol2bus.cpu_side_ports,
system.membus.cpu_side_ports, system.membus.mem_side_ports)
system.membus.cpu_side_ports,
system.membus.mem_side_ports,
)
elif options.external_memory_system:
system.cpu[i].connectUncachedPorts(
system.membus.cpu_side_ports, system.membus.mem_side_ports)
system.membus.cpu_side_ports, system.membus.mem_side_ports
)
else:
system.cpu[i].connectBus(system.membus)
return system
# ExternalSlave provides a "port", but when that port connects to a cache,
# the connecting CPU SimObject wants to refer to its "cpu_side".
# The 'ExternalCache' class provides this adaptation by rewriting the name,
# eliminating distracting changes elsewhere in the config code.
class ExternalCache(ExternalSlave):
    # Adapter that lets callers use the name "cpu_side" for what the parent
    # class exposes as "port": both attribute reads and writes are renamed
    # before being forwarded.
    #
    # NOTE(review): the forwarding calls use super(ExternalSlave, ...), so
    # the lookup starts *after* ExternalSlave in the MRO rather than after
    # ExternalCache. Kept as-is; confirm this is intentional.
    def __getattr__(cls, attr):
        """Read `attr`, treating "cpu_side" as an alias for "port"."""
        if attr == "cpu_side":
            attr = "port"
        return super(ExternalSlave, cls).__getattr__(attr)

    def __setattr__(cls, attr, value):
        """Assign `value` to `attr`, treating "cpu_side" as "port"."""
        if attr == "cpu_side":
            attr = "port"
        return super(ExternalSlave, cls).__setattr__(attr, value)
def ExternalCacheFactory(port_type):
    """Return a one-argument constructor for external caches.

    The returned callable takes a port name and builds an ExternalCache
    with that name as `port_data`, the captured `port_type`, and an address
    range list containing only `AllMemory`.
    """

    def make(name):
        return ExternalCache(
            port_data=name, port_type=port_type, addr_ranges=[AllMemory]
        )

    return make

View File

@@ -39,6 +39,8 @@
from m5.defines import buildEnv
from m5.objects import *
from gem5.isas import ISA
from gem5.runtime import get_runtime_isa
# Base implementations of L1, L2, IO and TLB-walker caches. These are
# used in the regressions and also as base components in the
@@ -46,6 +48,7 @@ from m5.objects import *
# starting point, and specific parameters can be overridden in the
# specific instantiations.
class L1Cache(Cache):
assoc = 2
tag_latency = 2
@@ -54,14 +57,17 @@ class L1Cache(Cache):
mshrs = 4
tgts_per_mshr = 20
class L1_ICache(L1Cache):
    # L1 cache variant marked read-only.
    is_read_only = True
    # Clean lines are written back too, not only dirty ones.
    writeback_clean = True
class L1_DCache(L1Cache):
    # No overrides: every parameter is inherited unchanged from L1Cache.
    pass
class L2Cache(Cache):
assoc = 8
tag_latency = 20
@@ -71,26 +77,28 @@ class L2Cache(Cache):
tgts_per_mshr = 12
write_buffers = 8
class IOCache(Cache):
    # Parameter overrides for the IO cache; all three latencies share the
    # same value.
    assoc = 8
    tag_latency = 50
    data_latency = 50
    response_latency = 50
    mshrs = 20
    size = "1kB"
    tgts_per_mshr = 12
class PageTableWalkerCache(Cache):
    # Parameter overrides for the cache placed in front of a page table
    # walker.
    assoc = 2
    tag_latency = 2
    data_latency = 2
    response_latency = 2
    mshrs = 10
    size = "1kB"
    tgts_per_mshr = 12

    # The x86 table walker actually writes to the table-walker cache; the
    # same writable setting is applied for RISC-V. Every other ISA gets a
    # read-only cache.
    if get_runtime_isa() in [ISA.X86, ISA.RISCV]:
        is_read_only = False
    else:
        is_read_only = True

View File

@@ -36,6 +36,7 @@
from m5 import fatal
import m5.objects
def config_etrace(cpu_cls, cpu_list, options):
if issubclass(cpu_cls, m5.objects.DerivO3CPU):
# Assign the same file name to all cpus for now. This must be
@@ -45,17 +46,21 @@ def config_etrace(cpu_cls, cpu_list, options):
# file names. Set the dependency window size equal to the cpu it
# is attached to.
cpu.traceListener = m5.objects.ElasticTrace(
instFetchTraceFile = options.inst_trace_file,
dataDepTraceFile = options.data_trace_file,
depWindowSize = 3 * cpu.numROBEntries)
instFetchTraceFile=options.inst_trace_file,
dataDepTraceFile=options.data_trace_file,
depWindowSize=3 * cpu.numROBEntries,
)
# Make the number of entries in the ROB, LQ and SQ very
# large so that there are no stalls due to resource
# limitation as such stalls will get captured in the trace
# as compute delay. For replay, ROB, LQ and SQ sizes are
# modelled in the Trace CPU.
cpu.numROBEntries = 512;
cpu.LQEntries = 128;
cpu.SQEntries = 128;
cpu.numROBEntries = 512
cpu.LQEntries = 128
cpu.SQEntries = 128
else:
fatal("%s does not support data dependency tracing. Use a CPU model of"
" type or inherited from DerivO3CPU.", cpu_cls)
fatal(
"%s does not support data dependency tracing. Use a CPU model of"
" type or inherited from DerivO3CPU.",
cpu_cls,
)

View File

@@ -39,69 +39,87 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import m5
import m5.defines
from m5.objects import *
from m5.util import *
from common.Benchmarks import *
from common import ObjectList
# Populate to reflect supported os types per target ISA
# Built as a set: each ISA compiled into this binary (USE_<ISA>_ISA in
# buildEnv) contributes the OS types it supports.
os_types = set()
if m5.defines.buildEnv["USE_ARM_ISA"]:
    os_types.update(
        [
            "linux",
            "android-gingerbread",
            "android-ics",
            "android-jellybean",
            "android-kitkat",
            "android-nougat",
        ]
    )
if m5.defines.buildEnv["USE_MIPS_ISA"]:
    os_types.add("linux")
if m5.defines.buildEnv["USE_POWER_ISA"]:
    os_types.add("linux")
if m5.defines.buildEnv["USE_RISCV_ISA"]:
    os_types.add("linux")  # TODO that's a lie
if m5.defines.buildEnv["USE_SPARC_ISA"]:
    os_types.add("linux")
if m5.defines.buildEnv["USE_X86_ISA"]:
    os_types.add("linux")
class CowIdeDisk(IdeDisk):
    # Writable copy-on-write image layered over a read-only raw image.
    image = CowDiskImage(child=RawDiskImage(read_only=True), read_only=False)

    def childImage(self, ci):
        """Set `ci` as the image file of the underlying raw (child) image."""
        self.image.child.image_file = ci
class MemBus(SystemXBar):
    # A BadAddr responder is attached to the bus, and its pio port is
    # wired up as the bus's default port.
    badaddr_responder = BadAddr()
    default = Self.badaddr_responder.pio
def attach_9p(parent, bus):
    """Create a VirtIO 9P (diod) PCI device and attach it to `parent`.

    The device's shared root and socket live under "<outdir>/9p". The share
    directory is created if missing and any existing socket file is removed
    before the device is attached.

    @param parent: object that receives the device as `parent.viopci` and
        provides `attachPciDevice`.
    @param bus: bus passed through to `parent.attachPciDevice`.
    """
    viopci = PciVirtIO()
    viopci.vio = VirtIO9PDiod()
    viodir = os.path.realpath(os.path.join(m5.options.outdir, "9p"))
    viopci.vio.root = os.path.join(viodir, "share")
    viopci.vio.socketPath = os.path.join(viodir, "socket")
    os.makedirs(viopci.vio.root, exist_ok=True)
    # Remove a socket file left over at the same path.
    if os.path.exists(viopci.vio.socketPath):
        os.remove(viopci.vio.socketPath)
    parent.viopci = viopci
    parent.attachPciDevice(viopci, bus)
def fillInCmdline(mdesc, template, **kwargs):
    """Expand a %-style kernel command line template.

    `rootdev`, `mem` and `script` are taken from the matching `mdesc`
    accessors unless the caller supplies them as keyword arguments. Any
    extra keyword arguments are available to the template as well.

    @param mdesc: machine description providing rootdev(), mem(), script().
    @param template: string using "%(name)s" style placeholders.
    @return the template with all placeholders substituted.
    """
    kwargs.setdefault("rootdev", mdesc.rootdev())
    kwargs.setdefault("mem", mdesc.mem())
    kwargs.setdefault("script", mdesc.script())
    return template % kwargs
def makeCowDisks(disk_paths):
    """Build one copy-on-write IDE disk per image path.

    @param disk_paths: iterable of disk image paths.
    @return list of CowIdeDisk objects, in the same order as `disk_paths`.
    """
    disks = []
    for disk_path in disk_paths:
        disk = CowIdeDisk(driveID="device0")
        disk.childImage(disk_path)
        disks.append(disk)
    return disks
def makeSparcSystem(mem_mode, mdesc=None, cmdline=None):
# Constants from iob.cc and uart8250.cc
iob_man_addr = 0x9800000000
uart_pio_size = 8
class CowMmDisk(MmDisk):
image = CowDiskImage(child=RawDiskImage(read_only=True),
read_only=False)
image = CowDiskImage(
child=RawDiskImage(read_only=True), read_only=False
)
def childImage(self, ci):
self.image.child.image_file = ci
@@ -113,12 +131,14 @@ def makeSparcSystem(mem_mode, mdesc=None, cmdline=None):
self.readfile = mdesc.script()
self.iobus = IOXBar()
self.membus = MemBus()
self.bridge = Bridge(delay='50ns')
self.bridge = Bridge(delay="50ns")
self.t1000 = T1000()
self.t1000.attachOnChipIO(self.membus)
self.t1000.attachIO(self.iobus)
self.mem_ranges = [AddrRange(Addr('1MB'), size = '64MB'),
AddrRange(Addr('2GB'), size ='256MB')]
self.mem_ranges = [
AddrRange(Addr("1MB"), size="64MB"),
AddrRange(Addr("2GB"), size="256MB"),
]
self.bridge.mem_side_port = self.iobus.cpu_side_ports
self.bridge.cpu_side_port = self.membus.mem_side_ports
self.disk0 = CowMmDisk()
@@ -128,36 +148,47 @@ def makeSparcSystem(mem_mode, mdesc=None, cmdline=None):
# The puart0 and hvuart are placed on the IO bus, so create ranges
# for them. The remaining IO range is rather fragmented, so poke
# holes for the iob and partition descriptors etc.
self.bridge.ranges = \
[
AddrRange(self.t1000.puart0.pio_addr,
self.t1000.puart0.pio_addr + uart_pio_size - 1),
AddrRange(self.disk0.pio_addr,
self.t1000.fake_jbi.pio_addr +
self.t1000.fake_jbi.pio_size - 1),
AddrRange(self.t1000.fake_clk.pio_addr,
iob_man_addr - 1),
AddrRange(self.t1000.fake_l2_1.pio_addr,
self.t1000.fake_ssi.pio_addr +
self.t1000.fake_ssi.pio_size - 1),
AddrRange(self.t1000.hvuart.pio_addr,
self.t1000.hvuart.pio_addr + uart_pio_size - 1)
]
self.bridge.ranges = [
AddrRange(
self.t1000.puart0.pio_addr,
self.t1000.puart0.pio_addr + uart_pio_size - 1,
),
AddrRange(
self.disk0.pio_addr,
self.t1000.fake_jbi.pio_addr + self.t1000.fake_jbi.pio_size - 1,
),
AddrRange(self.t1000.fake_clk.pio_addr, iob_man_addr - 1),
AddrRange(
self.t1000.fake_l2_1.pio_addr,
self.t1000.fake_ssi.pio_addr + self.t1000.fake_ssi.pio_size - 1,
),
AddrRange(
self.t1000.hvuart.pio_addr,
self.t1000.hvuart.pio_addr + uart_pio_size - 1,
),
]
workload = SparcFsWorkload()
# ROM for OBP/Reset/Hypervisor
self.rom = SimpleMemory(image_file=binary('t1000_rom.bin'),
range=AddrRange(0xfff0000000, size='8MB'))
self.rom = SimpleMemory(
image_file=binary("t1000_rom.bin"),
range=AddrRange(0xFFF0000000, size="8MB"),
)
# nvram
self.nvram = SimpleMemory(image_file=binary('nvram1'),
range=AddrRange(0x1f11000000, size='8kB'))
self.nvram = SimpleMemory(
image_file=binary("nvram1"), range=AddrRange(0x1F11000000, size="8kB")
)
# hypervisor description
self.hypervisor_desc = SimpleMemory(image_file=binary('1up-hv.bin'),
range=AddrRange(0x1f12080000, size='8kB'))
self.hypervisor_desc = SimpleMemory(
image_file=binary("1up-hv.bin"),
range=AddrRange(0x1F12080000, size="8kB"),
)
# partition description
self.partition_desc = SimpleMemory(image_file=binary('1up-md.bin'),
range=AddrRange(0x1f12000000, size='8kB'))
self.partition_desc = SimpleMemory(
image_file=binary("1up-md.bin"),
range=AddrRange(0x1F12000000, size="8kB"),
)
self.rom.port = self.membus.mem_side_ports
self.nvram.port = self.membus.mem_side_ports
@@ -170,10 +201,20 @@ def makeSparcSystem(mem_mode, mdesc=None, cmdline=None):
return self
def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None,
dtb_filename=None, bare_metal=False, cmdline=None,
external_memory="", ruby=False,
vio_9p=None, bootloader=None):
def makeArmSystem(
mem_mode,
machine_type,
num_cpus=1,
mdesc=None,
dtb_filename=None,
bare_metal=False,
cmdline=None,
external_memory="",
ruby=False,
vio_9p=None,
bootloader=None,
):
assert machine_type
pci_devices = []
@@ -187,7 +228,7 @@ def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None,
self.readfile = mdesc.script()
self.iobus = IOXBar()
if not ruby:
self.bridge = Bridge(delay='50ns')
self.bridge = Bridge(delay="50ns")
self.bridge.mem_side_port = self.iobus.cpu_side_ports
self.membus = MemBus()
self.membus.badaddr_responder.warn_access = "warn"
@@ -227,13 +268,17 @@ def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None,
self.mem_ranges.append(AddrRange(region.start, size=size_remain))
size_remain = 0
break
warn("Memory size specified spans more than one region. Creating" \
" another memory controller for that range.")
warn(
"Memory size specified spans more than one region. Creating"
" another memory controller for that range."
)
if size_remain > 0:
fatal("The currently selected ARM platforms doesn't support" \
" the amount of DRAM you've selected. Please try" \
" another platform")
fatal(
"The currently selected ARM platforms doesn't support"
" the amount of DRAM you've selected. Please try"
" another platform"
)
if bare_metal:
# EOT character on UART will end the simulation
@@ -245,16 +290,19 @@ def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None,
if dtb_filename:
workload.dtb_filename = binary(dtb_filename)
workload.machine_type = \
workload.machine_type = (
machine_type if machine_type in ArmMachineType.map else "DTOnly"
)
# Ensure that writes to the UART actually go out early in the boot
if not cmdline:
cmdline = 'earlyprintk=pl011,0x1c090000 console=ttyAMA0 ' + \
'lpj=19988480 norandmaps rw loglevel=8 ' + \
'mem=%(mem)s root=%(rootdev)s'
cmdline = (
"earlyprintk=pl011,0x1c090000 console=ttyAMA0 "
+ "lpj=19988480 norandmaps rw loglevel=8 "
+ "mem=%(mem)s root=%(rootdev)s"
)
if hasattr(self.realview.gic, 'cpu_addr'):
if hasattr(self.realview.gic, "cpu_addr"):
self.gic_cpu_addr = self.realview.gic.cpu_addr
# This check is for users who have previously put 'android' in
@@ -263,30 +311,37 @@ def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None,
# behavior has been replaced with a more explicit option per
# the error message below. The disk can have any name now and
# doesn't need to include 'android' substring.
if (mdesc.disks() and
os.path.split(mdesc.disks()[0])[-1].lower().count('android')):
if 'android' not in mdesc.os_type():
fatal("It looks like you are trying to boot an Android " \
"platform. To boot Android, you must specify " \
"--os-type with an appropriate Android release on " \
"the command line.")
if mdesc.disks() and os.path.split(mdesc.disks()[0])[-1].lower().count(
"android"
):
if "android" not in mdesc.os_type():
fatal(
"It looks like you are trying to boot an Android "
"platform. To boot Android, you must specify "
"--os-type with an appropriate Android release on "
"the command line."
)
# android-specific tweaks
if 'android' in mdesc.os_type():
if "android" in mdesc.os_type():
# generic tweaks
cmdline += " init=/init"
# release-specific tweaks
if 'kitkat' in mdesc.os_type():
cmdline += " androidboot.hardware=gem5 qemu=1 qemu.gles=0 " + \
"android.bootanim=0 "
elif 'nougat' in mdesc.os_type():
cmdline += " androidboot.hardware=gem5 qemu=1 qemu.gles=0 " + \
"android.bootanim=0 " + \
"vmalloc=640MB " + \
"android.early.fstab=/fstab.gem5 " + \
"androidboot.selinux=permissive " + \
"video=Virtual-1:1920x1080-16"
if "kitkat" in mdesc.os_type():
cmdline += (
" androidboot.hardware=gem5 qemu=1 qemu.gles=0 "
+ "android.bootanim=0 "
)
elif "nougat" in mdesc.os_type():
cmdline += (
" androidboot.hardware=gem5 qemu=1 qemu.gles=0 "
+ "android.bootanim=0 "
+ "vmalloc=640MB "
+ "android.early.fstab=/fstab.gem5 "
+ "androidboot.selinux=permissive "
+ "video=Virtual-1:1920x1080-16"
)
workload.command_line = fillInCmdline(mdesc, cmdline)
@@ -296,14 +351,17 @@ def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None,
if external_memory:
# I/O traffic enters iobus
self.external_io = ExternalMaster(port_data="external_io",
port_type=external_memory)
self.external_io = ExternalMaster(
port_data="external_io", port_type=external_memory
)
self.external_io.port = self.iobus.cpu_side_ports
# Ensure iocache only receives traffic destined for (actual) memory.
self.iocache = ExternalSlave(port_data="iocache",
port_type=external_memory,
addr_ranges=self.mem_ranges)
self.iocache = ExternalSlave(
port_data="iocache",
port_type=external_memory,
addr_ranges=self.mem_ranges,
)
self.iocache.port = self.iobus.mem_side_ports
# Let system_port get to nvmem and nothing else.
@@ -313,10 +371,11 @@ def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None,
# Attach off-chip devices
self.realview.attachIO(self.iobus)
elif ruby:
self._dma_ports = [ ]
self._mem_ports = [ ]
self.realview.attachOnChipIO(self.iobus,
dma_ports=self._dma_ports, mem_ports=self._mem_ports)
self._dma_ports = []
self._mem_ports = []
self.realview.attachOnChipIO(
self.iobus, dma_ports=self._dma_ports, mem_ports=self._mem_ports
)
self.realview.attachIO(self.iobus, dma_ports=self._dma_ports)
else:
self.realview.attachOnChipIO(self.membus, self.bridge)
@@ -325,8 +384,8 @@ def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None,
for dev in pci_devices:
self.realview.attachPciDevice(
dev, self.iobus,
dma_ports=self._dma_ports if ruby else None)
dev, self.iobus, dma_ports=self._dma_ports if ruby else None
)
self.terminal = Terminal()
self.vncserver = VncServer()
@@ -338,10 +397,12 @@ def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None,
self.system_port = self.membus.cpu_side_ports
if ruby:
if buildEnv['PROTOCOL'] == 'MI_example' and num_cpus > 1:
fatal("The MI_example protocol cannot implement Load/Store "
"Exclusive operations. Multicore ARM systems configured "
"with the MI_example protocol will not work properly.")
if buildEnv["PROTOCOL"] == "MI_example" and num_cpus > 1:
fatal(
"The MI_example protocol cannot implement Load/Store "
"Exclusive operations. Multicore ARM systems configured "
"with the MI_example protocol will not work properly."
)
return self
@@ -349,8 +410,9 @@ def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None,
def makeLinuxMipsSystem(mem_mode, mdesc=None, cmdline=None):
class BaseMalta(Malta):
ethernet = NSGigE(pci_bus=0, pci_dev=1, pci_func=0)
ide = IdeController(disks=Parent.disks,
pci_func=0, pci_dev=0, pci_bus=0)
ide = IdeController(
disks=Parent.disks, pci_func=0, pci_dev=0, pci_bus=0
)
self = System()
if not mdesc:
@@ -359,8 +421,8 @@ def makeLinuxMipsSystem(mem_mode, mdesc=None, cmdline=None):
self.readfile = mdesc.script()
self.iobus = IOXBar()
self.membus = MemBus()
self.bridge = Bridge(delay='50ns')
self.mem_ranges = [AddrRange('1GB')]
self.bridge = Bridge(delay="50ns")
self.mem_ranges = [AddrRange("1GB")]
self.bridge.mem_side_port = self.iobus.cpu_side_ports
self.bridge.cpu_side_port = self.membus.mem_side_ports
self.disks = makeCowDisks(mdesc.disks())
@@ -370,35 +432,38 @@ def makeLinuxMipsSystem(mem_mode, mdesc=None, cmdline=None):
self.malta.ide.dma = self.iobus.cpu_side_ports
self.malta.ethernet.pio = self.iobus.mem_side_ports
self.malta.ethernet.dma = self.iobus.cpu_side_ports
self.simple_disk = SimpleDisk(disk=RawDiskImage(
image_file = mdesc.disks()[0], read_only = True))
self.simple_disk = SimpleDisk(
disk=RawDiskImage(image_file=mdesc.disks()[0], read_only=True)
)
self.mem_mode = mem_mode
self.terminal = Terminal()
self.console = binary('mips/console')
self.console = binary("mips/console")
if not cmdline:
cmdline = 'root=/dev/hda1 console=ttyS0'
cmdline = "root=/dev/hda1 console=ttyS0"
self.workload = KernelWorkload(command_line=fillInCmdline(mdesc, cmdline))
self.system_port = self.membus.cpu_side_ports
return self
def x86IOAddress(port):
    """Return `port` offset by the base of the x86 I/O address space."""
    io_space_start = 0x8000000000000000
    address = io_space_start + port
    return address
def connectX86ClassicSystem(x86_sys, numCPUs):
# Constants similar to x86_traits.hh
IO_address_space_base = 0x8000000000000000
pci_config_address_space_base = 0xc000000000000000
interrupts_address_space_base = 0xa000000000000000
APIC_range_size = 1 << 12;
pci_config_address_space_base = 0xC000000000000000
interrupts_address_space_base = 0xA000000000000000
APIC_range_size = 1 << 12
x86_sys.membus = MemBus()
# North Bridge
x86_sys.iobus = IOXBar()
x86_sys.bridge = Bridge(delay='50ns')
x86_sys.bridge = Bridge(delay="50ns")
x86_sys.bridge.mem_side_port = x86_sys.iobus.cpu_side_ports
x86_sys.bridge.cpu_side_port = x86_sys.membus.mem_side_ports
# Allow the bridge to pass through:
@@ -407,30 +472,30 @@ def connectX86ClassicSystem(x86_sys, numCPUs):
# 2) the bridge to pass through the IO APIC (two pages, already contained in 1),
# 3) everything in the IO address range up to the local APIC, and
# 4) then the entire PCI address space and beyond.
x86_sys.bridge.ranges = \
[
x86_sys.bridge.ranges = [
AddrRange(0xC0000000, 0xFFFF0000),
AddrRange(IO_address_space_base,
interrupts_address_space_base - 1),
AddrRange(pci_config_address_space_base,
Addr.max)
]
AddrRange(IO_address_space_base, interrupts_address_space_base - 1),
AddrRange(pci_config_address_space_base, Addr.max),
]
# Create a bridge from the IO bus to the memory bus to allow access to
# the local APIC (two pages)
x86_sys.apicbridge = Bridge(delay='50ns')
x86_sys.apicbridge = Bridge(delay="50ns")
x86_sys.apicbridge.cpu_side_port = x86_sys.iobus.mem_side_ports
x86_sys.apicbridge.mem_side_port = x86_sys.membus.cpu_side_ports
x86_sys.apicbridge.ranges = [AddrRange(interrupts_address_space_base,
interrupts_address_space_base +
numCPUs * APIC_range_size
- 1)]
x86_sys.apicbridge.ranges = [
AddrRange(
interrupts_address_space_base,
interrupts_address_space_base + numCPUs * APIC_range_size - 1,
)
]
# connect the io bus
x86_sys.pc.attachIO(x86_sys.iobus)
x86_sys.system_port = x86_sys.membus.cpu_side_ports
def connectX86RubySystem(x86_sys):
# North Bridge
x86_sys.iobus = IOXBar()
@@ -444,7 +509,7 @@ def connectX86RubySystem(x86_sys):
def makeX86System(mem_mode, numCPUs=1, mdesc=None, workload=None, Ruby=False):
self = System()
self.m5ops_base = 0xffff0000
self.m5ops_base = 0xFFFF0000
if workload is None:
workload = X86FsWorkload()
@@ -461,17 +526,22 @@ def makeX86System(mem_mode, numCPUs=1, mdesc=None, workload=None, Ruby=False):
# On the PC platform, the memory region 0xC0000000-0xFFFFFFFF is reserved
# for various devices. Hence, if the physical memory size is greater than
# 3GB, we need to split it into two parts.
excess_mem_size = \
convert.toMemorySize(mdesc.mem()) - convert.toMemorySize('3GB')
excess_mem_size = convert.toMemorySize(mdesc.mem()) - convert.toMemorySize(
"3GB"
)
if excess_mem_size <= 0:
self.mem_ranges = [AddrRange(mdesc.mem())]
else:
warn("Physical memory size specified is %s which is greater than " \
"3GB. Twice the number of memory controllers would be " \
"created." % (mdesc.mem()))
warn(
"Physical memory size specified is %s which is greater than "
"3GB. Twice the number of memory controllers would be "
"created." % (mdesc.mem())
)
self.mem_ranges = [AddrRange('3GB'),
AddrRange(Addr('4GB'), size = excess_mem_size)]
self.mem_ranges = [
AddrRange("3GB"),
AddrRange(Addr("4GB"), size=excess_mem_size),
]
# Platform
self.pc = Pc()
@@ -496,78 +566,78 @@ def makeX86System(mem_mode, numCPUs=1, mdesc=None, workload=None, Ruby=False):
madt_records = []
for i in range(numCPUs):
bp = X86IntelMPProcessor(
local_apic_id = i,
local_apic_version = 0x14,
enable = True,
bootstrap = (i == 0))
local_apic_id=i,
local_apic_version=0x14,
enable=True,
bootstrap=(i == 0),
)
base_entries.append(bp)
lapic = X86ACPIMadtLAPIC(
acpi_processor_id=i,
apic_id=i,
flags=1)
lapic = X86ACPIMadtLAPIC(acpi_processor_id=i, apic_id=i, flags=1)
madt_records.append(lapic)
io_apic = X86IntelMPIOAPIC(
id = numCPUs,
version = 0x11,
enable = True,
address = 0xfec00000)
id=numCPUs, version=0x11, enable=True, address=0xFEC00000
)
self.pc.south_bridge.io_apic.apic_id = io_apic.id
base_entries.append(io_apic)
madt_records.append(X86ACPIMadtIOAPIC(id=io_apic.id,
address=io_apic.address, int_base=0))
madt_records.append(
X86ACPIMadtIOAPIC(id=io_apic.id, address=io_apic.address, int_base=0)
)
# In gem5 Pc::calcPciConfigAddr(), it required "assert(bus==0)",
# but linux kernel cannot config PCI device if it was not connected to
# PCI bus, so we fix PCI bus id to 0, and ISA bus id to 1.
pci_bus = X86IntelMPBus(bus_id = 0, bus_type='PCI ')
pci_bus = X86IntelMPBus(bus_id=0, bus_type="PCI ")
base_entries.append(pci_bus)
isa_bus = X86IntelMPBus(bus_id = 1, bus_type='ISA ')
isa_bus = X86IntelMPBus(bus_id=1, bus_type="ISA ")
base_entries.append(isa_bus)
connect_busses = X86IntelMPBusHierarchy(bus_id=1,
subtractive_decode=True, parent_bus=0)
connect_busses = X86IntelMPBusHierarchy(
bus_id=1, subtractive_decode=True, parent_bus=0
)
ext_entries.append(connect_busses)
pci_dev4_inta = X86IntelMPIOIntAssignment(
interrupt_type = 'INT',
polarity = 'ConformPolarity',
trigger = 'ConformTrigger',
source_bus_id = 0,
source_bus_irq = 0 + (4 << 2),
dest_io_apic_id = io_apic.id,
dest_io_apic_intin = 16)
interrupt_type="INT",
polarity="ConformPolarity",
trigger="ConformTrigger",
source_bus_id=0,
source_bus_irq=0 + (4 << 2),
dest_io_apic_id=io_apic.id,
dest_io_apic_intin=16,
)
base_entries.append(pci_dev4_inta)
pci_dev4_inta_madt = X86ACPIMadtIntSourceOverride(
bus_source = pci_dev4_inta.source_bus_id,
irq_source = pci_dev4_inta.source_bus_irq,
sys_int = pci_dev4_inta.dest_io_apic_intin,
flags = 0
)
bus_source=pci_dev4_inta.source_bus_id,
irq_source=pci_dev4_inta.source_bus_irq,
sys_int=pci_dev4_inta.dest_io_apic_intin,
flags=0,
)
madt_records.append(pci_dev4_inta_madt)
def assignISAInt(irq, apicPin):
assign_8259_to_apic = X86IntelMPIOIntAssignment(
interrupt_type = 'ExtInt',
polarity = 'ConformPolarity',
trigger = 'ConformTrigger',
source_bus_id = 1,
source_bus_irq = irq,
dest_io_apic_id = io_apic.id,
dest_io_apic_intin = 0)
interrupt_type="ExtInt",
polarity="ConformPolarity",
trigger="ConformTrigger",
source_bus_id=1,
source_bus_irq=irq,
dest_io_apic_id=io_apic.id,
dest_io_apic_intin=0,
)
base_entries.append(assign_8259_to_apic)
assign_to_apic = X86IntelMPIOIntAssignment(
interrupt_type = 'INT',
polarity = 'ConformPolarity',
trigger = 'ConformTrigger',
source_bus_id = 1,
source_bus_irq = irq,
dest_io_apic_id = io_apic.id,
dest_io_apic_intin = apicPin)
interrupt_type="INT",
polarity="ConformPolarity",
trigger="ConformTrigger",
source_bus_id=1,
source_bus_irq=irq,
dest_io_apic_id=io_apic.id,
dest_io_apic_intin=apicPin,
)
base_entries.append(assign_to_apic)
# acpi
assign_to_apic_acpi = X86ACPIMadtIntSourceOverride(
bus_source = 1,
irq_source = irq,
sys_int = apicPin,
flags = 0
)
bus_source=1, irq_source=irq, sys_int=apicPin, flags=0
)
madt_records.append(assign_to_apic_acpi)
assignISAInt(0, 2)
assignISAInt(1, 1)
for i in range(3, 15):
@@ -575,64 +645,78 @@ def makeX86System(mem_mode, numCPUs=1, mdesc=None, workload=None, Ruby=False):
workload.intel_mp_table.base_entries = base_entries
workload.intel_mp_table.ext_entries = ext_entries
madt = X86ACPIMadt(local_apic_address=0,
records=madt_records, oem_id='madt')
madt = X86ACPIMadt(
local_apic_address=0, records=madt_records, oem_id="madt"
)
workload.acpi_description_table_pointer.rsdt.entries.append(madt)
workload.acpi_description_table_pointer.xsdt.entries.append(madt)
workload.acpi_description_table_pointer.oem_id = 'gem5'
workload.acpi_description_table_pointer.rsdt.oem_id='gem5'
workload.acpi_description_table_pointer.xsdt.oem_id='gem5'
workload.acpi_description_table_pointer.oem_id = "gem5"
workload.acpi_description_table_pointer.rsdt.oem_id = "gem5"
workload.acpi_description_table_pointer.xsdt.oem_id = "gem5"
return self
def makeLinuxX86System(
    mem_mode, numCPUs=1, mdesc=None, Ruby=False, cmdline=None
):
    """Build an x86 full-system configuration specialized for Linux.

    Creates the base system via makeX86System with an X86FsLinux workload,
    then fills in the E820 memory map and the kernel command line.

    @param mem_mode: forwarded to makeX86System.
    @param numCPUs: forwarded to makeX86System.
    @param mdesc: machine description; forwarded to makeX86System and used
        to expand the command line template.
    @param Ruby: forwarded to makeX86System.
    @param cmdline: kernel command line template; a default is used when
        this is empty or None.
    @return the configured system object.
    """
    # Build up the x86 system and then specialize it for Linux
    self = makeX86System(mem_mode, numCPUs, mdesc, X86FsLinux(), Ruby)

    # We assume below that there's at least 1MB of memory. We'll require 2
    # just to avoid corner cases.
    phys_mem_size = sum([r.size() for r in self.mem_ranges])
    assert phys_mem_size >= 0x200000
    assert len(self.mem_ranges) <= 2

    entries = [
        # Mark the first megabyte of memory as reserved
        X86E820Entry(addr=0, size="639kB", range_type=1),
        X86E820Entry(addr=0x9FC00, size="385kB", range_type=2),
        # Mark the rest of physical memory as available
        X86E820Entry(
            addr=0x100000,
            size="%dB" % (self.mem_ranges[0].size() - 0x100000),
            range_type=1,
        ),
    ]

    # Mark [mem_size, 3GB) as reserved if memory less than 3GB, which force
    # IO devices to be mapped to [0xC0000000, 0xFFFF0000). Requests to this
    # specific range can pass through bridge to iobus.
    if len(self.mem_ranges) == 1:
        entries.append(
            X86E820Entry(
                addr=self.mem_ranges[0].size(),
                size="%dB" % (0xC0000000 - self.mem_ranges[0].size()),
                range_type=2,
            )
        )

    # Reserve the last 64kB of the 32-bit address space for the m5op interface
    entries.append(X86E820Entry(addr=0xFFFF0000, size="64kB", range_type=2))

    # In case the physical memory is greater than 3GB, we split it into two
    # parts and add a separate e820 entry for the second part. This entry
    # starts at 0x100000000, which is the first address after the space
    # reserved for devices.
    if len(self.mem_ranges) == 2:
        entries.append(
            X86E820Entry(
                addr=0x100000000,
                size="%dB" % (self.mem_ranges[1].size()),
                range_type=1,
            )
        )

    self.workload.e820_table.entries = entries

    # Command line
    if not cmdline:
        cmdline = "earlyprintk=ttyS0 console=ttyS0 lpj=7999923 root=/dev/hda1"
    self.workload.command_line = fillInCmdline(mdesc, cmdline)
    return self
def makeBareMetalRiscvSystem(mem_mode, mdesc=None, cmdline=None):
self = System()
if not mdesc:
@@ -646,7 +730,7 @@ def makeBareMetalRiscvSystem(mem_mode, mdesc=None, cmdline=None):
self.iobus = IOXBar()
self.membus = MemBus()
self.bridge = Bridge(delay='50ns')
self.bridge = Bridge(delay="50ns")
self.bridge.mem_side_port = self.iobus.cpu_side_ports
self.bridge.cpu_side_port = self.membus.mem_side_ports
# Sv39 has 56 bit physical addresses; use the upper 8 bit for the IO space
@@ -656,16 +740,17 @@ def makeBareMetalRiscvSystem(mem_mode, mdesc=None, cmdline=None):
self.system_port = self.membus.cpu_side_ports
return self
def makeDualRoot(full_system, testSystem, driveSystem, dumpfile):
self = Root(full_system = full_system)
self = Root(full_system=full_system)
self.testsys = testSystem
self.drivesys = driveSystem
self.etherlink = EtherLink()
if hasattr(testSystem, 'realview'):
if hasattr(testSystem, "realview"):
self.etherlink.int0 = Parent.testsys.realview.ethernet.interface
self.etherlink.int1 = Parent.drivesys.realview.ethernet.interface
elif hasattr(testSystem, 'tsunami'):
elif hasattr(testSystem, "tsunami"):
self.etherlink.int0 = Parent.testsys.tsunami.ethernet.interface
self.etherlink.int1 = Parent.drivesys.tsunami.ethernet.interface
else:
@@ -678,31 +763,35 @@ def makeDualRoot(full_system, testSystem, driveSystem, dumpfile):
return self
def makeDistRoot(testSystem,
rank,
size,
server_name,
server_port,
sync_repeat,
sync_start,
linkspeed,
linkdelay,
dumpfile):
self = Root(full_system = True)
def makeDistRoot(
testSystem,
rank,
size,
server_name,
server_port,
sync_repeat,
sync_start,
linkspeed,
linkdelay,
dumpfile,
):
self = Root(full_system=True)
self.testsys = testSystem
self.etherlink = DistEtherLink(speed = linkspeed,
delay = linkdelay,
dist_rank = rank,
dist_size = size,
server_name = server_name,
server_port = server_port,
sync_start = sync_start,
sync_repeat = sync_repeat)
self.etherlink = DistEtherLink(
speed=linkspeed,
delay=linkdelay,
dist_rank=rank,
dist_size=size,
server_name=server_name,
server_port=server_port,
sync_start=sync_start,
sync_repeat=sync_repeat,
)
if hasattr(testSystem, 'realview'):
if hasattr(testSystem, "realview"):
self.etherlink.int0 = Parent.testsys.realview.ethernet.interface
elif hasattr(testSystem, 'tsunami'):
elif hasattr(testSystem, "tsunami"):
self.etherlink.int0 = Parent.testsys.tsunami.ethernet.interface
else:
fatal("Don't know how to connect DistEtherLink to this system")

View File

@@ -48,21 +48,25 @@ from os.path import join as joinpath
from os.path import isdir
from shutil import rmtree, copyfile
def hex_mask(terms):
    """Build a bitmask from bit positions and render it as hexadecimal.

    @param terms: Iterable of bit positions; bit ``i`` is set in the mask
                  for every ``i`` in ``terms``.
    @return The mask as a lowercase hex string, zero-padded to at least
            eight digits.
    """
    mask = 0
    for position in terms:
        mask |= 2**position
    return "%08x" % mask
def file_append(path, contents):
    """Append the string form of ``contents`` to a file.

    The file is opened in append mode, so it is created when missing and
    existing data is kept.

    @param path: Sequence of path components that are joined to form the
                 file name.
    @param contents: Value to write; it is converted with str() first.
    """
    target = joinpath(*path)
    with open(target, "a") as handle:
        handle.write(str(contents))
def replace_tree(path):
    """Make ``path`` an empty directory, discarding any previous contents.

    An existing directory at ``path`` is removed recursively before the
    directory is created again. The parent directory must already exist,
    because the directory is created with mkdir().

    @param path: Directory to (re)create.
    """
    stale = isdir(path)
    if stale:
        rmtree(path)
    mkdir(path)
def config_filesystem(system, options = None):
""" This function parses the system object to create the pseudo file system
def config_filesystem(system, options=None):
"""This function parses the system object to create the pseudo file system
@param system: The system to create the config for
@param options: An optional argument which contains an Options.py options
object. This is useful if when use se.py and will set the L2 cache
@@ -79,167 +83,200 @@ def config_filesystem(system, options = None):
These files are created in the `fs` directory in the outdir path.
"""
fsdir = joinpath(m5.options.outdir, 'fs')
fsdir = joinpath(m5.options.outdir, "fs")
replace_tree(fsdir)
# Set up /proc
procdir = joinpath(fsdir, 'proc')
procdir = joinpath(fsdir, "proc")
mkdir(procdir)
try:
cpus = \
[obj for obj in system.descendants() if isinstance(obj, BaseCPU)]
cpus = [
obj for obj in system.descendants() if isinstance(obj, BaseCPU)
]
except NameError:
# BaseCPU is not defined for the NULL ISA
cpus = []
cpu_clock = 0
if hasattr(options, 'cpu_clock'):
if hasattr(options, "cpu_clock"):
cpu_clock = toFrequency(options.cpu_clock) / mega
l2_size = 0
if hasattr(options, 'l2_size'):
if hasattr(options, "l2_size"):
l2_size = toMemorySize(options.l2_size) / kibi
for i,cpu in enumerate(cpus):
one_cpu = 'processor : {proc}\n' + \
'vendor_id : Generic\n' + \
'cpu family : 0\n' + \
'model : 0\n' + \
'model name : Generic\n' + \
'stepping : 0\n' + \
'cpu MHz : {clock:0.3f}\n' + \
'cache size: : {l2_size}K\n' + \
'physical id : 0\n' + \
'siblings : {num_cpus}\n' + \
'core id : {proc}\n' + \
'cpu cores : {num_cpus}\n' + \
'fpu : yes\n' + \
'fpu exception : yes\n' + \
'cpuid level : 1\n' + \
'wp : yes\n' + \
'flags : fpu\n' + \
'cache alignment : {cacheline_size}\n' + \
'\n'
one_cpu = one_cpu.format(proc = i, num_cpus = len(cpus),
# Note: it would be nice to use cpu.clock, but it hasn't
# been finalized yet since m5.instantiate() isn't done.
clock = cpu_clock,
# Note: this assumes the L2 is private to each core
l2_size = l2_size,
cacheline_size=system.cache_line_size.getValue())
file_append((procdir, 'cpuinfo'), one_cpu)
for i, cpu in enumerate(cpus):
one_cpu = (
"processor : {proc}\n"
+ "vendor_id : Generic\n"
+ "cpu family : 0\n"
+ "model : 0\n"
+ "model name : Generic\n"
+ "stepping : 0\n"
+ "cpu MHz : {clock:0.3f}\n"
+ "cache size: : {l2_size}K\n"
+ "physical id : 0\n"
+ "siblings : {num_cpus}\n"
+ "core id : {proc}\n"
+ "cpu cores : {num_cpus}\n"
+ "fpu : yes\n"
+ "fpu exception : yes\n"
+ "cpuid level : 1\n"
+ "wp : yes\n"
+ "flags : fpu\n"
+ "cache alignment : {cacheline_size}\n"
+ "\n"
)
one_cpu = one_cpu.format(
proc=i,
num_cpus=len(cpus),
# Note: it would be nice to use cpu.clock, but it hasn't
# been finalized yet since m5.instantiate() isn't done.
clock=cpu_clock,
# Note: this assumes the L2 is private to each core
l2_size=l2_size,
cacheline_size=system.cache_line_size.getValue(),
)
file_append((procdir, "cpuinfo"), one_cpu)
file_append((procdir, 'stat'), 'cpu 0 0 0 0 0 0 0\n')
file_append((procdir, "stat"), "cpu 0 0 0 0 0 0 0\n")
for i in range(len(cpus)):
file_append((procdir, 'stat'), 'cpu%d 0 0 0 0 0 0 0\n' % i)
file_append((procdir, "stat"), "cpu%d 0 0 0 0 0 0 0\n" % i)
# Set up /sys
sysdir = joinpath(fsdir, 'sys')
sysdir = joinpath(fsdir, "sys")
mkdir(sysdir)
# Set up /sys/devices/system/cpu
cpudir = joinpath(sysdir, 'devices', 'system', 'cpu')
cpudir = joinpath(sysdir, "devices", "system", "cpu")
makedirs(cpudir, exist_ok=True)
file_append((cpudir, 'online'), '0-%d' % (len(cpus) - 1))
file_append((cpudir, 'possible'), '0-%d' % (len(cpus) - 1))
file_append((cpudir, "online"), "0-%d" % (len(cpus) - 1))
file_append((cpudir, "possible"), "0-%d" % (len(cpus) - 1))
# Set up /tmp
tmpdir = joinpath(fsdir, 'tmp')
tmpdir = joinpath(fsdir, "tmp")
replace_tree(tmpdir)
system.redirect_paths = _redirect_paths(options)
# Setting the interpreter path. This is used to load the
# guest dynamic linker itself from the elf file.
interp = getattr(options, 'interp_dir', None)
interp = getattr(options, "interp_dir", None)
if interp:
from m5.core import setInterpDir
setInterpDir(interp)
print("Setting the interpreter path to:", interp,
"\nFor dynamically linked applications you might still "
"need to setup the --redirects so that libraries are "
"found\n")
print(
"Setting the interpreter path to:",
interp,
"\nFor dynamically linked applications you might still "
"need to setup the --redirects so that libraries are "
"found\n",
)
def register_node(cpu_list, mem, node_number):
    """Create the pseudo sysfs entry for one memory node.

    Writes ``cpumap`` and ``meminfo`` under
    ``<outdir>/fs/sys/devices/system/node/node<N>``.

    @param cpu_list: Iterable of CPU indices belonging to the node; written
                     to ``cpumap`` as a hex bitmask.
    @param mem: Memory size of the node; its string form is parsed with
                toMemorySize().
    @param node_number: Index of the node, used in the directory name and
                        in the ``meminfo`` line.
    """
    node_dir = joinpath(
        m5.options.outdir,
        "fs",
        "sys",
        "devices",
        "system",
        "node",
        "node%d" % node_number,
    )
    makedirs(node_dir, exist_ok=True)

    file_append((node_dir, "cpumap"), hex_mask(cpu_list))

    # The size is reported in KiB; %d truncates the division result.
    mem_kib = toMemorySize(str(mem)) / kibi
    meminfo_line = "Node %d MemTotal: %dkB" % (node_number, mem_kib)
    file_append((node_dir, "meminfo"), meminfo_line)
def register_cpu(physical_package_id, core_siblings, core_id, thread_siblings):
    """Create the pseudo sysfs entry for one CPU core.

    Populates ``<outdir>/fs/sys/devices/system/cpu/cpu<core_id>`` with an
    ``online`` file, an empty ``cache`` directory and the ``topology``
    files.

    @param physical_package_id: Value written to
                                ``topology/physical_package_id``.
    @param core_siblings: Iterable of CPU indices; written to
                          ``topology/core_siblings`` as a hex bitmask.
    @param core_id: Index of the core; selects the ``cpu<core_id>``
                    directory and is written to ``topology/core_id``.
    @param thread_siblings: Iterable of CPU indices; written to
                            ``topology/thread_siblings`` as a hex bitmask.
    """
    cpudir = joinpath(
        m5.options.outdir,
        "fs",
        "sys",
        "devices",
        "system",
        "cpu",
        "cpu%d" % core_id,
    )

    makedirs(joinpath(cpudir, "topology"), exist_ok=True)
    # register_cache() creates "<cpudir>/cache/index<N>" on demand, so the
    # cache directory may already exist when this function runs. Tolerate
    # that, exactly as is done for the topology directory above.
    makedirs(joinpath(cpudir, "cache"), exist_ok=True)

    file_append((cpudir, "online"), "1")
    file_append(
        (cpudir, "topology", "physical_package_id"), physical_package_id
    )
    file_append((cpudir, "topology", "core_siblings"), hex_mask(core_siblings))
    file_append((cpudir, "topology", "core_id"), core_id)
    file_append(
        (cpudir, "topology", "thread_siblings"), hex_mask(thread_siblings)
    )
def register_cache(level, idu_type, size, line_size, assoc, cpus):
    """Create a pseudo sysfs cache entry for every CPU sharing a cache.

    For each CPU in ``cpus`` the next unused
    ``<outdir>/fs/sys/devices/system/cpu/cpu<N>/cache/index<M>`` directory
    is created and filled with the cache description files.

    @param level: Cache level, written to ``level``.
    @param idu_type: Cache type string, written to ``type``.
    @param size: Cache size; parsed with toMemorySize().
    @param line_size: Cache line size in bytes; written to
                      ``coherency_line_size``.
    @param assoc: Associativity of the cache.
    @param cpus: Sequence of CPU indices that share this cache.
    """
    fsdir = joinpath(m5.options.outdir, "fs")
    for i in cpus:
        cachedir = joinpath(
            fsdir, "sys", "devices", "system", "cpu", "cpu%d" % i, "cache"
        )

        # Use the first index<M> directory that does not exist yet.
        j = 0
        while isdir(joinpath(cachedir, "index%d" % j)):
            j += 1
        indexdir = joinpath(cachedir, "index%d" % j)
        makedirs(indexdir, exist_ok=True)

        file_append((indexdir, "level"), level)
        file_append((indexdir, "type"), idu_type)
        file_append((indexdir, "size"), "%dK" % (toMemorySize(size) / kibi))
        file_append((indexdir, "coherency_line_size"), line_size)

        # Since cache size = number of sets * associativity * line size,
        # number of sets = size / (associativity * line size). The product
        # must be parenthesised; integer division keeps the written value
        # an integer count.
        num_sets = toMemorySize(size) // (int(assoc) * int(line_size))
        file_append((indexdir, "number_of_sets"), num_sets)
        file_append((indexdir, "physical_line_partition"), "1")
        file_append((indexdir, "shared_cpu_map"), hex_mask(cpus))
        file_append(
            (indexdir, "shared_cpu_list"), ",".join(str(cpu) for cpu in cpus)
        )
def _redirect_paths(options):
    """Build the list of RedirectPath objects for the simulated process.

    The list always redirects ``/proc``, ``/sys`` and ``/tmp`` to the
    pseudo file system under ``<outdir>/fs``. Entries from
    ``options.redirects`` and ``options.chroot`` are appended when those
    attributes are present.

    @param options: Options object (may be None); the optional attributes
                    ``redirects`` (list of "app_path=host_path" strings)
                    and ``chroot`` are read from it.
    @return List of RedirectPath objects in matching order.
    @raise ValueError: If a redirect entry contains no "=".
    """
    outdir = m5.options.outdir

    # Redirect filesystem syscalls from src to the first matching dests
    redirect_paths = [
        RedirectPath(
            app_path="/%s" % name, host_paths=["%s/fs/%s" % (outdir, name)]
        )
        for name in ("proc", "sys", "tmp")
    ]

    # Setting the redirect paths so that the guest dynamic linker
    # can point to the proper /lib collection (e.g. to load libc)
    for redirect in getattr(options, "redirects", []):
        # Split on the first "=" only, so a host path that itself contains
        # "=" is accepted instead of failing to unpack.
        app_path, host_path = redirect.split("=", 1)
        redirect_paths.append(
            RedirectPath(app_path=app_path, host_paths=[host_path])
        )

    chroot = getattr(options, "chroot", None)
    if chroot:
        redirect_paths.append(
            RedirectPath(
                app_path="/", host_paths=["%s" % os.path.expanduser(chroot)]
            )
        )

    return redirect_paths

View File

@@ -34,10 +34,12 @@
import m5
from m5.objects import *
def TLB_constructor(options, level, gpu_ctrl=None, full_system=False):
if full_system:
constructor_call = "VegaGPUTLB(\
constructor_call = (
"VegaGPUTLB(\
gpu_device = gpu_ctrl, \
size = options.L%(level)dTLBentries, \
assoc = options.L%(level)dTLBassoc, \
@@ -48,9 +50,12 @@ def TLB_constructor(options, level, gpu_ctrl=None, full_system=False):
clk_domain = SrcClockDomain(\
clock = options.gpu_clock,\
voltage_domain = VoltageDomain(\
voltage = options.gpu_voltage)))" % locals()
voltage = options.gpu_voltage)))"
% locals()
)
else:
constructor_call = "X86GPUTLB(size = options.L%(level)dTLBentries, \
constructor_call = (
"X86GPUTLB(size = options.L%(level)dTLBentries, \
assoc = options.L%(level)dTLBassoc, \
hitLatency = options.L%(level)dAccessLatency,\
missLatency2 = options.L%(level)dMissLatency,\
@@ -59,13 +64,17 @@ def TLB_constructor(options, level, gpu_ctrl=None, full_system=False):
clk_domain = SrcClockDomain(\
clock = options.gpu_clock,\
voltage_domain = VoltageDomain(\
voltage = options.gpu_voltage)))" % locals()
voltage = options.gpu_voltage)))"
% locals()
)
return constructor_call
def Coalescer_constructor(options, level, full_system):
if full_system:
constructor_call = "VegaTLBCoalescer(probesPerCycle = \
constructor_call = (
"VegaTLBCoalescer(probesPerCycle = \
options.L%(level)dProbesPerCycle, \
tlb_level = %(level)d ,\
coalescingWindow = options.L%(level)dCoalescingWindow,\
@@ -73,30 +82,47 @@ def Coalescer_constructor(options, level, full_system):
clk_domain = SrcClockDomain(\
clock = options.gpu_clock,\
voltage_domain = VoltageDomain(\
voltage = options.gpu_voltage)))" % locals()
voltage = options.gpu_voltage)))"
% locals()
)
else:
constructor_call = "TLBCoalescer(probesPerCycle = \
constructor_call = (
"TLBCoalescer(probesPerCycle = \
options.L%(level)dProbesPerCycle, \
coalescingWindow = options.L%(level)dCoalescingWindow,\
disableCoalescing = options.L%(level)dDisableCoalescing,\
clk_domain = SrcClockDomain(\
clock = options.gpu_clock,\
voltage_domain = VoltageDomain(\
voltage = options.gpu_voltage)))" % locals()
voltage = options.gpu_voltage)))"
% locals()
)
return constructor_call
def create_TLB_Coalescer(
    options,
    my_level,
    my_index,
    tlb_name,
    coalescer_name,
    gpu_ctrl=None,
    full_system=False,
):
    """Instantiate the TLBs and coalescers for one entry of a TLB level.

    @param options: Parsed options object; the generated constructor
                    expressions read their parameters from it.
    @param my_level: TLB level the objects are created for.
    @param my_index: Number of private structures to create for this level.
    @param tlb_name: List that the new TLB objects are appended to.
    @param coalescer_name: List that the new coalescer objects are appended
                           to.
    @param gpu_ctrl: GPU device handed to the TLB constructor.
    @param full_system: Selects the full-system constructor variants.
    """
    # The constructor helpers return Python source text. It is evaluated in
    # this frame, so the parameter names above (e.g. options, gpu_ctrl)
    # must stay visible here; do not move eval() into a comprehension.
    for _ in range(my_index):
        tlb_source = TLB_constructor(options, my_level, gpu_ctrl, full_system)
        tlb_name.append(eval(tlb_source))

        coalescer_source = Coalescer_constructor(
            options, my_level, full_system
        )
        coalescer_name.append(eval(coalescer_source))
def config_tlb_hierarchy(options, system, shader_idx, gpu_ctrl=None,
full_system=False):
def config_tlb_hierarchy(
options, system, shader_idx, gpu_ctrl=None, full_system=False
):
n_cu = options.num_compute_units
if options.TLB_config == "perLane":
@@ -111,36 +137,50 @@ def config_tlb_hierarchy(options, system, shader_idx, gpu_ctrl=None,
print("Bad option for TLB Configuration.")
sys.exit(1)
#-------------------------------------------------------------------------
# -------------------------------------------------------------------------
# A visual representation of the TLB hierarchy
# for ease of configuration
# < Modify here the width and the number of levels if you want a different
# configuration >
# width is the number of TLBs of the given type (i.e., D-TLB, I-TLB etc)
# for this level
L1 = [{'name': 'sqc', 'width': options.num_sqc, 'TLBarray': [],
'CoalescerArray': []},
{'name': 'scalar', 'width' : options.num_scalar_cache,
'TLBarray': [], 'CoalescerArray': []},
{'name': 'l1', 'width': num_TLBs, 'TLBarray': [],
'CoalescerArray': []}]
L1 = [
{
"name": "sqc",
"width": options.num_sqc,
"TLBarray": [],
"CoalescerArray": [],
},
{
"name": "scalar",
"width": options.num_scalar_cache,
"TLBarray": [],
"CoalescerArray": [],
},
{
"name": "l1",
"width": num_TLBs,
"TLBarray": [],
"CoalescerArray": [],
},
]
L2 = [{'name': 'l2', 'width': 1, 'TLBarray': [], 'CoalescerArray': []}]
L3 = [{'name': 'l3', 'width': 1, 'TLBarray': [], 'CoalescerArray': []}]
L2 = [{"name": "l2", "width": 1, "TLBarray": [], "CoalescerArray": []}]
L3 = [{"name": "l3", "width": 1, "TLBarray": [], "CoalescerArray": []}]
TLB_hierarchy = [L1, L2, L3]
#-------------------------------------------------------------------------
# -------------------------------------------------------------------------
# Create the hierarchy
# Call the appropriate constructors and add objects to the system
for i in range(len(TLB_hierarchy)):
hierarchy_level = TLB_hierarchy[i]
level = i+1
level = i + 1
for TLB_type in hierarchy_level:
TLB_index = TLB_type['width']
TLB_array = TLB_type['TLBarray']
Coalescer_array = TLB_type['CoalescerArray']
TLB_index = TLB_type["width"]
TLB_array = TLB_type["TLBarray"]
Coalescer_array = TLB_type["CoalescerArray"]
# If the sim calls for a fixed L1 TLB size across CUs,
# override the TLB entries option
if options.tot_L1TLB_size:
@@ -148,71 +188,96 @@ def config_tlb_hierarchy(options, system, shader_idx, gpu_ctrl=None,
if options.L1TLBassoc > options.L1TLBentries:
options.L1TLBassoc = options.L1TLBentries
# call the constructors for the TLB and the Coalescer
create_TLB_Coalescer(options, level, TLB_index,\
TLB_array, Coalescer_array, gpu_ctrl, full_system)
create_TLB_Coalescer(
options,
level,
TLB_index,
TLB_array,
Coalescer_array,
gpu_ctrl,
full_system,
)
system_TLB_name = TLB_type['name'] + '_tlb'
system_Coalescer_name = TLB_type['name'] + '_coalescer'
system_TLB_name = TLB_type["name"] + "_tlb"
system_Coalescer_name = TLB_type["name"] + "_coalescer"
# add the different TLB levels to the system
# Modify here if you want to make the TLB hierarchy a child of
# the shader.
exec('system.%s = TLB_array' % system_TLB_name)
exec('system.%s = Coalescer_array' % system_Coalescer_name)
exec("system.%s = TLB_array" % system_TLB_name)
exec("system.%s = Coalescer_array" % system_Coalescer_name)
#===========================================================
# ===========================================================
# Specify the TLB hierarchy (i.e., port connections)
# All TLBs but the last level TLB need to have a memSidePort
#===========================================================
# ===========================================================
# Each TLB is connected with its Coalescer through a single port.
# There is a one-to-one mapping of TLBs to Coalescers at a given level
# This won't be modified no matter what the hierarchy looks like.
for i in range(len(TLB_hierarchy)):
hierarchy_level = TLB_hierarchy[i]
level = i+1
level = i + 1
for TLB_type in hierarchy_level:
name = TLB_type['name']
for index in range(TLB_type['width']):
exec('system.%s_coalescer[%d].mem_side_ports[0] = \
system.%s_tlb[%d].cpu_side_ports[0]' % \
(name, index, name, index))
name = TLB_type["name"]
for index in range(TLB_type["width"]):
exec(
"system.%s_coalescer[%d].mem_side_ports[0] = \
system.%s_tlb[%d].cpu_side_ports[0]"
% (name, index, name, index)
)
# Connect the cpuSidePort of all the coalescers in level 1
# < Modify here if you want a different configuration >
for TLB_type in L1:
name = TLB_type['name']
num_TLBs = TLB_type['width']
if name == 'l1': # L1 D-TLBs
name = TLB_type["name"]
num_TLBs = TLB_type["width"]
if name == "l1": # L1 D-TLBs
tlb_per_cu = num_TLBs // n_cu
for cu_idx in range(n_cu):
if tlb_per_cu:
for tlb in range(tlb_per_cu):
exec('system.cpu[%d].CUs[%d].translation_port[%d] = \
system.l1_coalescer[%d].cpu_side_ports[%d]' % \
(shader_idx, cu_idx, tlb,
cu_idx*tlb_per_cu+tlb, 0))
exec(
"system.cpu[%d].CUs[%d].translation_port[%d] = \
system.l1_coalescer[%d].cpu_side_ports[%d]"
% (
shader_idx,
cu_idx,
tlb,
cu_idx * tlb_per_cu + tlb,
0,
)
)
else:
exec('system.cpu[%d].CUs[%d].translation_port[%d] = \
system.l1_coalescer[%d].cpu_side_ports[%d]' % \
(shader_idx, cu_idx, tlb_per_cu,
cu_idx / (n_cu / num_TLBs),
cu_idx % (n_cu / num_TLBs)))
elif name == 'sqc': # I-TLB
exec(
"system.cpu[%d].CUs[%d].translation_port[%d] = \
system.l1_coalescer[%d].cpu_side_ports[%d]"
% (
shader_idx,
cu_idx,
tlb_per_cu,
cu_idx / (n_cu / num_TLBs),
cu_idx % (n_cu / num_TLBs),
)
)
elif name == "sqc": # I-TLB
for index in range(n_cu):
sqc_tlb_index = index / options.cu_per_sqc
sqc_tlb_port_id = index % options.cu_per_sqc
exec('system.cpu[%d].CUs[%d].sqc_tlb_port = \
system.sqc_coalescer[%d].cpu_side_ports[%d]' % \
(shader_idx, index, sqc_tlb_index, sqc_tlb_port_id))
elif name == 'scalar': # Scalar D-TLB
exec(
"system.cpu[%d].CUs[%d].sqc_tlb_port = \
system.sqc_coalescer[%d].cpu_side_ports[%d]"
% (shader_idx, index, sqc_tlb_index, sqc_tlb_port_id)
)
elif name == "scalar": # Scalar D-TLB
for index in range(n_cu):
scalar_tlb_index = index / options.cu_per_scalar_cache
scalar_tlb_port_id = index % options.cu_per_scalar_cache
exec('system.cpu[%d].CUs[%d].scalar_tlb_port = \
system.scalar_coalescer[%d].cpu_side_ports[%d]' % \
(shader_idx, index, scalar_tlb_index,
scalar_tlb_port_id))
exec(
"system.cpu[%d].CUs[%d].scalar_tlb_port = \
system.scalar_coalescer[%d].cpu_side_ports[%d]"
% (shader_idx, index, scalar_tlb_index, scalar_tlb_port_id)
)
# Connect the memSidePorts of all the TLBs with the
# cpuSidePorts of the Coalescers of the next level
@@ -220,23 +285,28 @@ def config_tlb_hierarchy(options, system, shader_idx, gpu_ctrl=None,
# L1 <-> L2
l2_coalescer_index = 0
for TLB_type in L1:
name = TLB_type['name']
for index in range(TLB_type['width']):
exec('system.%s_tlb[%d].mem_side_ports[0] = \
system.l2_coalescer[0].cpu_side_ports[%d]' % \
(name, index, l2_coalescer_index))
name = TLB_type["name"]
for index in range(TLB_type["width"]):
exec(
"system.%s_tlb[%d].mem_side_ports[0] = \
system.l2_coalescer[0].cpu_side_ports[%d]"
% (name, index, l2_coalescer_index)
)
l2_coalescer_index += 1
# L2 <-> L3
system.l2_tlb[0].mem_side_ports[0] = \
system.l3_coalescer[0].cpu_side_ports[0]
system.l2_tlb[0].mem_side_ports[0] = system.l3_coalescer[0].cpu_side_ports[
0
]
# L3 TLB Vega page table walker to memory for full system only
if full_system:
for TLB_type in L3:
name = TLB_type['name']
for index in range(TLB_type['width']):
exec('system._dma_ports.append(system.%s_tlb[%d].walker)' % \
(name, index))
name = TLB_type["name"]
for index in range(TLB_type["width"]):
exec(
"system._dma_ports.append(system.%s_tlb[%d].walker)"
% (name, index)
)
return system

View File

@@ -27,77 +27,105 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
def tlb_options(parser):
#===================================================================
# ===================================================================
# TLB Configuration
#===================================================================
# ===================================================================
parser.add_argument(
"--TLB-config", type=str, default="perCU",
help="Options are: perCU (default), mono, 2CU, or perLane")
"--TLB-config",
type=str,
default="perCU",
help="Options are: perCU (default), mono, 2CU, or perLane",
)
#===================================================================
# ===================================================================
# L1 TLB Options (D-TLB, I-TLB, Dispatcher-TLB)
#===================================================================
# ===================================================================
parser.add_argument("--L1TLBentries", type=int, default="32")
parser.add_argument("--L1TLBassoc", type=int, default="32")
parser.add_argument("--L1AccessLatency", type=int, default="1",
help="latency in gpu cycles")
parser.add_argument("--L1MissLatency", type=int, default="750",
help="latency (in gpu cycles) of a page walk, "
"if this is a last level TLB")
parser.add_argument(
"--L1AccessLatency",
type=int,
default="1",
help="latency in gpu cycles",
)
parser.add_argument(
"--L1MissLatency",
type=int,
default="750",
help="latency (in gpu cycles) of a page walk, "
"if this is a last level TLB",
)
parser.add_argument("--L1MaxOutstandingReqs", type=int, default="64")
parser.add_argument("--L1AccessDistanceStat", action="store_true")
parser.add_argument("--tot-L1TLB-size", type=int, default="0")
#===================================================================
# ===================================================================
# L2 TLB Options
#===================================================================
# ===================================================================
parser.add_argument("--L2TLBentries", type=int, default="4096")
parser.add_argument("--L2TLBassoc", type=int, default="32")
parser.add_argument("--L2AccessLatency", type=int, default="69",
help="latency in gpu cycles")
parser.add_argument("--L2MissLatency", type=int, default="750",
help="latency (in gpu cycles) of a page walk, "
"if this is a last level TLB")
parser.add_argument(
"--L2AccessLatency",
type=int,
default="69",
help="latency in gpu cycles",
)
parser.add_argument(
"--L2MissLatency",
type=int,
default="750",
help="latency (in gpu cycles) of a page walk, "
"if this is a last level TLB",
)
parser.add_argument("--L2MaxOutstandingReqs", type=int, default="64")
parser.add_argument("--L2AccessDistanceStat", action="store_true")
#===================================================================
# ===================================================================
# L3 TLB Options
#===================================================================
# ===================================================================
parser.add_argument("--L3TLBentries", type=int, default="8192")
parser.add_argument("--L3TLBassoc", type=int, default="32")
parser.add_argument("--L3AccessLatency", type=int, default="150",
help="latency in gpu cycles")
parser.add_argument("--L3MissLatency", type=int, default="750",
help="latency (in gpu cycles) of a page walk")
parser.add_argument(
"--L3AccessLatency",
type=int,
default="150",
help="latency in gpu cycles",
)
parser.add_argument(
"--L3MissLatency",
type=int,
default="750",
help="latency (in gpu cycles) of a page walk",
)
parser.add_argument("--L3MaxOutstandingReqs", type=int, default="64")
parser.add_argument("--L3AccessDistanceStat", action="store_true")
#===================================================================
# ===================================================================
# L1 TLBCoalescer Options
#===================================================================
# ===================================================================
parser.add_argument("--L1ProbesPerCycle", type=int, default="2")
parser.add_argument("--L1CoalescingWindow", type=int, default="1")
parser.add_argument("--L1DisableCoalescing", action="store_true")
#===================================================================
# ===================================================================
# L2 TLBCoalescer Options
#===================================================================
# ===================================================================
parser.add_argument("--L2ProbesPerCycle", type=int, default="2")
parser.add_argument("--L2CoalescingWindow", type=int, default="1")
parser.add_argument("--L2DisableCoalescing", action="store_true")
#===================================================================
# ===================================================================
# L3 TLBCoalescer Options
#===================================================================
# ===================================================================
parser.add_argument("--L3ProbesPerCycle", type=int, default="2")
parser.add_argument("--L3CoalescingWindow", type=int, default="1")

View File

@@ -129,159 +129,303 @@ from m5.util import *
def add_options(parser):
# *****************************CROSSBAR PARAMETERS*************************
# Flit size of the main interconnect [1]
parser.add_argument("--xbar-width", default=32, action="store", type=int,
help="Data width of the main XBar (Bytes)")
parser.add_argument(
"--xbar-width",
default=32,
action="store",
type=int,
help="Data width of the main XBar (Bytes)",
)
# Clock frequency of the main interconnect [1]
# This crossbar, is placed on the logic-based of the HMC and it has its
# own voltage and clock domains, different from the DRAM dies or from the
# host.
parser.add_argument("--xbar-frequency", default='1GHz', type=str,
help="Clock Frequency of the main XBar")
parser.add_argument(
"--xbar-frequency",
default="1GHz",
type=str,
help="Clock Frequency of the main XBar",
)
# Arbitration latency of the HMC XBar [1]
parser.add_argument("--xbar-frontend-latency", default=1, action="store",
type=int, help="Arbitration latency of the XBar")
parser.add_argument(
"--xbar-frontend-latency",
default=1,
action="store",
type=int,
help="Arbitration latency of the XBar",
)
# Latency to forward a packet via the interconnect [1](two levels of FIFOs
# at the input and output of the inteconnect)
parser.add_argument("--xbar-forward-latency", default=2, action="store",
type=int, help="Forward latency of the XBar")
parser.add_argument(
"--xbar-forward-latency",
default=2,
action="store",
type=int,
help="Forward latency of the XBar",
)
# Latency to forward a response via the interconnect [1](two levels of
# FIFOs at the input and output of the inteconnect)
parser.add_argument("--xbar-response-latency", default=2, action="store",
type=int, help="Response latency of the XBar")
parser.add_argument(
"--xbar-response-latency",
default=2,
action="store",
type=int,
help="Response latency of the XBar",
)
# number of cross which connects 16 Vaults to serial link[7]
parser.add_argument("--number-mem-crossbar", default=4, action="store",
type=int, help="Number of crossbar in HMC")
parser.add_argument(
"--number-mem-crossbar",
default=4,
action="store",
type=int,
help="Number of crossbar in HMC",
)
# *****************************SERIAL LINK PARAMETERS**********************
# Number of serial links controllers [1]
parser.add_argument("--num-links-controllers", default=4, action="store",
type=int, help="Number of serial links")
parser.add_argument(
"--num-links-controllers",
default=4,
action="store",
type=int,
help="Number of serial links",
)
# Number of packets (not flits) to store at the request side of the serial
# link. This number should be adjusted to achive required bandwidth
parser.add_argument("--link-buffer-size-req", default=10, action="store",
type=int, help="Number of packets to buffer at the\
request side of the serial link")
parser.add_argument(
"--link-buffer-size-req",
default=10,
action="store",
type=int,
help="Number of packets to buffer at the\
request side of the serial link",
)
# Number of packets (not flits) to store at the response side of the serial
# link. This number should be adjusted to achive required bandwidth
parser.add_argument("--link-buffer-size-rsp", default=10, action="store",
type=int, help="Number of packets to buffer at the\
response side of the serial link")
parser.add_argument(
"--link-buffer-size-rsp",
default=10,
action="store",
type=int,
help="Number of packets to buffer at the\
response side of the serial link",
)
# Latency of the serial link composed by SER/DES latency (1.6ns [4]) plus
# the PCB trace latency (3ns Estimated based on [5])
parser.add_argument("--link-latency", default='4.6ns', type=str,
help="Latency of the serial links")
parser.add_argument(
"--link-latency",
default="4.6ns",
type=str,
help="Latency of the serial links",
)
# Clock frequency of the each serial link(SerDes) [1]
parser.add_argument("--link-frequency", default='10GHz', type=str,
help="Clock Frequency of the serial links")
parser.add_argument(
"--link-frequency",
default="10GHz",
type=str,
help="Clock Frequency of the serial links",
)
# Clock frequency of serial link Controller[6]
# clk_hmc[Mhz]= num_lanes_per_link * lane_speed [Gbits/s] /
# data_path_width * 10^6
# clk_hmc[Mhz]= 16 * 10 Gbps / 256 * 10^6 = 625 Mhz
parser.add_argument("--link-controller-frequency", default='625MHz',
type=str, help="Clock Frequency of the link\
controller")
parser.add_argument(
"--link-controller-frequency",
default="625MHz",
type=str,
help="Clock Frequency of the link\
controller",
)
# Latency of the serial link controller to process the packets[1][6]
# (ClockDomain = 625 Mhz )
# used here for calculations only
parser.add_argument("--link-ctrl-latency", default=4, action="store",
type=int, help="The number of cycles required for the\
controller to process the packet")
parser.add_argument(
"--link-ctrl-latency",
default=4,
action="store",
type=int,
help="The number of cycles required for the\
controller to process the packet",
)
# total_ctrl_latency = link_ctrl_latency + link_latency
# total_ctrl_latency = 4(Cycles) * 1.6 ns + 4.6 ns
parser.add_argument("--total-ctrl-latency", default='11ns', type=str,
help="The latency experienced by every packet\
regardless of size of packet")
parser.add_argument(
"--total-ctrl-latency",
default="11ns",
type=str,
help="The latency experienced by every packet\
regardless of size of packet",
)
# Number of parallel lanes in each serial link [1]
parser.add_argument("--num-lanes-per-link", default=16, action="store",
type=int, help="Number of lanes per each link")
parser.add_argument(
"--num-lanes-per-link",
default=16,
action="store",
type=int,
help="Number of lanes per each link",
)
# Number of serial links [1]
parser.add_argument("--num-serial-links", default=4, action="store",
type=int, help="Number of serial links")
parser.add_argument(
"--num-serial-links",
default=4,
action="store",
type=int,
help="Number of serial links",
)
# speed of each lane of serial link - SerDes serial interface 10 Gb/s
parser.add_argument("--serial-link-speed", default=10, action="store",
type=int, help="Gbs/s speed of each lane of serial\
link")
parser.add_argument(
"--serial-link-speed",
default=10,
action="store",
type=int,
help="Gbs/s speed of each lane of serial\
link",
)
# address range for each of the serial links
parser.add_argument("--serial-link-addr-range", default='1GB', type=str,
help="memory range for each of the serial links.\
Default: 1GB")
parser.add_argument(
"--serial-link-addr-range",
default="1GB",
type=str,
help="memory range for each of the serial links.\
Default: 1GB",
)
# *****************************PERFORMANCE MONITORING*********************
# The main monitor behind the HMC Controller
parser.add_argument("--enable-global-monitor", action="store_true",
help="The main monitor behind the HMC Controller")
parser.add_argument(
"--enable-global-monitor",
action="store_true",
help="The main monitor behind the HMC Controller",
)
# The link performance monitors
parser.add_argument("--enable-link-monitor", action="store_true",
help="The link monitors")
parser.add_argument(
"--enable-link-monitor", action="store_true", help="The link monitors"
)
# link aggregator enable - put a cross between buffers & links
parser.add_argument("--enable-link-aggr", action="store_true", help="The\
crossbar between port and Link Controller")
parser.add_argument(
"--enable-link-aggr",
action="store_true",
help="The\
crossbar between port and Link Controller",
)
parser.add_argument("--enable-buff-div", action="store_true",
help="Memory Range of Buffer is ivided between total\
range")
parser.add_argument(
"--enable-buff-div",
action="store_true",
help="Memory Range of Buffer is ivided between total\
range",
)
# *****************************HMC ARCHITECTURE **************************
# Memory chunk for 16 vault - numbers of vault / number of crossbars
parser.add_argument("--mem-chunk", default=4, action="store", type=int,
help="Chunk of memory range for each cross bar in\
arch 0")
parser.add_argument(
"--mem-chunk",
default=4,
action="store",
type=int,
help="Chunk of memory range for each cross bar in\
arch 0",
)
# size of req buffer within crossbar, used for modelling extra latency
# when the reuqest go to non-local vault
parser.add_argument("--xbar-buffer-size-req", default=10, action="store",
type=int, help="Number of packets to buffer at the\
request side of the crossbar")
parser.add_argument(
"--xbar-buffer-size-req",
default=10,
action="store",
type=int,
help="Number of packets to buffer at the\
request side of the crossbar",
)
# size of response buffer within crossbar, used for modelling extra latency
# when the response received from non-local vault
parser.add_argument("--xbar-buffer-size-resp", default=10, action="store",
type=int, help="Number of packets to buffer at the\
response side of the crossbar")
parser.add_argument(
"--xbar-buffer-size-resp",
default=10,
action="store",
type=int,
help="Number of packets to buffer at the\
response side of the crossbar",
)
# HMC device architecture. It affects the HMC host controller as well
parser.add_argument("--arch", type=str, choices=["same", "distributed",
"mixed"], default="distributed", help="same: HMC with\
parser.add_argument(
"--arch",
type=str,
choices=["same", "distributed", "mixed"],
default="distributed",
help="same: HMC with\
4 links, all with same range.\ndistributed: HMC with\
4 links with distributed range.\nmixed: mixed with\
same and distributed range.\nDefault: distributed")
same and distributed range.\nDefault: distributed",
)
# HMC device - number of vaults
parser.add_argument("--hmc-dev-num-vaults", default=16, action="store",
type=int, help="number of independent vaults within\
parser.add_argument(
"--hmc-dev-num-vaults",
default=16,
action="store",
type=int,
help="number of independent vaults within\
the HMC device. Note: each vault has a memory\
controller (valut controller)\nDefault: 16")
controller (valut controller)\nDefault: 16",
)
# HMC device - vault capacity or size
parser.add_argument("--hmc-dev-vault-size", default='256MB', type=str,
help="vault storage capacity in bytes. Default:\
256MB")
parser.add_argument("--mem-type", type=str, choices=["HMC_2500_1x32"],
default="HMC_2500_1x32", help="type of HMC memory to\
use. Default: HMC_2500_1x32")
parser.add_argument("--mem-channels", default=1, action="store", type=int,
help="Number of memory channels")
parser.add_argument("--mem-ranks", default=1, action="store", type=int,
help="Number of ranks to iterate across")
parser.add_argument("--burst-length", default=256, action="store",
type=int, help="burst length in bytes. Note: the\
parser.add_argument(
"--hmc-dev-vault-size",
default="256MB",
type=str,
help="vault storage capacity in bytes. Default:\
256MB",
)
parser.add_argument(
"--mem-type",
type=str,
choices=["HMC_2500_1x32"],
default="HMC_2500_1x32",
help="type of HMC memory to\
use. Default: HMC_2500_1x32",
)
parser.add_argument(
"--mem-channels",
default=1,
action="store",
type=int,
help="Number of memory channels",
)
parser.add_argument(
"--mem-ranks",
default=1,
action="store",
type=int,
help="Number of ranks to iterate across",
)
parser.add_argument(
"--burst-length",
default=256,
action="store",
type=int,
help="burst length in bytes. Note: the\
cache line size will be set to this value.\nDefault:\
256")
256",
)
# configure HMC host controller
@@ -292,8 +436,8 @@ def config_hmc_host_ctrl(opt, system):
# Create additional crossbar for arch1
if opt.arch == "distributed" or opt.arch == "mixed":
clk = '100GHz'
vd = VoltageDomain(voltage='1V')
clk = "100GHz"
vd = VoltageDomain(voltage="1V")
# Create additional crossbar for arch1
system.membus = NoncoherentXBar(width=8)
system.membus.badaddr_responder = BadAddr()
@@ -310,42 +454,50 @@ def config_hmc_host_ctrl(opt, system):
# Memmory ranges of serial link for arch-0. Same as the ranges of vault
# controllers (4 vaults to 1 serial link)
if opt.arch == "same":
ser_ranges = [AddrRange(0, (4*slar)-1) for i in
range(opt.num_serial_links)]
ser_ranges = [
AddrRange(0, (4 * slar) - 1) for i in range(opt.num_serial_links)
]
# Memmory ranges of serial link for arch-1. Distributed range accross
# links
if opt.arch == "distributed":
ser_ranges = [AddrRange(i*slar, ((i+1)*slar)-1) for i in
range(opt.num_serial_links)]
ser_ranges = [
AddrRange(i * slar, ((i + 1) * slar) - 1)
for i in range(opt.num_serial_links)
]
# Memmory ranges of serial link for arch-2 'Mixed' address distribution
# over links
if opt.arch == "mixed":
ser_range0 = AddrRange(0, (1*slar)-1)
ser_range1 = AddrRange(1*slar, 2*slar-1)
ser_range2 = AddrRange(0, (4*slar)-1)
ser_range3 = AddrRange(0, (4*slar)-1)
ser_range0 = AddrRange(0, (1 * slar) - 1)
ser_range1 = AddrRange(1 * slar, 2 * slar - 1)
ser_range2 = AddrRange(0, (4 * slar) - 1)
ser_range3 = AddrRange(0, (4 * slar) - 1)
ser_ranges = [ser_range0, ser_range1, ser_range2, ser_range3]
# Serial link Controller with 16 SerDes links at 10 Gbps with serial link
# ranges w.r.t to architecture
sl = [SerialLink(ranges=ser_ranges[i],
req_size=opt.link_buffer_size_req,
resp_size=opt.link_buffer_size_rsp,
num_lanes=opt.num_lanes_per_link,
link_speed=opt.serial_link_speed,
delay=opt.total_ctrl_latency) for i in
range(opt.num_serial_links)]
sl = [
SerialLink(
ranges=ser_ranges[i],
req_size=opt.link_buffer_size_req,
resp_size=opt.link_buffer_size_rsp,
num_lanes=opt.num_lanes_per_link,
link_speed=opt.serial_link_speed,
delay=opt.total_ctrl_latency,
)
for i in range(opt.num_serial_links)
]
system.hmc_host.seriallink = sl
# enable global monitor
if opt.enable_global_monitor:
system.hmc_host.lmonitor = [CommMonitor() for i in
range(opt.num_serial_links)]
system.hmc_host.lmonitor = [
CommMonitor() for i in range(opt.num_serial_links)
]
# set the clock frequency for serial link
for i in range(opt.num_serial_links):
clk = opt.link_controller_frequency
vd = VoltageDomain(voltage='1V')
vd = VoltageDomain(voltage="1V")
scd = SrcClockDomain(clock=clk, voltage_domain=vd)
system.hmc_host.seriallink[i].clk_domain = scd
@@ -387,8 +539,10 @@ def config_hmc_dev(opt, system, hmc_host):
# create memory ranges for the vault controllers
arv = convert.toMemorySize(opt.hmc_dev_vault_size)
addr_ranges_vaults = [AddrRange(i*arv, ((i+1)*arv-1)) for i in
range(opt.hmc_dev_num_vaults)]
addr_ranges_vaults = [
AddrRange(i * arv, ((i + 1) * arv - 1))
for i in range(opt.hmc_dev_num_vaults)
]
system.mem_ranges = addr_ranges_vaults
if opt.enable_link_monitor:
@@ -396,29 +550,36 @@ def config_hmc_dev(opt, system, hmc_host):
system.hmc_dev.lmonitor = lm
# 4 HMC Crossbars located in its logic-base (LoB)
xb = [NoncoherentXBar(width=opt.xbar_width,
frontend_latency=opt.xbar_frontend_latency,
forward_latency=opt.xbar_forward_latency,
response_latency=opt.xbar_response_latency) for i in
range(opt.number_mem_crossbar)]
xb = [
NoncoherentXBar(
width=opt.xbar_width,
frontend_latency=opt.xbar_frontend_latency,
forward_latency=opt.xbar_forward_latency,
response_latency=opt.xbar_response_latency,
)
for i in range(opt.number_mem_crossbar)
]
system.hmc_dev.xbar = xb
for i in range(opt.number_mem_crossbar):
clk = opt.xbar_frequency
vd = VoltageDomain(voltage='1V')
vd = VoltageDomain(voltage="1V")
scd = SrcClockDomain(clock=clk, voltage_domain=vd)
system.hmc_dev.xbar[i].clk_domain = scd
# Attach 4 serial link to 4 crossbar/s
for i in range(opt.num_serial_links):
if opt.enable_link_monitor:
system.hmc_host.seriallink[i].mem_side_port = \
system.hmc_dev.lmonitor[i].cpu_side_port
system.hmc_dev.lmonitor[i].mem_side_port = \
system.hmc_dev.xbar[i].cpu_side_ports
system.hmc_host.seriallink[
i
].mem_side_port = system.hmc_dev.lmonitor[i].cpu_side_port
system.hmc_dev.lmonitor[i].mem_side_port = system.hmc_dev.xbar[
i
].cpu_side_ports
else:
system.hmc_host.seriallink[i].mem_side_port = \
system.hmc_dev.xbar[i].cpu_side_ports
system.hmc_host.seriallink[i].mem_side_port = system.hmc_dev.xbar[
i
].cpu_side_ports
# Connecting xbar with each other for request arriving at the wrong xbar,
# then it will be forward to correct xbar. Bridge is used to connect xbars
@@ -426,9 +587,13 @@ def config_hmc_dev(opt, system, hmc_host):
numx = len(system.hmc_dev.xbar)
# create a list of buffers
system.hmc_dev.buffers = [Bridge(req_size=opt.xbar_buffer_size_req,
resp_size=opt.xbar_buffer_size_resp)
for i in range(numx*(opt.mem_chunk-1))]
system.hmc_dev.buffers = [
Bridge(
req_size=opt.xbar_buffer_size_req,
resp_size=opt.xbar_buffer_size_resp,
)
for i in range(numx * (opt.mem_chunk - 1))
]
# Buffer iterator
it = iter(list(range(len(system.hmc_dev.buffers))))
@@ -446,14 +611,18 @@ def config_hmc_dev(opt, system, hmc_host):
# Change the default values for ranges of bridge
system.hmc_dev.buffers[index].ranges = system.mem_ranges[
j * int(opt.mem_chunk):
(j + 1) * int(opt.mem_chunk)]
j * int(opt.mem_chunk) : (j + 1) * int(opt.mem_chunk)
]
# Connect the bridge between corssbars
system.hmc_dev.xbar[i].mem_side_ports = \
system.hmc_dev.buffers[index].cpu_side_port
system.hmc_dev.buffers[index].mem_side_port = \
system.hmc_dev.xbar[j].cpu_side_ports
system.hmc_dev.xbar[
i
].mem_side_ports = system.hmc_dev.buffers[
index
].cpu_side_port
system.hmc_dev.buffers[
index
].mem_side_port = system.hmc_dev.xbar[j].cpu_side_ports
else:
# Don't connect the xbar to itself
pass
@@ -462,37 +631,49 @@ def config_hmc_dev(opt, system, hmc_host):
# can only direct traffic to it local vaults
if opt.arch == "mixed":
system.hmc_dev.buffer30 = Bridge(ranges=system.mem_ranges[0:4])
system.hmc_dev.xbar[3].mem_side_ports = \
system.hmc_dev.buffer30.cpu_side_port
system.hmc_dev.buffer30.mem_side_port = \
system.hmc_dev.xbar[0].cpu_side_ports
system.hmc_dev.xbar[
3
].mem_side_ports = system.hmc_dev.buffer30.cpu_side_port
system.hmc_dev.buffer30.mem_side_port = system.hmc_dev.xbar[
0
].cpu_side_ports
system.hmc_dev.buffer31 = Bridge(ranges=system.mem_ranges[4:8])
system.hmc_dev.xbar[3].mem_side_ports = \
system.hmc_dev.buffer31.cpu_side_port
system.hmc_dev.buffer31.mem_side_port = \
system.hmc_dev.xbar[1].cpu_side_ports
system.hmc_dev.xbar[
3
].mem_side_ports = system.hmc_dev.buffer31.cpu_side_port
system.hmc_dev.buffer31.mem_side_port = system.hmc_dev.xbar[
1
].cpu_side_ports
system.hmc_dev.buffer32 = Bridge(ranges=system.mem_ranges[8:12])
system.hmc_dev.xbar[3].mem_side_ports = \
system.hmc_dev.buffer32.cpu_side_port
system.hmc_dev.buffer32.mem_side_port = \
system.hmc_dev.xbar[2].cpu_side_ports
system.hmc_dev.xbar[
3
].mem_side_ports = system.hmc_dev.buffer32.cpu_side_port
system.hmc_dev.buffer32.mem_side_port = system.hmc_dev.xbar[
2
].cpu_side_ports
system.hmc_dev.buffer20 = Bridge(ranges=system.mem_ranges[0:4])
system.hmc_dev.xbar[2].mem_side_ports = \
system.hmc_dev.buffer20.cpu_side_port
system.hmc_dev.buffer20.mem_side_port = \
system.hmc_dev.xbar[0].cpu_side_ports
system.hmc_dev.xbar[
2
].mem_side_ports = system.hmc_dev.buffer20.cpu_side_port
system.hmc_dev.buffer20.mem_side_port = system.hmc_dev.xbar[
0
].cpu_side_ports
system.hmc_dev.buffer21 = Bridge(ranges=system.mem_ranges[4:8])
system.hmc_dev.xbar[2].mem_side_ports = \
system.hmc_dev.buffer21.cpu_side_port
system.hmc_dev.buffer21.mem_side_port = \
system.hmc_dev.xbar[1].cpu_side_ports
system.hmc_dev.xbar[
2
].mem_side_ports = system.hmc_dev.buffer21.cpu_side_port
system.hmc_dev.buffer21.mem_side_port = system.hmc_dev.xbar[
1
].cpu_side_ports
system.hmc_dev.buffer23 = Bridge(ranges=system.mem_ranges[12:16])
system.hmc_dev.xbar[2].mem_side_ports = \
system.hmc_dev.buffer23.cpu_side_port
system.hmc_dev.buffer23.mem_side_port = \
system.hmc_dev.xbar[3].cpu_side_ports
system.hmc_dev.xbar[
2
].mem_side_ports = system.hmc_dev.buffer23.cpu_side_port
system.hmc_dev.buffer23.mem_side_port = system.hmc_dev.xbar[
3
].cpu_side_ports

View File

@@ -37,8 +37,8 @@ import m5.objects
from common import ObjectList
from common import HMC
def create_mem_intf(intf, r, i, intlv_bits, intlv_size,
xor_low_bit):
def create_mem_intf(intf, r, i, intlv_bits, intlv_size, xor_low_bit):
"""
Helper function for creating a single memoy controller from the given
options. This function is invoked multiple times in config_mem function
@@ -46,6 +46,7 @@ def create_mem_intf(intf, r, i, intlv_bits, intlv_size,
"""
import math
intlv_low_bit = int(math.log(intlv_size, 2))
# Use basic hashing for the channel selection, and preferably use
@@ -53,7 +54,7 @@ def create_mem_intf(intf, r, i, intlv_bits, intlv_size,
# the details of the caches here, make an educated guess. 4 MByte
# 4-way associative with 64 byte cache lines is 6 offset bits and
# 14 index bits.
if (xor_low_bit):
if xor_low_bit:
xor_high_bit = xor_low_bit + intlv_bits - 1
else:
xor_high_bit = 0
@@ -67,13 +68,15 @@ def create_mem_intf(intf, r, i, intlv_bits, intlv_size,
# If the channel bits are appearing after the column
# bits, we need to add the appropriate number of bits
# for the row buffer size
if interface.addr_mapping.value == 'RoRaBaChCo':
if interface.addr_mapping.value == "RoRaBaChCo":
# This computation only really needs to happen
# once, but as we rely on having an instance we
# end up having to repeat it for each and every
# one
rowbuffer_size = interface.device_rowbuffer_size.value * \
interface.devices_per_rank.value
rowbuffer_size = (
interface.device_rowbuffer_size.value
* interface.devices_per_rank.value
)
intlv_low_bit = int(math.log(rowbuffer_size, 2))
@@ -83,7 +86,7 @@ def create_mem_intf(intf, r, i, intlv_bits, intlv_size,
# If the channel bits are appearing after the low order
# address bits (buffer bits), we need to add the appropriate
# number of bits for the buffer size
if interface.addr_mapping.value == 'RoRaBaChCo':
if interface.addr_mapping.value == "RoRaBaChCo":
# This computation only really needs to happen
# once, but as we rely on having an instance we
# end up having to repeat it for each and every
@@ -94,14 +97,17 @@ def create_mem_intf(intf, r, i, intlv_bits, intlv_size,
# We got all we need to configure the appropriate address
# range
interface.range = m5.objects.AddrRange(r.start, size = r.size(),
intlvHighBit = \
intlv_low_bit + intlv_bits - 1,
xorHighBit = xor_high_bit,
intlvBits = intlv_bits,
intlvMatch = i)
interface.range = m5.objects.AddrRange(
r.start,
size=r.size(),
intlvHighBit=intlv_low_bit + intlv_bits - 1,
xorHighBit=xor_high_bit,
intlvBits=intlv_bits,
intlvMatch=i,
)
return interface
def config_mem(options, system):
"""
Create the memory controllers based on the options and attach them.
@@ -125,8 +131,9 @@ def config_mem(options, system):
# Optional options
opt_tlm_memory = getattr(options, "tlm_memory", None)
opt_external_memory_system = getattr(options, "external_memory_system",
None)
opt_external_memory_system = getattr(
options, "external_memory_system", None
)
opt_elastic_trace_en = getattr(options, "elastic_trace_en", False)
opt_mem_ranks = getattr(options, "mem_ranks", None)
opt_nvm_ranks = getattr(options, "nvm_ranks", None)
@@ -149,15 +156,18 @@ def config_mem(options, system):
port_type="tlm_slave",
port_data=opt_tlm_memory,
port=system.membus.mem_side_ports,
addr_ranges=system.mem_ranges)
addr_ranges=system.mem_ranges,
)
system.workload.addr_check = False
return
if opt_external_memory_system:
subsystem.external_memory = m5.objects.ExternalSlave(
port_type=opt_external_memory_system,
port_data="init_mem0", port=xbar.mem_side_ports,
addr_ranges=system.mem_ranges)
port_data="init_mem0",
port=xbar.mem_side_ports,
addr_ranges=system.mem_ranges,
)
subsystem.workload.addr_check = False
return
@@ -165,8 +175,9 @@ def config_mem(options, system):
import math
from m5.util import fatal
intlv_bits = int(math.log(nbr_mem_ctrls, 2))
if 2 ** intlv_bits != nbr_mem_ctrls:
if 2**intlv_bits != nbr_mem_ctrls:
fatal("Number of memory channels must be a power of 2")
if opt_mem_type:
@@ -178,8 +189,10 @@ def config_mem(options, system):
mem_ctrls = []
if opt_elastic_trace_en and not issubclass(intf, m5.objects.SimpleMemory):
fatal("When elastic trace is enabled, configure mem-type as "
"simple-mem.")
fatal(
"When elastic trace is enabled, configure mem-type as "
"simple-mem."
)
# The default behaviour is to interleave memory channels on 128
# byte granularity, or cache line granularity if larger than 128
@@ -199,13 +212,16 @@ def config_mem(options, system):
for i in range(nbr_mem_ctrls):
if opt_mem_type and (not opt_nvm_type or range_iter % 2 != 0):
# Create the DRAM interface
dram_intf = create_mem_intf(intf, r, i,
intlv_bits, intlv_size, opt_xor_low_bit)
dram_intf = create_mem_intf(
intf, r, i, intlv_bits, intlv_size, opt_xor_low_bit
)
# Set the number of ranks based on the command-line
# options if it was explicitly set
if issubclass(intf, m5.objects.DRAMInterface) and \
opt_mem_ranks:
if (
issubclass(intf, m5.objects.DRAMInterface)
and opt_mem_ranks
):
dram_intf.ranks_per_channel = opt_mem_ranks
# Enable low-power DRAM states if option is set
@@ -213,9 +229,11 @@ def config_mem(options, system):
dram_intf.enable_dram_powerdown = opt_dram_powerdown
if opt_elastic_trace_en:
dram_intf.latency = '1ns'
print("For elastic trace, over-riding Simple Memory "
"latency to 1ns.")
dram_intf.latency = "1ns"
print(
"For elastic trace, over-riding Simple Memory "
"latency to 1ns."
)
# Create the controller that will drive the interface
mem_ctrl = dram_intf.controller()
@@ -223,13 +241,16 @@ def config_mem(options, system):
mem_ctrls.append(mem_ctrl)
elif opt_nvm_type and (not opt_mem_type or range_iter % 2 == 0):
nvm_intf = create_mem_intf(n_intf, r, i,
intlv_bits, intlv_size, opt_xor_low_bit)
nvm_intf = create_mem_intf(
n_intf, r, i, intlv_bits, intlv_size, opt_xor_low_bit
)
# Set the number of ranks based on the command-line
# options if it was explicitly set
if issubclass(n_intf, m5.objects.NVMInterface) and \
opt_nvm_ranks:
if (
issubclass(n_intf, m5.objects.NVMInterface)
and opt_nvm_ranks
):
nvm_intf.ranks_per_channel = opt_nvm_ranks
# Create a controller if not sharing a channel with DRAM
@@ -244,13 +265,13 @@ def config_mem(options, system):
# hook up NVM interface when channel is shared with DRAM + NVM
for i in range(len(nvm_intfs)):
mem_ctrls[i].nvm = nvm_intfs[i];
mem_ctrls[i].nvm = nvm_intfs[i]
# Connect the controller to the xbar port
for i in range(len(mem_ctrls)):
if opt_mem_type == "HMC_2500_1x32":
# Connect the controllers to the membus
mem_ctrls[i].port = xbar[i//4].mem_side_ports
mem_ctrls[i].port = xbar[i // 4].mem_side_ports
# Set memory device size. There is an independent controller
# for each vault. All vaults are same size.
mem_ctrls[i].dram.device_size = options.hmc_dev_vault_size

View File

@@ -34,18 +34,20 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from gem5.runtime import get_supported_isas
import m5.objects
import m5.internal.params
import inspect
import sys
from textwrap import TextWrapper
class ObjectList(object):
""" Creates a list of objects that are sub-classes of a given class. """
"""Creates a list of objects that are sub-classes of a given class."""
def _is_obj_class(self, cls):
"""Determine if a class is a a sub class of the provided base class
that can be instantiated.
that can be instantiated.
"""
# We can't use the normal inspect.isclass because the ParamFactory
@@ -63,16 +65,20 @@ class ObjectList(object):
sub_cls = self._sub_classes[real_name]
return sub_cls
except KeyError:
print("{} is not a valid sub-class of {}.".format(name, \
self.base_cls))
print(
"{} is not a valid sub-class of {}.".format(
name, self.base_cls
)
)
raise
def print(self):
"""Print a list of available sub-classes and aliases."""
print("Available {} classes:".format(self.base_cls))
doc_wrapper = TextWrapper(initial_indent="\t\t",
subsequent_indent="\t\t")
doc_wrapper = TextWrapper(
initial_indent="\t\t", subsequent_indent="\t\t"
)
for name, cls in list(self._sub_classes.items()):
print("\t{}".format(name))
@@ -117,6 +123,7 @@ class ObjectList(object):
self._aliases = {}
self._add_aliases(aliases)
class CPUList(ObjectList):
def _is_obj_class(self, cls):
"""Determine if a class is a CPU that can be instantiated"""
@@ -124,35 +131,42 @@ class CPUList(ObjectList):
# We can't use the normal inspect.isclass because the ParamFactory
# and ProxyFactory classes have a tendency to confuse it.
try:
return super(CPUList, self)._is_obj_class(cls) and \
not issubclass(cls, m5.objects.CheckerCPU)
return super(CPUList, self)._is_obj_class(cls) and not issubclass(
cls, m5.objects.CheckerCPU
)
except (TypeError, AttributeError):
return False
def _add_objects(self):
super(CPUList, self)._add_objects()
from m5.defines import buildEnv
from importlib import import_module
for package in [ "generic", buildEnv['TARGET_ISA']]:
for isa in {
"generic",
} | {isa.name.lower() for isa in get_supported_isas()}:
try:
package = import_module(".cores." + package,
package=__name__.rpartition('.')[0])
package = import_module(
".cores." + isa, package=__name__.rpartition(".")[0]
)
except ImportError:
# No timing models for this ISA
continue
for mod_name, module in \
inspect.getmembers(package, inspect.ismodule):
for name, cls in inspect.getmembers(module,
self._is_obj_class):
for mod_name, module in inspect.getmembers(
package, inspect.ismodule
):
for name, cls in inspect.getmembers(
module, self._is_obj_class
):
self._sub_classes[name] = cls
class EnumList(ObjectList):
""" Creates a list of possible values for a given enum class. """
"""Creates a list of possible values for a given enum class."""
def _add_objects(self):
""" Add all enum values to the ObjectList """
"""Add all enum values to the ObjectList"""
self._sub_classes = {}
for (key, value) in list(self.base_cls.__members__.items()):
# All Enums have a value Num_NAME at the end which we
@@ -160,31 +174,37 @@ class EnumList(ObjectList):
if not key.startswith("Num_"):
self._sub_classes[key] = value
rp_list = ObjectList(getattr(m5.objects, 'BaseReplacementPolicy', None))
bp_list = ObjectList(getattr(m5.objects, 'BranchPredictor', None))
cpu_list = CPUList(getattr(m5.objects, 'BaseCPU', None))
hwp_list = ObjectList(getattr(m5.objects, 'BasePrefetcher', None))
indirect_bp_list = ObjectList(getattr(m5.objects, 'IndirectPredictor', None))
mem_list = ObjectList(getattr(m5.objects, 'AbstractMemory', None))
dram_addr_map_list = EnumList(getattr(m5.internal.params, 'enum_AddrMap',
None))
rp_list = ObjectList(getattr(m5.objects, "BaseReplacementPolicy", None))
bp_list = ObjectList(getattr(m5.objects, "BranchPredictor", None))
cpu_list = CPUList(getattr(m5.objects, "BaseCPU", None))
hwp_list = ObjectList(getattr(m5.objects, "BasePrefetcher", None))
indirect_bp_list = ObjectList(getattr(m5.objects, "IndirectPredictor", None))
mem_list = ObjectList(getattr(m5.objects, "AbstractMemory", None))
dram_addr_map_list = EnumList(
getattr(m5.internal.params, "enum_AddrMap", None)
)
# Platform aliases. The platforms listed here might not be compiled,
# we make sure they exist before we add them to the platform list.
_platform_aliases_all = [
("VExpress_GEM5", "VExpress_GEM5_V1"),
]
platform_list = ObjectList(getattr(m5.objects, 'Platform', None), \
_platform_aliases_all)
_platform_aliases_all = [("VExpress_GEM5", "VExpress_GEM5_V1")]
platform_list = ObjectList(
getattr(m5.objects, "Platform", None), _platform_aliases_all
)
def _subclass_tester(name):
sub_class = getattr(m5.objects, name, None)
def tester(cls):
return sub_class is not None and cls is not None and \
issubclass(cls, sub_class)
return (
sub_class is not None
and cls is not None
and issubclass(cls, sub_class)
)
return tester
is_kvm_cpu = _subclass_tester("BaseKvmCPU")
is_noncaching_cpu = _subclass_tester("NonCachingSimpleCPU")

File diff suppressed because it is too large Load Diff

View File

@@ -44,21 +44,22 @@ from argparse import ArgumentParser
# add the args we want to be able to control from the command line
parser = ArgumentParser()
def add_option(*args, **kwargs):
"""Call "add_option" to the global options parser
"""
"""Call "add_option" to the global options parser"""
if called_parse_args:
m5.fatal("Can't add an option after calling SimpleOpts.parse_args")
parser.add_argument(*args, **kwargs)
def parse_args():
    """Parse the command line with the module-level ``parser``.

    The module-level ``called_parse_args`` flag is set to True before the
    parser runs.

    Returns:
        Whatever ``parser.parse_args()`` returns.
    """
    global called_parse_args
    called_parse_args = True
    parsed = parser.parse_args()
    return parsed
def print_help(*args, **kwargs):
    """Print the help text of the module-level ``parser``.

    Every argument is forwarded unchanged to ``parser.print_help``; nothing
    is returned.
    """
    parser.print_help(*args, **kwargs)

View File

@@ -49,27 +49,28 @@ from m5.defines import buildEnv
from m5.objects import *
from m5.util import *
# Issued once. NOTE(review): presumably this extends the module search path
# with the sibling "common" directory — confirm against m5.util.addToPath.
addToPath("../common")
def getCPUClass(cpu_type):
    """Look up a CPU class by name and report its memory mode.

    Args:
        cpu_type: Key passed to ``ObjectList.cpu_list.get``.

    Returns:
        A ``(cpu_class, memory_mode)`` tuple, where ``memory_mode`` is the
        result of calling ``memory_mode()`` on the class that was found.
    """
    cpu_class = ObjectList.cpu_list.get(cpu_type)
    memory_mode = cpu_class.memory_mode()
    return cpu_class, memory_mode
def setCPUClass(options):
"""Returns two cpu classes and the initial mode of operation.
Restoring from a checkpoint or fast forwarding through a benchmark
can be done using one type of cpu, and then the actual
simulation can be carried out using another type. This function
returns these two types of cpus and the initial mode of operation
depending on the options provided.
Restoring from a checkpoint or fast forwarding through a benchmark
can be done using one type of cpu, and then the actual
simulation can be carried out using another type. This function
returns these two types of cpus and the initial mode of operation
depending on the options provided.
"""
TmpClass, test_mem_mode = getCPUClass(options.cpu_type)
CPUClass = None
if TmpClass.require_caches() and \
not options.caches and not options.ruby:
if TmpClass.require_caches() and not options.caches and not options.ruby:
fatal("%s must be used with caches" % options.cpu_type)
if options.checkpoint_restore != None:
@@ -79,20 +80,22 @@ def setCPUClass(options):
elif options.fast_forward:
CPUClass = TmpClass
TmpClass = AtomicSimpleCPU
test_mem_mode = 'atomic'
test_mem_mode = "atomic"
# Ruby only supports atomic accesses in noncaching mode
if test_mem_mode == 'atomic' and options.ruby:
if test_mem_mode == "atomic" and options.ruby:
warn("Memory mode will be changed to atomic_noncaching")
test_mem_mode = 'atomic_noncaching'
test_mem_mode = "atomic_noncaching"
return (TmpClass, test_mem_mode, CPUClass)
def setMemClass(options):
    """Return the memory class selected by ``options.mem_type``.

    Despite the "set" in the name, nothing is assigned here: the function
    only looks ``options.mem_type`` up in ``ObjectList.mem_list`` and hands
    the result back.
    """
    mem_class = ObjectList.mem_list.get(options.mem_type)
    return mem_class
def setWorkCountOptions(system, options):
if options.work_item_id != None:
system.work_item_id = options.work_item_id
@@ -111,6 +114,7 @@ def setWorkCountOptions(system, options):
if options.work_cpus_checkpoint_count != None:
system.work_cpus_ckpt_count = options.work_cpus_checkpoint_count
def findCptDir(options, cptdir, testsys):
"""Figures out the directory from which the checkpointed state is read.
@@ -137,7 +141,7 @@ def findCptDir(options, cptdir, testsys):
if options.simpoint:
# assume workload 0 has the simpoint
if testsys.cpu[0].workload[0].simpoint == 0:
fatal('Unable to find simpoint')
fatal("Unable to find simpoint")
inst += int(testsys.cpu[0].workload[0].simpoint)
checkpoint_dir = joinpath(cptdir, "cpt.%s.%s" % (options.bench, inst))
@@ -148,8 +152,10 @@ def findCptDir(options, cptdir, testsys):
# Restore from SimPoint checkpoints
# Assumes that the checkpoint dir names are formatted as follows:
dirs = listdir(cptdir)
expr = re.compile('cpt\.simpoint_(\d+)_inst_(\d+)' +
'_weight_([\d\.e\-]+)_interval_(\d+)_warmup_(\d+)')
expr = re.compile(
"cpt\.simpoint_(\d+)_inst_(\d+)"
+ "_weight_([\d\.e\-]+)_interval_(\d+)_warmup_(\d+)"
)
cpts = []
for dir in dirs:
match = expr.match(dir)
@@ -159,7 +165,7 @@ def findCptDir(options, cptdir, testsys):
cpt_num = options.checkpoint_restore
if cpt_num > len(cpts):
fatal('Checkpoint %d not found', cpt_num)
fatal("Checkpoint %d not found", cpt_num)
checkpoint_dir = joinpath(cptdir, cpts[cpt_num - 1])
match = expr.match(cpts[cpt_num - 1])
if match:
@@ -176,30 +182,33 @@ def findCptDir(options, cptdir, testsys):
if testsys.switch_cpus != None:
testsys.switch_cpus[0].simpoint_start_insts = simpoint_start_insts
print("Resuming from SimPoint", end=' ')
print("#%d, start_inst:%d, weight:%f, interval:%d, warmup:%d" %
(index, start_inst, weight_inst, interval_length, warmup_length))
print("Resuming from SimPoint", end=" ")
print(
"#%d, start_inst:%d, weight:%f, interval:%d, warmup:%d"
% (index, start_inst, weight_inst, interval_length, warmup_length)
)
else:
dirs = listdir(cptdir)
expr = re.compile('cpt\.([0-9]+)')
expr = re.compile("cpt\.([0-9]+)")
cpts = []
for dir in dirs:
match = expr.match(dir)
if match:
cpts.append(match.group(1))
cpts.sort(key = lambda a: int(a))
cpts.sort(key=lambda a: int(a))
cpt_num = options.checkpoint_restore
if cpt_num > len(cpts):
fatal('Checkpoint %d not found', cpt_num)
fatal("Checkpoint %d not found", cpt_num)
cpt_starttick = int(cpts[cpt_num - 1])
checkpoint_dir = joinpath(cptdir, "cpt.%s" % cpts[cpt_num - 1])
return cpt_starttick, checkpoint_dir
def scriptCheckpoints(options, maxtick, cptdir):
if options.at_instruction or options.simpoint:
checkpoint_inst = int(options.take_checkpoints)
@@ -219,8 +228,11 @@ def scriptCheckpoints(options, maxtick, cptdir):
exit_cause = exit_event.getCause()
if exit_cause == "a thread reached the max instruction count":
m5.checkpoint(joinpath(cptdir, "cpt.%s.%d" % \
(options.bench, checkpoint_inst)))
m5.checkpoint(
joinpath(
cptdir, "cpt.%s.%d" % (options.bench, checkpoint_inst)
)
)
print("Checkpoint written.")
else:
@@ -242,8 +254,10 @@ def scriptCheckpoints(options, maxtick, cptdir):
sim_ticks = when
max_checkpoints = options.max_checkpoints
while num_checkpoints < max_checkpoints and \
exit_cause == "simulate() limit reached":
while (
num_checkpoints < max_checkpoints
and exit_cause == "simulate() limit reached"
):
if (sim_ticks + period) > maxtick:
exit_event = m5.simulate(maxtick - sim_ticks)
exit_cause = exit_event.getCause()
@@ -260,6 +274,7 @@ def scriptCheckpoints(options, maxtick, cptdir):
return exit_event
def benchCheckpoints(options, maxtick, cptdir):
exit_event = m5.simulate(maxtick - m5.curTick())
exit_cause = exit_event.getCause()
@@ -279,13 +294,18 @@ def benchCheckpoints(options, maxtick, cptdir):
return exit_event
# Set up environment for taking SimPoint checkpoints
# Expecting SimPoint files generated by SimPoint 3.2
def parseSimpointAnalysisFile(options, testsys):
import re
simpoint_filename, weight_filename, interval_length, warmup_length = \
options.take_simpoint_checkpoints.split(",", 3)
(
simpoint_filename,
weight_filename,
interval_length,
warmup_length,
) = options.take_simpoint_checkpoints.split(",", 3)
print("simpoint analysis file:", simpoint_filename)
print("simpoint weight file:", weight_filename)
print("interval length:", interval_length)
@@ -309,20 +329,19 @@ def parseSimpointAnalysisFile(options, testsys):
if m:
interval = int(m.group(1))
else:
fatal('unrecognized line in simpoint file!')
fatal("unrecognized line in simpoint file!")
line = weight_file.readline()
if not line:
fatal('not enough lines in simpoint weight file!')
fatal("not enough lines in simpoint weight file!")
m = re.match("([0-9\.e\-]+)\s+(\d+)", line)
if m:
weight = float(m.group(1))
else:
fatal('unrecognized line in simpoint weight file!')
fatal("unrecognized line in simpoint weight file!")
if (interval * interval_length - warmup_length > 0):
starting_inst_count = \
interval * interval_length - warmup_length
if interval * interval_length - warmup_length > 0:
starting_inst_count = interval * interval_length - warmup_length
actual_warmup_length = warmup_length
else:
# Not enough room for proper warmup
@@ -330,15 +349,20 @@ def parseSimpointAnalysisFile(options, testsys):
starting_inst_count = 0
actual_warmup_length = interval * interval_length
simpoints.append((interval, weight, starting_inst_count,
actual_warmup_length))
simpoints.append(
(interval, weight, starting_inst_count, actual_warmup_length)
)
# Sort SimPoints by starting inst count
simpoints.sort(key=lambda obj: obj[2])
for s in simpoints:
interval, weight, starting_inst_count, actual_warmup_length = s
print(str(interval), str(weight), starting_inst_count,
actual_warmup_length)
print(
str(interval),
str(weight),
starting_inst_count,
actual_warmup_length,
)
simpoint_start_insts.append(starting_inst_count)
print("Total # of simpoints:", len(simpoints))
@@ -346,6 +370,7 @@ def parseSimpointAnalysisFile(options, testsys):
return (simpoints, interval_length)
def takeSimpointCheckpoints(simpoints, interval_length, cptdir):
num_checkpoints = 0
index = 0
@@ -369,22 +394,34 @@ def takeSimpointCheckpoints(simpoints, interval_length, cptdir):
code = exit_event.getCode()
if exit_cause == "simpoint starting point found":
m5.checkpoint(joinpath(cptdir,
"cpt.simpoint_%02d_inst_%d_weight_%f_interval_%d_warmup_%d"
% (index, starting_inst_count, weight, interval_length,
actual_warmup_length)))
print("Checkpoint #%d written. start inst:%d weight:%f" %
(num_checkpoints, starting_inst_count, weight))
m5.checkpoint(
joinpath(
cptdir,
"cpt.simpoint_%02d_inst_%d_weight_%f_interval_%d_warmup_%d"
% (
index,
starting_inst_count,
weight,
interval_length,
actual_warmup_length,
),
)
)
print(
"Checkpoint #%d written. start inst:%d weight:%f"
% (num_checkpoints, starting_inst_count, weight)
)
num_checkpoints += 1
last_chkpnt_inst_count = starting_inst_count
else:
break
index += 1
print('Exiting @ tick %i because %s' % (m5.curTick(), exit_cause))
print("Exiting @ tick %i because %s" % (m5.curTick(), exit_cause))
print("%d checkpoints taken" % num_checkpoints)
sys.exit(code)
def restoreSimpointCheckpoint():
exit_event = m5.simulate()
exit_cause = exit_event.getCause()
@@ -401,9 +438,10 @@ def restoreSimpointCheckpoint():
print("Done running SimPoint!")
sys.exit(exit_event.getCode())
print('Exiting @ tick %i because %s' % (m5.curTick(), exit_cause))
print("Exiting @ tick %i because %s" % (m5.curTick(), exit_cause))
sys.exit(exit_event.getCode())
def repeatSwitch(testsys, repeat_switch_cpu_list, maxtick, switch_freq):
print("starting switch loop")
while True:
@@ -424,6 +462,7 @@ def repeatSwitch(testsys, repeat_switch_cpu_list, maxtick, switch_freq):
exit_event = m5.simulate(maxtick - m5.curTick())
return exit_event
def run(options, root, testsys, cpu_class):
if options.checkpoint_dir:
cptdir = options.checkpoint_dir
@@ -461,9 +500,17 @@ def run(options, root, testsys, cpu_class):
for i in range(np):
testsys.cpu[i].max_insts_any_thread = options.maxinsts
if options.override_vendor_string is not None:
for i in range(len(testsys.cpu)):
for j in range(len(testsys.cpu[i].isa)):
testsys.cpu[i].isa[
j
].vendor_string = options.override_vendor_string
if cpu_class:
switch_cpus = [cpu_class(switched_out=True, cpu_id=(i))
for i in range(np)]
switch_cpus = [
cpu_class(switched_out=True, cpu_id=(i)) for i in range(np)
]
for i in range(np):
if options.fast_forward:
@@ -471,8 +518,7 @@ def run(options, root, testsys, cpu_class):
switch_cpus[i].system = testsys
switch_cpus[i].workload = testsys.cpu[i].workload
switch_cpus[i].clk_domain = testsys.cpu[i].clk_domain
switch_cpus[i].progress_interval = \
testsys.cpu[i].progress_interval
switch_cpus[i].progress_interval = testsys.cpu[i].progress_interval
switch_cpus[i].isa = testsys.cpu[i].isa
# simulation period
if options.maxinsts:
@@ -485,9 +531,11 @@ def run(options, root, testsys, cpu_class):
switch_cpus[i].branchPred = bpClass()
if options.indirect_bp_type:
IndirectBPClass = ObjectList.indirect_bp_list.get(
options.indirect_bp_type)
switch_cpus[i].branchPred.indirectBranchPred = \
IndirectBPClass()
options.indirect_bp_type
)
switch_cpus[
i
].branchPred.indirectBranchPred = IndirectBPClass()
switch_cpus[i].createThreads()
# If elastic tracing is enabled attach the elastic trace probe
@@ -500,16 +548,16 @@ def run(options, root, testsys, cpu_class):
if options.repeat_switch:
switch_class = getCPUClass(options.cpu_type)[0]
if switch_class.require_caches() and \
not options.caches:
if switch_class.require_caches() and not options.caches:
print("%s: Must be used with caches" % str(switch_class))
sys.exit(1)
if not switch_class.support_take_over():
print("%s: CPU switching not supported" % str(switch_class))
sys.exit(1)
repeat_switch_cpus = [switch_class(switched_out=True, \
cpu_id=(i)) for i in range(np)]
repeat_switch_cpus = [
switch_class(switched_out=True, cpu_id=(i)) for i in range(np)
]
for i in range(np):
repeat_switch_cpus[i].system = testsys
@@ -523,24 +571,30 @@ def run(options, root, testsys, cpu_class):
if options.checker:
repeat_switch_cpus[i].addCheckerCpu()
repeat_switch_cpus[i].createThreads()
testsys.repeat_switch_cpus = repeat_switch_cpus
if cpu_class:
repeat_switch_cpu_list = [(switch_cpus[i], repeat_switch_cpus[i])
for i in range(np)]
repeat_switch_cpu_list = [
(switch_cpus[i], repeat_switch_cpus[i]) for i in range(np)
]
else:
repeat_switch_cpu_list = [(testsys.cpu[i], repeat_switch_cpus[i])
for i in range(np)]
repeat_switch_cpu_list = [
(testsys.cpu[i], repeat_switch_cpus[i]) for i in range(np)
]
if options.standard_switch:
switch_cpus = [TimingSimpleCPU(switched_out=True, cpu_id=(i))
for i in range(np)]
switch_cpus_1 = [DerivO3CPU(switched_out=True, cpu_id=(i))
for i in range(np)]
switch_cpus = [
TimingSimpleCPU(switched_out=True, cpu_id=(i)) for i in range(np)
]
switch_cpus_1 = [
DerivO3CPU(switched_out=True, cpu_id=(i)) for i in range(np)
]
for i in range(np):
switch_cpus[i].system = testsys
switch_cpus_1[i].system = testsys
switch_cpus[i].system = testsys
switch_cpus_1[i].system = testsys
switch_cpus[i].workload = testsys.cpu[i].workload
switch_cpus_1[i].workload = testsys.cpu[i].workload
switch_cpus[i].clk_domain = testsys.cpu[i].clk_domain
@@ -557,16 +611,17 @@ def run(options, root, testsys, cpu_class):
# Fast forward to a simpoint (warning: time consuming)
elif options.simpoint:
if testsys.cpu[i].workload[0].simpoint == 0:
fatal('simpoint not found')
testsys.cpu[i].max_insts_any_thread = \
fatal("simpoint not found")
testsys.cpu[i].max_insts_any_thread = (
testsys.cpu[i].workload[0].simpoint
)
# No distance specified, just switch
else:
testsys.cpu[i].max_insts_any_thread = 1
# warmup period
if options.warmup_insts:
switch_cpus[i].max_insts_any_thread = options.warmup_insts
switch_cpus[i].max_insts_any_thread = options.warmup_insts
# simulation period
if options.maxinsts:
@@ -577,25 +632,29 @@ def run(options, root, testsys, cpu_class):
switch_cpus[i].addCheckerCpu()
switch_cpus_1[i].addCheckerCpu()
switch_cpus[i].createThreads()
switch_cpus_1[i].createThreads()
testsys.switch_cpus = switch_cpus
testsys.switch_cpus_1 = switch_cpus_1
switch_cpu_list = [
(testsys.cpu[i], switch_cpus[i]) for i in range(np)
]
switch_cpu_list = [(testsys.cpu[i], switch_cpus[i]) for i in range(np)]
switch_cpu_list1 = [
(switch_cpus[i], switch_cpus_1[i]) for i in range(np)
]
# set the checkpoint in the cpu before m5.instantiate is called
if options.take_checkpoints != None and \
(options.simpoint or options.at_instruction):
if options.take_checkpoints != None and (
options.simpoint or options.at_instruction
):
offset = int(options.take_checkpoints)
# Set an instruction break point
if options.simpoint:
for i in range(np):
if testsys.cpu[i].workload[0].simpoint == 0:
fatal('no simpoint for testsys.cpu[%d].workload[0]', i)
checkpoint_inst = int(testsys.cpu[i].workload[0].simpoint) + offset
fatal("no simpoint for testsys.cpu[%d].workload[0]", i)
checkpoint_inst = (
int(testsys.cpu[i].workload[0].simpoint) + offset
)
testsys.cpu[i].max_insts_any_thread = checkpoint_inst
# used for output below
options.take_checkpoints = checkpoint_inst
@@ -607,7 +666,9 @@ def run(options, root, testsys, cpu_class):
testsys.cpu[i].max_insts_any_thread = offset
if options.take_simpoint_checkpoints != None:
simpoints, interval_length = parseSimpointAnalysisFile(options, testsys)
simpoints, interval_length = parseSimpointAnalysisFile(
options, testsys
)
checkpoint_dir = None
if options.checkpoint_restore:
@@ -640,31 +701,43 @@ def run(options, root, testsys, cpu_class):
# the ticks per simulated second
maxtick_from_rel += cpt_starttick
if options.at_instruction or options.simpoint:
warn("Relative max tick specified with --at-instruction or" \
" --simpoint\n These options don't specify the " \
"checkpoint start tick, so assuming\n you mean " \
"absolute max tick")
warn(
"Relative max tick specified with --at-instruction or"
" --simpoint\n These options don't specify the "
"checkpoint start tick, so assuming\n you mean "
"absolute max tick"
)
explicit_maxticks += 1
if options.maxtime:
maxtick_from_maxtime = m5.ticks.fromSeconds(options.maxtime)
explicit_maxticks += 1
if explicit_maxticks > 1:
warn("Specified multiple of --abs-max-tick, --rel-max-tick, --maxtime."\
" Using least")
warn(
"Specified multiple of --abs-max-tick, --rel-max-tick, --maxtime."
" Using least"
)
maxtick = min([maxtick_from_abs, maxtick_from_rel, maxtick_from_maxtime])
if options.checkpoint_restore != None and maxtick < cpt_starttick:
fatal("Bad maxtick (%d) specified: " \
"Checkpoint starts starts from tick: %d", maxtick, cpt_starttick)
fatal(
"Bad maxtick (%d) specified: "
"Checkpoint starts starts from tick: %d",
maxtick,
cpt_starttick,
)
if options.standard_switch or cpu_class:
if options.standard_switch:
print("Switch at instruction count:%s" %
str(testsys.cpu[0].max_insts_any_thread))
print(
"Switch at instruction count:%s"
% str(testsys.cpu[0].max_insts_any_thread)
)
exit_event = m5.simulate()
elif cpu_class and options.fast_forward:
print("Switch at instruction count:%s" %
str(testsys.cpu[0].max_insts_any_thread))
print(
"Switch at instruction count:%s"
% str(testsys.cpu[0].max_insts_any_thread)
)
exit_event = m5.simulate()
else:
print("Switch at curTick count:%s" % str(10000))
@@ -674,32 +747,37 @@ def run(options, root, testsys, cpu_class):
m5.switchCpus(testsys, switch_cpu_list)
if options.standard_switch:
print("Switch at instruction count:%d" %
(testsys.switch_cpus[0].max_insts_any_thread))
print(
"Switch at instruction count:%d"
% (testsys.switch_cpus[0].max_insts_any_thread)
)
#warmup instruction count may have already been set
# warmup instruction count may have already been set
if options.warmup_insts:
exit_event = m5.simulate()
else:
exit_event = m5.simulate(options.standard_switch)
print("Switching CPUS @ tick %s" % (m5.curTick()))
print("Simulation ends instruction count:%d" %
(testsys.switch_cpus_1[0].max_insts_any_thread))
print(
"Simulation ends instruction count:%d"
% (testsys.switch_cpus_1[0].max_insts_any_thread)
)
m5.switchCpus(testsys, switch_cpu_list1)
# If we're taking and restoring checkpoints, use checkpoint_dir
# option only for finding the checkpoints to restore from. This
# lets us test checkpointing by restoring from one set of
# checkpoints, generating a second set, and then comparing them.
if (options.take_checkpoints or options.take_simpoint_checkpoints) \
and options.checkpoint_restore:
if (
options.take_checkpoints or options.take_simpoint_checkpoints
) and options.checkpoint_restore:
if m5.options.outdir:
cptdir = m5.options.outdir
else:
cptdir = getcwd()
if options.take_checkpoints != None :
if options.take_checkpoints != None:
# Checkpoints being taken via the command line at <when> and at
# subsequent periods of <period>. Checkpoint instructions
# received from the benchmark running are ignored and skipped in
@@ -722,13 +800,15 @@ def run(options, root, testsys, cpu_class):
# If checkpoints are being taken, then the checkpoint instruction
# will occur in the benchmark code itself.
if options.repeat_switch and maxtick > options.repeat_switch:
exit_event = repeatSwitch(testsys, repeat_switch_cpu_list,
maxtick, options.repeat_switch)
exit_event = repeatSwitch(
testsys, repeat_switch_cpu_list, maxtick, options.repeat_switch
)
else:
exit_event = benchCheckpoints(options, maxtick, cptdir)
print('Exiting @ tick %i because %s' %
(m5.curTick(), exit_event.getCause()))
print(
"Exiting @ tick %i because %s" % (m5.curTick(), exit_event.getCause())
)
if options.checkpoint_at_end:
m5.checkpoint(joinpath(cptdir, "cpt.%d"))

View File

@@ -29,9 +29,10 @@ import os, sys
config_path = os.path.dirname(os.path.abspath(__file__))
config_root = os.path.dirname(config_path)
class PathSearchFunc(object):
_sys_paths = None
environment_variable = 'M5_PATH'
environment_variable = "M5_PATH"
def __init__(self, subdirs, sys_paths=None):
if isinstance(subdirs, str):
@@ -46,9 +47,9 @@ class PathSearchFunc(object):
else:
if self._sys_paths is None:
try:
paths = os.environ[self.environment_variable].split(':')
paths = os.environ[self.environment_variable].split(":")
except KeyError:
paths = [ '/dist/m5/system', '/n/poolfs/z/dist/m5/system' ]
paths = ["/dist/m5/system", "/n/poolfs/z/dist/m5/system"]
# expand '~' and '~user' in paths
paths = list(map(os.path.expanduser, paths))
@@ -59,8 +60,10 @@ class PathSearchFunc(object):
if not paths:
raise IOError(
"Can't find system files directory, "
"check your {} environment variable"
.format(self.environment_variable))
"check your {} environment variable".format(
self.environment_variable
)
)
self._sys_paths = list(paths)
@@ -69,9 +72,13 @@ class PathSearchFunc(object):
try:
return next(p for p in paths if os.path.exists(p))
except StopIteration:
raise IOError("Can't find file '{}' on {}."
.format(filepath, self.environment_variable))
raise IOError(
"Can't find file '{}' on {}.".format(
filepath, self.environment_variable
)
)
# Module-level search helpers, one per sub-directory name. ``script`` is the
# only one given an explicit ``sys_paths`` list (containing just
# ``config_root``); the other two rely on the constructor's default of None.
disk = PathSearchFunc("disks")
binary = PathSearchFunc("binaries")
script = PathSearchFunc("boot", sys_paths=[config_root])

View File

@@ -32,4 +32,3 @@
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

File diff suppressed because it is too large Load Diff

View File

@@ -28,65 +28,82 @@ from m5.objects import *
# Simple ALU Instructions have a latency of 1
class O3_ARM_v7a_Simple_Int(FUDesc):
    # Offers a single operation class, IntAlu, with opLat=1.
    opList = [OpDesc(opClass="IntAlu", opLat=1)]
    # NOTE(review): presumably the number of units of this kind — confirm
    # against the FUDesc definition.
    count = 2
# Complex ALU instructions have variable latencies
class O3_ARM_v7a_Complex_Int(FUDesc):
    # Integer multiply, integer divide and IprAccess operation classes.
    # The divide is the only entry declared with pipelined=False.
    opList = [
        OpDesc(opClass="IntMult", opLat=3, pipelined=True),
        OpDesc(opClass="IntDiv", opLat=12, pipelined=False),
        OpDesc(opClass="IprAccess", opLat=3, pipelined=True),
    ]
    # NOTE(review): presumably the number of units of this kind — confirm
    # against the FUDesc definition.
    count = 1
# Floating point and SIMD instructions
class O3_ARM_v7a_FP(FUDesc):
    # One OpDesc per Simd*, SimdFloat* and Float* operation class handled
    # here. FloatDiv and FloatSqrt are the only entries declared with
    # pipelined=False; every other entry leaves that argument unset.
    opList = [
        OpDesc(opClass="SimdAdd", opLat=4),
        OpDesc(opClass="SimdAddAcc", opLat=4),
        OpDesc(opClass="SimdAlu", opLat=4),
        OpDesc(opClass="SimdCmp", opLat=4),
        OpDesc(opClass="SimdCvt", opLat=3),
        OpDesc(opClass="SimdMisc", opLat=3),
        OpDesc(opClass="SimdMult", opLat=5),
        OpDesc(opClass="SimdMultAcc", opLat=5),
        OpDesc(opClass="SimdShift", opLat=3),
        OpDesc(opClass="SimdShiftAcc", opLat=3),
        OpDesc(opClass="SimdSqrt", opLat=9),
        OpDesc(opClass="SimdFloatAdd", opLat=5),
        OpDesc(opClass="SimdFloatAlu", opLat=5),
        OpDesc(opClass="SimdFloatCmp", opLat=3),
        OpDesc(opClass="SimdFloatCvt", opLat=3),
        OpDesc(opClass="SimdFloatDiv", opLat=3),
        OpDesc(opClass="SimdFloatMisc", opLat=3),
        OpDesc(opClass="SimdFloatMult", opLat=3),
        OpDesc(opClass="SimdFloatMultAcc", opLat=5),
        OpDesc(opClass="SimdFloatSqrt", opLat=9),
        OpDesc(opClass="FloatAdd", opLat=5),
        OpDesc(opClass="FloatCmp", opLat=5),
        OpDesc(opClass="FloatCvt", opLat=5),
        OpDesc(opClass="FloatDiv", opLat=9, pipelined=False),
        OpDesc(opClass="FloatSqrt", opLat=33, pipelined=False),
        OpDesc(opClass="FloatMult", opLat=4),
        OpDesc(opClass="FloatMultAcc", opLat=5),
        OpDesc(opClass="FloatMisc", opLat=3),
    ]
    # NOTE(review): presumably the number of units of this kind — confirm
    # against the FUDesc definition.
    count = 2
# Load/Store Units
class O3_ARM_v7a_Load(FUDesc):
    # MemRead and FloatMemRead operation classes, both with opLat=2.
    opList = [
        OpDesc(opClass="MemRead", opLat=2),
        OpDesc(opClass="FloatMemRead", opLat=2),
    ]
    # NOTE(review): presumably the number of units of this kind — confirm
    # against the FUDesc definition.
    count = 1
class O3_ARM_v7a_Store(FUDesc):
    # MemWrite and FloatMemWrite operation classes, both with opLat=2.
    opList = [
        OpDesc(opClass="MemWrite", opLat=2),
        OpDesc(opClass="FloatMemWrite", opLat=2),
    ]
    # NOTE(review): presumably the number of units of this kind — confirm
    # against the FUDesc definition.
    count = 1
# Functional Units for this CPU
class O3_ARM_v7a_FUP(FUPool):
    # One instance of each O3_ARM_v7a functional-unit description: simple
    # integer, complex integer, load, store and FP/SIMD.
    FUList = [
        O3_ARM_v7a_Simple_Int(),
        O3_ARM_v7a_Complex_Int(),
        O3_ARM_v7a_Load(),
        O3_ARM_v7a_Store(),
        O3_ARM_v7a_FP(),
    ]
# Bi-Mode Branch Predictor
class O3_ARM_v7a_BP(BiModeBP):
@@ -99,6 +116,7 @@ class O3_ARM_v7a_BP(BiModeBP):
RASSize = 16
instShiftAmt = 2
class O3_ARM_v7a_3(ArmO3CPU):
LQEntries = 16
SQEntries = 16
@@ -143,6 +161,7 @@ class O3_ARM_v7a_3(ArmO3CPU):
switched_out = False
branchPred = O3_ARM_v7a_BP()
# Instruction Cache
class O3_ARM_v7a_ICache(Cache):
tag_latency = 1
@@ -150,12 +169,13 @@ class O3_ARM_v7a_ICache(Cache):
response_latency = 1
mshrs = 2
tgts_per_mshr = 8
size = '32kB'
size = "32kB"
assoc = 2
is_read_only = True
# Writeback clean lines as well
writeback_clean = True
# Data Cache
class O3_ARM_v7a_DCache(Cache):
tag_latency = 2
@@ -163,12 +183,13 @@ class O3_ARM_v7a_DCache(Cache):
response_latency = 2
mshrs = 6
tgts_per_mshr = 8
size = '32kB'
size = "32kB"
assoc = 2
write_buffers = 16
# Consider the L2 a victim cache also for clean lines
writeback_clean = True
# L2 Cache
class O3_ARM_v7aL2(Cache):
tag_latency = 12
@@ -176,12 +197,12 @@ class O3_ARM_v7aL2(Cache):
response_latency = 12
mshrs = 16
tgts_per_mshr = 8
size = '1MB'
size = "1MB"
assoc = 16
write_buffers = 8
prefetch_on_access = True
clusivity = 'mostly_excl'
clusivity = "mostly_excl"
# Simple stride prefetcher
prefetcher = StridePrefetcher(degree=8, latency = 1)
prefetcher = StridePrefetcher(degree=8, latency=1)
tags = BaseSetAssoc()
replacement_policy = RandomRP()

View File

@@ -36,9 +36,7 @@
from pkgutil import iter_modules
from importlib import import_module
_cpu_modules = [
name for _, name, ispkg in iter_modules(__path__) if not ispkg
]
_cpu_modules = [name for _, name, ispkg in iter_modules(__path__) if not ispkg]
for c in _cpu_modules:
try:

View File

@@ -27,70 +27,89 @@
from m5.objects import *
#-----------------------------------------------------------------------
# -----------------------------------------------------------------------
# ex5 LITTLE core (based on the ARM Cortex-A7)
#-----------------------------------------------------------------------
# -----------------------------------------------------------------------
# Simple ALU instructions: opLat is set to 4 below (this comment previously
# said 3 — TODO confirm which value is intended)
class ex5_LITTLE_Simple_Int(MinorDefaultIntFU):
    # Sets opList to a single IntAlu entry with opLat=4.
    opList = [OpDesc(opClass="IntAlu", opLat=4)]
# Complex ALU instructions have variable latencies
class ex5_LITTLE_Complex_IntMul(MinorDefaultIntMulFU):
    # Sets opList to a single IntMult entry with opLat=7.
    opList = [OpDesc(opClass="IntMult", opLat=7)]
class ex5_LITTLE_Complex_IntDiv(MinorDefaultIntDivFU):
    # Sets opList to a single IntDiv entry with opLat=9.
    opList = [OpDesc(opClass="IntDiv", opLat=9)]
# Floating point and SIMD instructions
class ex5_LITTLE_FP(MinorDefaultFloatSimdFU):
    # One OpDesc per Simd*, SimdFloat* and Float* operation class handled
    # here. SimdFloatDiv and FloatDiv are the only entries declared with
    # pipelined=False; every other entry leaves that argument unset.
    opList = [
        OpDesc(opClass="SimdAdd", opLat=6),
        OpDesc(opClass="SimdAddAcc", opLat=4),
        OpDesc(opClass="SimdAlu", opLat=4),
        OpDesc(opClass="SimdCmp", opLat=1),
        OpDesc(opClass="SimdCvt", opLat=3),
        OpDesc(opClass="SimdMisc", opLat=3),
        OpDesc(opClass="SimdMult", opLat=4),
        OpDesc(opClass="SimdMultAcc", opLat=5),
        OpDesc(opClass="SimdShift", opLat=3),
        OpDesc(opClass="SimdShiftAcc", opLat=3),
        OpDesc(opClass="SimdSqrt", opLat=9),
        OpDesc(opClass="SimdFloatAdd", opLat=8),
        OpDesc(opClass="SimdFloatAlu", opLat=6),
        OpDesc(opClass="SimdFloatCmp", opLat=6),
        OpDesc(opClass="SimdFloatCvt", opLat=6),
        OpDesc(opClass="SimdFloatDiv", opLat=20, pipelined=False),
        OpDesc(opClass="SimdFloatMisc", opLat=6),
        OpDesc(opClass="SimdFloatMult", opLat=15),
        OpDesc(opClass="SimdFloatMultAcc", opLat=6),
        OpDesc(opClass="SimdFloatSqrt", opLat=17),
        OpDesc(opClass="FloatAdd", opLat=8),
        OpDesc(opClass="FloatCmp", opLat=6),
        OpDesc(opClass="FloatCvt", opLat=6),
        OpDesc(opClass="FloatDiv", opLat=15, pipelined=False),
        OpDesc(opClass="FloatSqrt", opLat=33),
        OpDesc(opClass="FloatMult", opLat=6),
    ]
# Load/Store Units
class ex5_LITTLE_MemFU(MinorDefaultMemFU):
    # MemRead and MemWrite operation classes, both with opLat=1.
    opList = [
        OpDesc(opClass="MemRead", opLat=1),
        OpDesc(opClass="MemWrite", opLat=1),
    ]
# Misc Unit
class ex5_LITTLE_MiscFU(MinorDefaultMiscFU):
    # IprAccess and InstPrefetch operation classes, both with opLat=1.
    opList = [
        OpDesc(opClass="IprAccess", opLat=1),
        OpDesc(opClass="InstPrefetch", opLat=1),
    ]
# Functional Units for this CPU
class ex5_LITTLE_FUP(MinorFUPool):
    # ex5_LITTLE_Simple_Int is listed twice on purpose-looking grounds: the
    # list holds two separate instances of it, and one instance of every
    # other ex5_LITTLE functional unit.
    funcUnits = [
        ex5_LITTLE_Simple_Int(),
        ex5_LITTLE_Simple_Int(),
        ex5_LITTLE_Complex_IntMul(),
        ex5_LITTLE_Complex_IntDiv(),
        ex5_LITTLE_FP(),
        ex5_LITTLE_MemFU(),
        ex5_LITTLE_MiscFU(),
    ]
class ex5_LITTLE(ArmMinorCPU):
    # The core's executeFuncUnits is an instance of the ex5_LITTLE
    # functional-unit pool.
    executeFuncUnits = ex5_LITTLE_FUP()
class L1Cache(Cache):
tag_latency = 2
data_latency = 2
@@ -99,19 +118,22 @@ class L1Cache(Cache):
# Consider the L2 a victim cache also for clean lines
writeback_clean = True
class L1I(L1Cache):
    # Instruction-side L1 settings layered on top of L1Cache; this variant
    # is flagged read-only.
    mshrs = 2
    size = "32kB"
    assoc = 2
    is_read_only = True
    tgts_per_mshr = 20
class L1D(L1Cache):
    # Data-side L1 settings layered on top of L1Cache.
    mshrs = 4
    size = "32kB"
    assoc = 4
    write_buffers = 4
# L2 Cache
class L2(Cache):
tag_latency = 9
@@ -119,12 +141,12 @@ class L2(Cache):
response_latency = 9
mshrs = 8
tgts_per_mshr = 12
size = '512kB'
size = "512kB"
assoc = 8
write_buffers = 16
prefetch_on_access = True
clusivity = 'mostly_excl'
clusivity = "mostly_excl"
# Simple stride prefetcher
prefetcher = StridePrefetcher(degree=1, latency = 1)
prefetcher = StridePrefetcher(degree=1, latency=1)
tags = BaseSetAssoc()
replacement_policy = RandomRP()

View File

@@ -27,66 +27,80 @@
from m5.objects import *
#-----------------------------------------------------------------------
# -----------------------------------------------------------------------
# ex5 big core (based on the ARM Cortex-A15)
#-----------------------------------------------------------------------
# -----------------------------------------------------------------------
# Simple ALU Instructions have a latency of 1
class ex5_big_Simple_Int(FUDesc):
opList = [ OpDesc(opClass='IntAlu', opLat=1) ]
opList = [OpDesc(opClass="IntAlu", opLat=1)]
count = 2
# Complex ALU instructions have variable latencies
class ex5_big_Complex_Int(FUDesc):
opList = [ OpDesc(opClass='IntMult', opLat=4, pipelined=True),
OpDesc(opClass='IntDiv', opLat=11, pipelined=False),
OpDesc(opClass='IprAccess', opLat=3, pipelined=True) ]
opList = [
OpDesc(opClass="IntMult", opLat=4, pipelined=True),
OpDesc(opClass="IntDiv", opLat=11, pipelined=False),
OpDesc(opClass="IprAccess", opLat=3, pipelined=True),
]
count = 1
# Floating point and SIMD instructions
class ex5_big_FP(FUDesc):
opList = [ OpDesc(opClass='SimdAdd', opLat=3),
OpDesc(opClass='SimdAddAcc', opLat=4),
OpDesc(opClass='SimdAlu', opLat=4),
OpDesc(opClass='SimdCmp', opLat=4),
OpDesc(opClass='SimdCvt', opLat=3),
OpDesc(opClass='SimdMisc', opLat=3),
OpDesc(opClass='SimdMult',opLat=6),
OpDesc(opClass='SimdMultAcc',opLat=5),
OpDesc(opClass='SimdShift',opLat=3),
OpDesc(opClass='SimdShiftAcc', opLat=3),
OpDesc(opClass='SimdSqrt', opLat=9),
OpDesc(opClass='SimdFloatAdd',opLat=6),
OpDesc(opClass='SimdFloatAlu',opLat=5),
OpDesc(opClass='SimdFloatCmp', opLat=3),
OpDesc(opClass='SimdFloatCvt', opLat=3),
OpDesc(opClass='SimdFloatDiv', opLat=21),
OpDesc(opClass='SimdFloatMisc', opLat=3),
OpDesc(opClass='SimdFloatMult', opLat=6),
OpDesc(opClass='SimdFloatMultAcc',opLat=1),
OpDesc(opClass='SimdFloatSqrt', opLat=9),
OpDesc(opClass='FloatAdd', opLat=6),
OpDesc(opClass='FloatCmp', opLat=5),
OpDesc(opClass='FloatCvt', opLat=5),
OpDesc(opClass='FloatDiv', opLat=12, pipelined=False),
OpDesc(opClass='FloatSqrt', opLat=33, pipelined=False),
OpDesc(opClass='FloatMult', opLat=8) ]
opList = [
OpDesc(opClass="SimdAdd", opLat=3),
OpDesc(opClass="SimdAddAcc", opLat=4),
OpDesc(opClass="SimdAlu", opLat=4),
OpDesc(opClass="SimdCmp", opLat=4),
OpDesc(opClass="SimdCvt", opLat=3),
OpDesc(opClass="SimdMisc", opLat=3),
OpDesc(opClass="SimdMult", opLat=6),
OpDesc(opClass="SimdMultAcc", opLat=5),
OpDesc(opClass="SimdShift", opLat=3),
OpDesc(opClass="SimdShiftAcc", opLat=3),
OpDesc(opClass="SimdSqrt", opLat=9),
OpDesc(opClass="SimdFloatAdd", opLat=6),
OpDesc(opClass="SimdFloatAlu", opLat=5),
OpDesc(opClass="SimdFloatCmp", opLat=3),
OpDesc(opClass="SimdFloatCvt", opLat=3),
OpDesc(opClass="SimdFloatDiv", opLat=21),
OpDesc(opClass="SimdFloatMisc", opLat=3),
OpDesc(opClass="SimdFloatMult", opLat=6),
OpDesc(opClass="SimdFloatMultAcc", opLat=1),
OpDesc(opClass="SimdFloatSqrt", opLat=9),
OpDesc(opClass="FloatAdd", opLat=6),
OpDesc(opClass="FloatCmp", opLat=5),
OpDesc(opClass="FloatCvt", opLat=5),
OpDesc(opClass="FloatDiv", opLat=12, pipelined=False),
OpDesc(opClass="FloatSqrt", opLat=33, pipelined=False),
OpDesc(opClass="FloatMult", opLat=8),
]
count = 2
# Load/Store Units
class ex5_big_Load(FUDesc):
opList = [ OpDesc(opClass='MemRead',opLat=2) ]
opList = [OpDesc(opClass="MemRead", opLat=2)]
count = 1
class ex5_big_Store(FUDesc):
opList = [OpDesc(opClass='MemWrite',opLat=2) ]
opList = [OpDesc(opClass="MemWrite", opLat=2)]
count = 1
# Functional Units for this CPU
class ex5_big_FUP(FUPool):
FUList = [ex5_big_Simple_Int(), ex5_big_Complex_Int(),
ex5_big_Load(), ex5_big_Store(), ex5_big_FP()]
FUList = [
ex5_big_Simple_Int(),
ex5_big_Complex_Int(),
ex5_big_Load(),
ex5_big_Store(),
ex5_big_FP(),
]
# Bi-Mode Branch Predictor
class ex5_big_BP(BiModeBP):
@@ -99,6 +113,7 @@ class ex5_big_BP(BiModeBP):
RASSize = 48
instShiftAmt = 2
class ex5_big(ArmO3CPU):
LQEntries = 16
SQEntries = 16
@@ -142,6 +157,7 @@ class ex5_big(ArmO3CPU):
switched_out = False
branchPred = ex5_big_BP()
class L1Cache(Cache):
tag_latency = 2
data_latency = 2
@@ -150,20 +166,23 @@ class L1Cache(Cache):
# Consider the L2 a victim cache also for clean lines
writeback_clean = True
# Instruction Cache
class L1I(L1Cache):
mshrs = 2
size = '32kB'
size = "32kB"
assoc = 2
is_read_only = True
# Data Cache
class L1D(L1Cache):
mshrs = 6
size = '32kB'
size = "32kB"
assoc = 2
write_buffers = 16
# L2 Cache
class L2(Cache):
tag_latency = 15
@@ -171,12 +190,12 @@ class L2(Cache):
response_latency = 15
mshrs = 16
tgts_per_mshr = 8
size = '2MB'
size = "2MB"
assoc = 16
write_buffers = 8
prefetch_on_access = True
clusivity = 'mostly_excl'
clusivity = "mostly_excl"
# Simple stride prefetcher
prefetcher = StridePrefetcher(degree=8, latency = 1)
prefetcher = StridePrefetcher(degree=8, latency=1)
tags = BaseSetAssoc()
replacement_policy = RandomRP()

File diff suppressed because it is too large Load Diff

33
configs/dist/sw.py vendored
View File

@@ -35,33 +35,39 @@ from m5.defines import buildEnv
from m5.objects import *
from m5.util import addToPath, fatal
addToPath('../')
addToPath("../")
from common import Simulation
from common import Options
def build_switch(args):
# instantiate an EtherSwitch
switch = EtherSwitch()
# instantiate distEtherLinks to connect switch ports
# to other gem5 instances
switch.portlink = [DistEtherLink(speed = args.ethernet_linkspeed,
delay = args.ethernet_linkdelay,
dist_rank = args.dist_rank,
dist_size = args.dist_size,
server_name = args.dist_server_name,
server_port = args.dist_server_port,
sync_start = args.dist_sync_start,
sync_repeat = args.dist_sync_repeat,
is_switch = True,
num_nodes = args.dist_size)
for i in range(args.dist_size)]
switch.portlink = [
DistEtherLink(
speed=args.ethernet_linkspeed,
delay=args.ethernet_linkdelay,
dist_rank=args.dist_rank,
dist_size=args.dist_size,
server_name=args.dist_server_name,
server_port=args.dist_server_port,
sync_start=args.dist_sync_start,
sync_repeat=args.dist_sync_repeat,
is_switch=True,
num_nodes=args.dist_size,
)
for i in range(args.dist_size)
]
for (i, link) in enumerate(switch.portlink):
link.int0 = switch.interface[i]
return switch
def main():
# Add options
parser = argparse.ArgumentParser()
@@ -70,8 +76,9 @@ def main():
args = parser.parse_args()
system = build_switch(args)
root = Root(full_system = True, system = system)
root = Root(full_system=True, system=system)
Simulation.run(args, root, None, None)
if __name__ == "__m5_main__":
main()

View File

@@ -42,11 +42,11 @@ from m5.objects import *
from m5.util import addToPath
from m5.stats import periodicStatDump
addToPath('../')
addToPath("../")
from common import ObjectList
from common import MemConfig
addToPath('../../util')
addToPath("../../util")
import protolib
# this script is helpful to observe the memory latency for various
@@ -61,8 +61,15 @@ try:
except:
print("Did not find packet proto definitions, attempting to generate")
from subprocess import call
error = call(['protoc', '--python_out=configs/dram',
'--proto_path=src/proto', 'src/proto/packet.proto'])
error = call(
[
"protoc",
"--python_out=configs/dram",
"--proto_path=src/proto",
"src/proto/packet.proto",
]
)
if not error:
print("Generated packet proto definitions")
@@ -79,24 +86,34 @@ except:
parser = argparse.ArgumentParser()
parser.add_argument("--mem-type", default="DDR3_1600_8x8",
choices=ObjectList.mem_list.get_names(),
help = "type of memory to use")
parser.add_argument("--mem-size", action="store", type=str,
default="16MB",
help="Specify the memory size")
parser.add_argument("--reuse-trace", action="store_true",
help="Prevent generation of traces and reuse existing")
parser.add_argument(
"--mem-type",
default="DDR3_1600_8x8",
choices=ObjectList.mem_list.get_names(),
help="type of memory to use",
)
parser.add_argument(
"--mem-size",
action="store",
type=str,
default="16MB",
help="Specify the memory size",
)
parser.add_argument(
"--reuse-trace",
action="store_true",
help="Prevent generation of traces and reuse existing",
)
args = parser.parse_args()
# start by creating the system itself, using a multi-layer 2.0 GHz
# crossbar, delivering 64 bytes / 3 cycles (one header cycle) which
# amounts to 42.7 GByte/s per layer and thus per port
system = System(membus = SystemXBar(width = 32))
system.clk_domain = SrcClockDomain(clock = '2.0GHz',
voltage_domain =
VoltageDomain(voltage = '1V'))
system = System(membus=SystemXBar(width=32))
system.clk_domain = SrcClockDomain(
clock="2.0GHz", voltage_domain=VoltageDomain(voltage="1V")
)
mem_range = AddrRange(args.mem_size)
system.mem_ranges = [mem_range]
@@ -122,12 +139,12 @@ for ctrl in system.mem_ctrls:
if isinstance(ctrl, m5.objects.MemCtrl):
# make the DRAM refresh interval sufficiently infinite to avoid
# latency spikes
ctrl.tREFI = '100s'
ctrl.tREFI = "100s"
# use the same concept as the utilisation sweep, and print the config
# so that we can later read it in
cfg_file_name = os.path.join(m5.options.outdir, "lat_mem_rd.cfg")
cfg_file = open(cfg_file_name, 'w')
cfg_file = open(cfg_file_name, "w")
# set an appropriate burst length in bytes
burst_size = 64
@@ -137,6 +154,7 @@ system.cache_line_size = burst_size
def is_pow2(num):
    """Return True if the integer *num* is a power of two, else False.

    Zero is rejected explicitly. For any other integer the test relies on
    ``num & (num - 1)`` clearing the lowest set bit: the result is zero
    only when exactly one bit was set.
    """
    if num == 0:
        return False
    # A single set bit leaves nothing behind once the lowest bit is cleared.
    return not num & (num - 1)
# assume we start every range at 0
max_range = int(mem_range.end)
@@ -164,7 +182,7 @@ itt = 150 * 1000
# the actual measurement
def create_trace(filename, max_addr, burst_size, itt):
try:
proto_out = gzip.open(filename, 'wb')
proto_out = gzip.open(filename, "wb")
except IOError:
print("Failed to open ", filename, " for writing")
exit(-1)
@@ -184,6 +202,7 @@ def create_trace(filename, max_addr, burst_size, itt):
addrs = list(range(0, max_addr, burst_size))
import random
random.shuffle(addrs)
tick = 0
@@ -202,6 +221,7 @@ def create_trace(filename, max_addr, burst_size, itt):
proto_out.close()
# this will take a while, so keep the user informed
print("Generating traces, please wait...")
@@ -211,22 +231,23 @@ period = int(itt * (max_range / burst_size))
# now we create the states for each range
for r in ranges:
filename = os.path.join(m5.options.outdir,
'lat_mem_rd%d.trc.gz' % nxt_range)
filename = os.path.join(
m5.options.outdir, "lat_mem_rd%d.trc.gz" % nxt_range
)
if not args.reuse_trace:
# create the actual random trace for this range
create_trace(filename, r, burst_size, itt)
# the warming state
cfg_file.write("STATE %d %d TRACE %s 0\n" %
(nxt_state, period, filename))
cfg_file.write("STATE %d %d TRACE %s 0\n" % (nxt_state, period, filename))
nxt_state = nxt_state + 1
# the measuring states
for i in range(iterations):
cfg_file.write("STATE %d %d TRACE %s 0\n" %
(nxt_state, period, filename))
cfg_file.write(
"STATE %d %d TRACE %s 0\n" % (nxt_state, period, filename)
)
nxt_state = nxt_state + 1
nxt_range = nxt_range + 1
@@ -242,8 +263,7 @@ cfg_file.write("TRANSITION %d %d 1\n" % (nxt_state - 1, nxt_state - 1))
cfg_file.close()
# create a traffic generator, and point it to the file we just created
system.tgen = TrafficGen(config_file = cfg_file_name,
progress_check = '10s')
system.tgen = TrafficGen(config_file=cfg_file_name, progress_check="10s")
# add a communication monitor
system.monitor = CommMonitor()
@@ -267,19 +287,20 @@ class L3Cache(Cache):
tgts_per_mshr = 12
write_buffers = 16
# note that everything is in the same clock domain, 2.0 GHz as
# specified above
system.l1cache = L1_DCache(size = '64kB')
system.l1cache = L1_DCache(size="64kB")
system.monitor.mem_side_port = system.l1cache.cpu_side
system.l2cache = L2Cache(size = '512kB', writeback_clean = True)
system.l2cache = L2Cache(size="512kB", writeback_clean=True)
system.l2cache.xbar = L2XBar()
system.l1cache.mem_side = system.l2cache.xbar.cpu_side_ports
system.l2cache.cpu_side = system.l2cache.xbar.mem_side_ports
# make the L3 mostly exclusive, and correspondingly ensure that the L2
# writes back also clean lines to the L3
system.l3cache = L3Cache(size = '4MB', clusivity = 'mostly_excl')
system.l3cache = L3Cache(size="4MB", clusivity="mostly_excl")
system.l3cache.xbar = L2XBar()
system.l2cache.mem_side = system.l3cache.xbar.cpu_side_ports
system.l3cache.cpu_side = system.l3cache.xbar.mem_side_ports
@@ -292,8 +313,8 @@ system.system_port = system.membus.cpu_side_ports
periodicStatDump(period)
# run Forrest, run!
root = Root(full_system = False, system = system)
root.system.mem_mode = 'timing'
root = Root(full_system=False, system=system)
root.system.mem_mode = "timing"
m5.instantiate()
m5.simulate(nxt_state * period)

View File

@@ -40,7 +40,7 @@ from m5.objects import *
from m5.util import addToPath
from m5.stats import periodicStatDump
addToPath('../')
addToPath("../")
from common import ObjectList
from common import MemConfig
@@ -52,46 +52,70 @@ from common import MemConfig
# through an idle state with no requests to enforce self-refresh.
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
# Use a single-channel DDR4-2400 in 16x4 configuration by default
parser.add_argument("--mem-type", default="DDR4_2400_16x4",
choices=ObjectList.mem_list.get_names(),
help = "type of memory to use")
parser.add_argument(
"--mem-type",
default="DDR4_2400_16x4",
choices=ObjectList.mem_list.get_names(),
help="type of memory to use",
)
parser.add_argument("--mem-ranks", "-r", type=int, default=1,
help = "Number of ranks to iterate across")
parser.add_argument(
"--mem-ranks",
"-r",
type=int,
default=1,
help="Number of ranks to iterate across",
)
parser.add_argument("--page-policy", "-p",
choices=["close_adaptive", "open_adaptive"],
default="close_adaptive", help="controller page policy")
parser.add_argument(
"--page-policy",
"-p",
choices=["close_adaptive", "open_adaptive"],
default="close_adaptive",
help="controller page policy",
)
parser.add_argument("--itt-list", "-t", default="1 20 100",
help="a list of multipliers for the max value of itt, " \
"e.g. \"1 20 100\"")
parser.add_argument(
"--itt-list",
"-t",
default="1 20 100",
help="a list of multipliers for the max value of itt, " 'e.g. "1 20 100"',
)
parser.add_argument("--rd-perc", type=int, default=100,
help = "Percentage of read commands")
parser.add_argument(
"--rd-perc", type=int, default=100, help="Percentage of read commands"
)
parser.add_argument("--addr-map",
choices=m5.objects.AddrMap.vals,
default="RoRaBaCoCh", help = "DRAM address map policy")
parser.add_argument(
"--addr-map",
choices=m5.objects.AddrMap.vals,
default="RoRaBaCoCh",
help="DRAM address map policy",
)
parser.add_argument("--idle-end", type=int, default=50000000,
help = "time in ps of an idle period at the end ")
parser.add_argument(
"--idle-end",
type=int,
default=50000000,
help="time in ps of an idle period at the end ",
)
args = parser.parse_args()
# Start with the system itself, using a multi-layer 2.0 GHz
# crossbar, delivering 64 bytes / 3 cycles (one header cycle)
# which amounts to 42.7 GByte/s per layer and thus per port.
system = System(membus = IOXBar(width = 32))
system.clk_domain = SrcClockDomain(clock = '2.0GHz',
voltage_domain =
VoltageDomain(voltage = '1V'))
system = System(membus=IOXBar(width=32))
system.clk_domain = SrcClockDomain(
clock="2.0GHz", voltage_domain=VoltageDomain(voltage="1V")
)
# We are fine with 256 MB memory for now.
mem_range = AddrRange('256MB')
mem_range = AddrRange("256MB")
# Start address is 0
system.mem_ranges = [mem_range]
@@ -130,20 +154,27 @@ period = 250000000
# We specify the states in a config file input to the traffic generator.
cfg_file_name = "lowp_sweep.cfg"
cfg_file_path = os.path.dirname(__file__) + "/" +cfg_file_name
cfg_file = open(cfg_file_path, 'w')
cfg_file_path = os.path.dirname(__file__) + "/" + cfg_file_name
cfg_file = open(cfg_file_path, "w")
# Get the number of banks
nbr_banks = int(system.mem_ctrls[0].dram.banks_per_rank.value)
# determine the burst size in bytes
burst_size = int((system.mem_ctrls[0].dram.devices_per_rank.value *
system.mem_ctrls[0].dram.device_bus_width.value *
system.mem_ctrls[0].dram.burst_length.value) / 8)
burst_size = int(
(
system.mem_ctrls[0].dram.devices_per_rank.value
* system.mem_ctrls[0].dram.device_bus_width.value
* system.mem_ctrls[0].dram.burst_length.value
)
/ 8
)
# next, get the page size in bytes (the rowbuffer size is already in bytes)
page_size = system.mem_ctrls[0].dram.devices_per_rank.value * \
system.mem_ctrls[0].dram.device_rowbuffer_size.value
page_size = (
system.mem_ctrls[0].dram.devices_per_rank.value
* system.mem_ctrls[0].dram.device_rowbuffer_size.value
)
# Inter-request delay should be such that we can hit as many transitions
# to/from low power states as possible. We provide a min and max itt to the
@@ -151,23 +182,25 @@ page_size = system.mem_ctrls[0].dram.devices_per_rank.value * \
# seconds and we need it in ticks (ps).
itt_min = system.mem_ctrls[0].dram.tBURST.value * 1000000000000
#The itt value when set to (tRAS + tRP + tCK) covers the case where
# The itt value when set to (tRAS + tRP + tCK) covers the case where
# a read command is delayed beyond the delay from ACT to PRE_PDN entry of the
# previous command. For write command followed by precharge, this delay
# between a write and power down entry will be tRCD + tCL + tWR + tRP + tCK.
# As we use this delay as a unit and create multiples of it as bigger delays
# for the sweep, this parameter works for reads, writes and mix of them.
pd_entry_time = (system.mem_ctrls[0].dram.tRAS.value +
system.mem_ctrls[0].dram.tRP.value +
system.mem_ctrls[0].dram.tCK.value) * 1000000000000
pd_entry_time = (
system.mem_ctrls[0].dram.tRAS.value
+ system.mem_ctrls[0].dram.tRP.value
+ system.mem_ctrls[0].dram.tCK.value
) * 1000000000000
# We sweep itt max using the multipliers specified by the user.
itt_max_str = args.itt_list.strip().split()
itt_max_multiples = [ int(x) for x in itt_max_str ]
itt_max_multiples = [int(x) for x in itt_max_str]
if len(itt_max_multiples) == 0:
fatal("String for itt-max-list detected empty\n")
itt_max_values = [ pd_entry_time * m for m in itt_max_multiples ]
itt_max_values = [pd_entry_time * m for m in itt_max_multiples]
# Generate request addresses in the entire range, assume we start at 0
max_addr = mem_range.end
@@ -180,12 +213,14 @@ stride_values = [burst_size, mid_stride, max_stride]
# be selective about bank utilization instead of going from 1 to the number of
# banks
bank_util_values = [1, int(nbr_banks/2), nbr_banks]
bank_util_values = [1, int(nbr_banks / 2), nbr_banks]
# Next we create the config file, but first a comment
cfg_file.write("""# STATE state# period mode=DRAM
cfg_file.write(
"""# STATE state# period mode=DRAM
# read_percent start_addr end_addr req_size min_itt max_itt data_limit
# stride_size page_size #banks #banks_util addr_map #ranks\n""")
# stride_size page_size #banks #banks_util addr_map #ranks\n"""
)
addr_map = m5.objects.AddrMap.map[args.addr_map]
@@ -193,12 +228,27 @@ nxt_state = 0
for itt_max in itt_max_values:
for bank in bank_util_values:
for stride_size in stride_values:
cfg_file.write("STATE %d %d %s %d 0 %d %d "
"%d %d %d %d %d %d %d %d %d\n" %
(nxt_state, period, "DRAM", args.rd_perc, max_addr,
burst_size, itt_min, itt_max, 0, stride_size,
page_size, nbr_banks, bank, addr_map,
args.mem_ranks))
cfg_file.write(
"STATE %d %d %s %d 0 %d %d "
"%d %d %d %d %d %d %d %d %d\n"
% (
nxt_state,
period,
"DRAM",
args.rd_perc,
max_addr,
burst_size,
itt_min,
itt_max,
0,
stride_size,
page_size,
nbr_banks,
bank,
addr_map,
args.mem_ranks,
)
)
nxt_state = nxt_state + 1
# State for idle period
@@ -217,7 +267,7 @@ cfg_file.write("TRANSITION %d %d 1\n" % (nxt_state, nxt_state))
cfg_file.close()
# create a traffic generator, and point it to the file we just created
system.tgen = TrafficGen(config_file = cfg_file_path)
system.tgen = TrafficGen(config_file=cfg_file_path)
# add a communication monitor
system.monitor = CommMonitor()
@@ -232,8 +282,8 @@ system.system_port = system.membus.cpu_side_ports
# every period, dump and reset all stats
periodicStatDump(period)
root = Root(full_system = False, system = system)
root.system.mem_mode = 'timing'
root = Root(full_system=False, system=system)
root.system.mem_mode = "timing"
m5.instantiate()
@@ -242,8 +292,10 @@ m5.instantiate()
m5.simulate(nxt_state * period + idle_period)
print("--- Done DRAM low power sweep ---")
print("Fixed params - ")
print("\tburst: %d, banks: %d, max stride: %d, itt min: %s ns" % \
(burst_size, nbr_banks, max_stride, itt_min))
print(
"\tburst: %d, banks: %d, max stride: %d, itt min: %s ns"
% (burst_size, nbr_banks, max_stride, itt_min)
)
print("Swept params - ")
print("\titt max multiples input:", itt_max_multiples)
print("\titt max values", itt_max_values)

View File

@@ -41,7 +41,7 @@ from m5.objects import *
from m5.util import addToPath
from m5.stats import periodicStatDump
addToPath('../')
addToPath("../")
from common import ObjectList
from common import MemConfig
@@ -54,29 +54,44 @@ from common import MemConfig
parser = argparse.ArgumentParser()
dram_generators = {
"DRAM" : lambda x: x.createDram,
"DRAM_ROTATE" : lambda x: x.createDramRot,
"DRAM": lambda x: x.createDram,
"DRAM_ROTATE": lambda x: x.createDramRot,
}
# Use a single-channel DDR3-1600 x64 (8x8 topology) by default
parser.add_argument("--mem-type", default="DDR3_1600_8x8",
choices=ObjectList.mem_list.get_names(),
help = "type of memory to use")
parser.add_argument(
"--mem-type",
default="DDR3_1600_8x8",
choices=ObjectList.mem_list.get_names(),
help="type of memory to use",
)
parser.add_argument("--mem-ranks", "-r", type=int, default=1,
help = "Number of ranks to iterate across")
parser.add_argument(
"--mem-ranks",
"-r",
type=int,
default=1,
help="Number of ranks to iterate across",
)
parser.add_argument("--rd_perc", type=int, default=100,
help = "Percentage of read commands")
parser.add_argument(
"--rd_perc", type=int, default=100, help="Percentage of read commands"
)
parser.add_argument("--mode", default="DRAM",
choices=list(dram_generators.keys()),
help = "DRAM: Random traffic; \
DRAM_ROTATE: Traffic rotating across banks and ranks")
parser.add_argument(
"--mode",
default="DRAM",
choices=list(dram_generators.keys()),
help="DRAM: Random traffic; \
DRAM_ROTATE: Traffic rotating across banks and ranks",
)
parser.add_argument("--addr-map",
choices=ObjectList.dram_addr_map_list.get_names(),
default="RoRaBaCoCh", help = "DRAM address map policy")
parser.add_argument(
"--addr-map",
choices=ObjectList.dram_addr_map_list.get_names(),
default="RoRaBaCoCh",
help="DRAM address map policy",
)
args = parser.parse_args()
@@ -86,13 +101,13 @@ args = parser.parse_args()
# start with the system itself, using a multi-layer 2.0 GHz
# crossbar, delivering 64 bytes / 3 cycles (one header cycle)
# which amounts to 42.7 GByte/s per layer and thus per port
system = System(membus = IOXBar(width = 32))
system.clk_domain = SrcClockDomain(clock = '2.0GHz',
voltage_domain =
VoltageDomain(voltage = '1V'))
system = System(membus=IOXBar(width=32))
system.clk_domain = SrcClockDomain(
clock="2.0GHz", voltage_domain=VoltageDomain(voltage="1V")
)
# we are fine with 256 MB memory for now
mem_range = AddrRange('256MB')
mem_range = AddrRange("256MB")
system.mem_ranges = [mem_range]
# do not worry about reserving space for the backing store
@@ -131,18 +146,31 @@ period = 250000000
nbr_banks = system.mem_ctrls[0].dram.banks_per_rank.value
# determine the burst length in bytes
burst_size = int((system.mem_ctrls[0].dram.devices_per_rank.value *
system.mem_ctrls[0].dram.device_bus_width.value *
system.mem_ctrls[0].dram.burst_length.value) / 8)
burst_size = int(
(
system.mem_ctrls[0].dram.devices_per_rank.value
* system.mem_ctrls[0].dram.device_bus_width.value
* system.mem_ctrls[0].dram.burst_length.value
)
/ 8
)
# next, get the page size in bytes
page_size = system.mem_ctrls[0].dram.devices_per_rank.value * \
system.mem_ctrls[0].dram.device_rowbuffer_size.value
page_size = (
system.mem_ctrls[0].dram.devices_per_rank.value
* system.mem_ctrls[0].dram.device_rowbuffer_size.value
)
# match the maximum bandwidth of the memory, the parameter is in seconds
# and we need it in ticks (ps)
itt = getattr(system.mem_ctrls[0].dram.tBURST_MIN, 'value',
system.mem_ctrls[0].dram.tBURST.value) * 1000000000000
itt = (
getattr(
system.mem_ctrls[0].dram.tBURST_MIN,
"value",
system.mem_ctrls[0].dram.tBURST.value,
)
* 1000000000000
)
# assume we start at 0
max_addr = mem_range.end
@@ -168,27 +196,43 @@ system.system_port = system.membus.cpu_side_ports
periodicStatDump(period)
# run Forrest, run!
root = Root(full_system = False, system = system)
root.system.mem_mode = 'timing'
root = Root(full_system=False, system=system)
root.system.mem_mode = "timing"
m5.instantiate()
def trace():
addr_map = ObjectList.dram_addr_map_list.get(args.addr_map)
generator = dram_generators[args.mode](system.tgen)
for stride_size in range(burst_size, max_stride + 1, burst_size):
for bank in range(1, nbr_banks + 1):
num_seq_pkts = int(math.ceil(float(stride_size) / burst_size))
yield generator(period,
0, max_addr, burst_size, int(itt), int(itt),
args.rd_perc, 0,
num_seq_pkts, page_size, nbr_banks, bank,
addr_map, args.mem_ranks)
yield generator(
period,
0,
max_addr,
burst_size,
int(itt),
int(itt),
args.rd_perc,
0,
num_seq_pkts,
page_size,
nbr_banks,
bank,
addr_map,
args.mem_ranks,
)
yield system.tgen.createExit(0)
system.tgen.start(trace())
m5.simulate()
print("DRAM sweep with burst: %d, banks: %d, max stride: %d, request \
generation period: %d" % (burst_size, nbr_banks, max_stride, itt))
print(
"DRAM sweep with burst: %d, banks: %d, max stride: %d, request \
generation period: %d"
% (burst_size, nbr_banks, max_stride, itt)
)

File diff suppressed because it is too large Load Diff

View File

@@ -46,7 +46,7 @@ from m5.objects import *
from m5.options import *
import argparse
m5.util.addToPath('../..')
m5.util.addToPath("../..")
from common import SysPaths
from common import MemConfig
@@ -60,25 +60,21 @@ import workloads
# l1_icache_class, l1_dcache_class, l2_Cache_class). Any of
# the cache classes may be 'None' if the particular cache is not present.
cpu_types = {
"atomic" : ( AtomicSimpleCPU, None, None, None),
"minor" : (MinorCPU,
devices.L1I, devices.L1D,
devices.L2),
"hpi" : ( HPI.HPI,
HPI.HPI_ICache, HPI.HPI_DCache,
HPI.HPI_L2)
"atomic": (AtomicSimpleCPU, None, None, None),
"minor": (MinorCPU, devices.L1I, devices.L1D, devices.L2),
"hpi": (HPI.HPI, HPI.HPI_ICache, HPI.HPI_DCache, HPI.HPI_L2),
}
def create_cow_image(name):
"""Helper function to create a Copy-on-Write disk image"""
image = CowDiskImage()
image.child.image_file = name
return image;
return image
def create(args):
''' Create and configure the system object. '''
"""Create and configure the system object."""
if args.readfile and not os.path.isfile(args.readfile):
print("Error: Bootscript %s does not exist" % args.readfile)
@@ -93,11 +89,13 @@ def create(args):
platform = ObjectList.platform_list.get(args.machine_type)
system = devices.SimpleSystem(want_caches,
args.mem_size,
platform=platform(),
mem_mode=mem_mode,
readfile=args.readfile)
system = devices.SimpleSystem(
want_caches,
args.mem_size,
platform=platform(),
mem_mode=mem_mode,
readfile=args.readfile,
)
MemConfig.config_mem(args, system)
@@ -107,7 +105,7 @@ def create(args):
stdout=args.semi_stdout,
stderr=args.semi_stderr,
files_root_dir=args.semi_path,
cmd_line = " ".join([ object_file ] + args.args)
cmd_line=" ".join([object_file] + args.args),
)
if args.disk_image:
@@ -116,17 +114,17 @@ def create(args):
# functionality to avoid writing changes to the stored copy of
# the disk image.
system.realview.vio[0].vio = VirtIOBlock(
image=create_cow_image(args.disk_image))
image=create_cow_image(args.disk_image)
)
# Wire up the system's memory system
system.connect()
# Add CPU clusters to the system
system.cpu_cluster = [
devices.CpuCluster(system,
args.num_cores,
args.cpu_freq, "1.0V",
*cpu_types[args.cpu]),
devices.CpuCluster(
system, args.num_cores, args.cpu_freq, "1.0V", *cpu_types[args.cpu]
)
]
# Create a cache hierarchy for the cluster. We are assuming that
@@ -143,11 +141,11 @@ def create(args):
system.highest_el_is_64 = True
workload_class = workloads.workload_list.get(args.workload)
system.workload = workload_class(
object_file, system)
system.workload = workload_class(object_file, system)
return system
def run(args):
cptdir = m5.options.outdir
if args.checkpoint:
@@ -171,67 +169,118 @@ def run(args):
def main():
parser = argparse.ArgumentParser(epilog=__doc__)
parser.add_argument("--kernel", type=str,
default=None,
help="Binary to run")
parser.add_argument("--workload", type=str,
default="ArmBaremetal",
choices=workloads.workload_list.get_names(),
help="Workload type")
parser.add_argument("--disk-image", type=str,
default=None,
help="Disk to instantiate")
parser.add_argument("--readfile", type=str, default="",
help = "File to return with the m5 readfile command")
parser.add_argument("--cpu", type=str, choices=list(cpu_types.keys()),
default="atomic",
help="CPU model to use")
parser.add_argument(
"--kernel", type=str, default=None, help="Binary to run"
)
parser.add_argument(
"--workload",
type=str,
default="ArmBaremetal",
choices=workloads.workload_list.get_names(),
help="Workload type",
)
parser.add_argument(
"--disk-image", type=str, default=None, help="Disk to instantiate"
)
parser.add_argument(
"--readfile",
type=str,
default="",
help="File to return with the m5 readfile command",
)
parser.add_argument(
"--cpu",
type=str,
choices=list(cpu_types.keys()),
default="atomic",
help="CPU model to use",
)
parser.add_argument("--cpu-freq", type=str, default="4GHz")
parser.add_argument("--num-cores", type=int, default=1,
help="Number of CPU cores")
parser.add_argument("--machine-type", type=str,
choices=ObjectList.platform_list.get_names(),
default="VExpress_GEM5_V2",
help="Hardware platform class")
parser.add_argument("--mem-type", default="DDR3_1600_8x8",
choices=ObjectList.mem_list.get_names(),
help = "type of memory to use")
parser.add_argument("--mem-channels", type=int, default=1,
help = "number of memory channels")
parser.add_argument("--mem-ranks", type=int, default=None,
help = "number of memory ranks per channel")
parser.add_argument("--mem-size", action="store", type=str,
default="2GB",
help="Specify the physical memory size")
parser.add_argument(
"--num-cores", type=int, default=1, help="Number of CPU cores"
)
parser.add_argument(
"--machine-type",
type=str,
choices=ObjectList.platform_list.get_names(),
default="VExpress_GEM5_V2",
help="Hardware platform class",
)
parser.add_argument(
"--mem-type",
default="DDR3_1600_8x8",
choices=ObjectList.mem_list.get_names(),
help="type of memory to use",
)
parser.add_argument(
"--mem-channels", type=int, default=1, help="number of memory channels"
)
parser.add_argument(
"--mem-ranks",
type=int,
default=None,
help="number of memory ranks per channel",
)
parser.add_argument(
"--mem-size",
action="store",
type=str,
default="2GB",
help="Specify the physical memory size",
)
parser.add_argument("--checkpoint", action="store_true")
parser.add_argument("--restore", type=str, default=None)
parser.add_argument("--dtb-gen", action="store_true",
help="Doesn't run simulation, it generates a DTB only")
parser.add_argument("--semi-enable", action="store_true",
help="Enable semihosting support")
parser.add_argument("--semi-stdin", type=str, default="stdin",
help="Standard input for semihosting " \
"(default: gem5's stdin)")
parser.add_argument("--semi-stdout", type=str, default="stdout",
help="Standard output for semihosting " \
"(default: gem5's stdout)")
parser.add_argument("--semi-stderr", type=str, default="stderr",
help="Standard error for semihosting " \
"(default: gem5's stderr)")
parser.add_argument('--semi-path', type=str,
default="",
help=('Search path for files to be loaded through '
'Arm Semihosting'))
parser.add_argument("args", default=[], nargs="*",
help="Semihosting arguments to pass to benchmark")
parser.add_argument("-P", "--param", action="append", default=[],
parser.add_argument(
"--dtb-gen",
action="store_true",
help="Doesn't run simulation, it generates a DTB only",
)
parser.add_argument(
"--semi-enable", action="store_true", help="Enable semihosting support"
)
parser.add_argument(
"--semi-stdin",
type=str,
default="stdin",
help="Standard input for semihosting " "(default: gem5's stdin)",
)
parser.add_argument(
"--semi-stdout",
type=str,
default="stdout",
help="Standard output for semihosting " "(default: gem5's stdout)",
)
parser.add_argument(
"--semi-stderr",
type=str,
default="stderr",
help="Standard error for semihosting " "(default: gem5's stderr)",
)
parser.add_argument(
"--semi-path",
type=str,
default="",
help=("Search path for files to be loaded through " "Arm Semihosting"),
)
parser.add_argument(
"args",
default=[],
nargs="*",
help="Semihosting arguments to pass to benchmark",
)
parser.add_argument(
"-P",
"--param",
action="append",
default=[],
help="Set a SimObject parameter relative to the root node. "
"An extended Python multi range slicing syntax can be used "
"for arrays. For example: "
"'system.cpu[0,1,3:8:2].max_insts_all_threads = 42' "
"sets max_insts_all_threads for cpus 0, 1, 3, 5 and 7 "
"Direct parameters of the root object are not accessible, "
"only parameters of its children.")
"An extended Python multi range slicing syntax can be used "
"for arrays. For example: "
"'system.cpu[0,1,3:8:2].max_insts_all_threads = 42' "
"sets max_insts_all_threads for cpus 0, 1, 3, 5 and 7 "
"Direct parameters of the root object are not accessible, "
"only parameters of its children.",
)
args = parser.parse_args()
@@ -247,9 +296,10 @@ def main():
if args.dtb_gen:
# No run, autogenerate DTB and exit
root.system.generateDtb(os.path.join(m5.options.outdir, 'system.dtb'))
root.system.generateDtb(os.path.join(m5.options.outdir, "system.dtb"))
else:
run(args)
if __name__ == "__m5_main__":
main()

View File

@@ -37,20 +37,22 @@
import m5
from m5.objects import *
m5.util.addToPath('../../')
m5.util.addToPath("../../")
from common.Caches import *
from common import ObjectList
have_kvm = "ArmV8KvmCPU" in ObjectList.cpu_list.get_names()
have_fastmodel = "FastModelCortexA76" in ObjectList.cpu_list.get_names()
class L1I(L1_ICache):
tag_latency = 1
data_latency = 1
response_latency = 1
mshrs = 4
tgts_per_mshr = 8
size = '48kB'
size = "48kB"
assoc = 3
@@ -60,7 +62,7 @@ class L1D(L1_DCache):
response_latency = 1
mshrs = 16
tgts_per_mshr = 16
size = '32kB'
size = "32kB"
assoc = 2
write_buffers = 16
@@ -71,21 +73,21 @@ class L2(L2Cache):
response_latency = 5
mshrs = 32
tgts_per_mshr = 8
size = '1MB'
size = "1MB"
assoc = 16
write_buffers = 8
clusivity='mostly_excl'
clusivity = "mostly_excl"
class L3(Cache):
size = '16MB'
size = "16MB"
assoc = 16
tag_latency = 20
data_latency = 20
response_latency = 20
mshrs = 20
tgts_per_mshr = 12
clusivity='mostly_excl'
clusivity = "mostly_excl"
class MemBus(SystemXBar):
@@ -94,8 +96,17 @@ class MemBus(SystemXBar):
class CpuCluster(SubSystem):
def __init__(self, system, num_cpus, cpu_clock, cpu_voltage,
cpu_type, l1i_type, l1d_type, l2_type):
def __init__(
self,
system,
num_cpus,
cpu_clock,
cpu_voltage,
cpu_type,
l1i_type,
l1d_type,
l2_type,
):
super(CpuCluster, self).__init__()
self._cpu_type = cpu_type
self._l1i_type = l1i_type
@@ -105,12 +116,16 @@ class CpuCluster(SubSystem):
assert num_cpus > 0
self.voltage_domain = VoltageDomain(voltage=cpu_voltage)
self.clk_domain = SrcClockDomain(clock=cpu_clock,
voltage_domain=self.voltage_domain)
self.clk_domain = SrcClockDomain(
clock=cpu_clock, voltage_domain=self.voltage_domain
)
self.cpus = [ self._cpu_type(cpu_id=system.numCpus() + idx,
clk_domain=self.clk_domain)
for idx in range(num_cpus) ]
self.cpus = [
self._cpu_type(
cpu_id=system.numCpus() + idx, clk_domain=self.clk_domain
)
for idx in range(num_cpus)
]
for cpu in self.cpus:
cpu.createThreads()
@@ -157,11 +172,14 @@ class CpuCluster(SubSystem):
int_cls = ArmPPI if pint < 32 else ArmSPI
for isa in cpu.isa:
isa.pmu = ArmPMU(interrupt=int_cls(num=pint))
isa.pmu.addArchEvents(cpu=cpu,
itb=cpu.mmu.itb, dtb=cpu.mmu.dtb,
icache=getattr(cpu, 'icache', None),
dcache=getattr(cpu, 'dcache', None),
l2cache=getattr(self, 'l2', None))
isa.pmu.addArchEvents(
cpu=cpu,
itb=cpu.mmu.itb,
dtb=cpu.mmu.dtb,
icache=getattr(cpu, "icache", None),
dcache=getattr(cpu, "dcache", None),
l2cache=getattr(self, "l2", None),
)
for ev in events:
isa.pmu.addEvent(ev)
@@ -175,42 +193,55 @@ class CpuCluster(SubSystem):
class AtomicCluster(CpuCluster):
def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
cpu_config = [ ObjectList.cpu_list.get("AtomicSimpleCPU"), None,
None, None ]
super(AtomicCluster, self).__init__(system, num_cpus, cpu_clock,
cpu_voltage, *cpu_config)
cpu_config = [
ObjectList.cpu_list.get("AtomicSimpleCPU"),
None,
None,
None,
]
super(AtomicCluster, self).__init__(
system, num_cpus, cpu_clock, cpu_voltage, *cpu_config
)
def addL1(self):
pass
class KvmCluster(CpuCluster):
def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
cpu_config = [ ObjectList.cpu_list.get("ArmV8KvmCPU"), None, None,
None ]
super(KvmCluster, self).__init__(system, num_cpus, cpu_clock,
cpu_voltage, *cpu_config)
cpu_config = [ObjectList.cpu_list.get("ArmV8KvmCPU"), None, None, None]
super(KvmCluster, self).__init__(
system, num_cpus, cpu_clock, cpu_voltage, *cpu_config
)
def addL1(self):
pass
class FastmodelCluster(SubSystem):
def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
super(FastmodelCluster, self).__init__()
# Setup GIC
gic = system.realview.gic
gic.sc_gic.cpu_affinities = ','.join(
[ '0.0.%d.0' % i for i in range(num_cpus) ])
gic.sc_gic.cpu_affinities = ",".join(
["0.0.%d.0" % i for i in range(num_cpus)]
)
# Parse the base address of redistributor.
redist_base = gic.get_redist_bases()[0]
redist_frame_size = 0x40000 if gic.sc_gic.has_gicv4_1 else 0x20000
gic.sc_gic.reg_base_per_redistributor = ','.join([
'0.0.%d.0=%#x' % (i, redist_base + redist_frame_size * i)
for i in range(num_cpus)
])
gic.sc_gic.reg_base_per_redistributor = ",".join(
[
"0.0.%d.0=%#x" % (i, redist_base + redist_frame_size * i)
for i in range(num_cpus)
]
)
gic_a2t = AmbaToTlmBridge64(amba=gic.amba_m)
gic_t2g = TlmToGem5Bridge64(tlm=gic_a2t.tlm,
gem5=system.iobus.cpu_side_ports)
gic_t2g = TlmToGem5Bridge64(
tlm=gic_a2t.tlm, gem5=system.iobus.cpu_side_ports
)
gic_g2t = Gem5ToTlmBridge64(gem5=system.membus.mem_side_ports)
gic_g2t.addr_ranges = gic.get_addr_ranges()
gic_t2a = AmbaFromTlmBridge64(tlm=gic_g2t.tlm)
@@ -223,28 +254,36 @@ class FastmodelCluster(SubSystem):
system.gic_hub.gic_t2a = gic_t2a
self.voltage_domain = VoltageDomain(voltage=cpu_voltage)
self.clk_domain = SrcClockDomain(clock=cpu_clock,
voltage_domain=self.voltage_domain)
self.clk_domain = SrcClockDomain(
clock=cpu_clock, voltage_domain=self.voltage_domain
)
# Setup CPU
assert num_cpus <= 4
CpuClasses = [FastModelCortexA76x1, FastModelCortexA76x2,
FastModelCortexA76x3, FastModelCortexA76x4]
CpuClasses = [
FastModelCortexA76x1,
FastModelCortexA76x2,
FastModelCortexA76x3,
FastModelCortexA76x4,
]
CpuClass = CpuClasses[num_cpus - 1]
cpu = CpuClass(GICDISABLE=False)
cpu = CpuClass(
GICDISABLE=False, BROADCASTATOMIC=False, BROADCASTOUTER=False
)
for core in cpu.cores:
core.semihosting_enable = False
core.RVBARADDR = 0x10
core.redistributor = gic.redistributor
core.createThreads()
core.createInterruptController()
self.cpus = [ cpu ]
self.cpus = [cpu]
self.cpu_hub = SubSystem()
a2t = AmbaToTlmBridge64(amba=cpu.amba)
t2g = TlmToGem5Bridge64(tlm=a2t.tlm, gem5=system.membus.cpu_side_ports)
system.gic_hub.a2t = a2t
system.gic_hub.t2g = t2g
self.cpu_hub.a2t = a2t
self.cpu_hub.t2g = t2g
system.addCpuCluster(self, num_cpus)
@@ -252,7 +291,7 @@ class FastmodelCluster(SubSystem):
return False
def memoryMode(self):
return 'atomic_noncaching'
return "atomic_noncaching"
def addL1(self):
pass
@@ -263,6 +302,7 @@ class FastmodelCluster(SubSystem):
def connectMemSide(self, bus):
pass
class BaseSimpleSystem(ArmSystem):
cache_line_size = 64
@@ -271,15 +311,15 @@ class BaseSimpleSystem(ArmSystem):
self.voltage_domain = VoltageDomain(voltage="1.0V")
self.clk_domain = SrcClockDomain(
clock="1GHz",
voltage_domain=Parent.voltage_domain)
clock="1GHz", voltage_domain=Parent.voltage_domain
)
if platform is None:
self.realview = VExpress_GEM5_V1()
else:
self.realview = platform
if hasattr(self.realview.gic, 'cpu_addr'):
if hasattr(self.realview.gic, "cpu_addr"):
self.gic_cpu_addr = self.realview.gic.cpu_addr
self.terminal = Terminal()
@@ -305,7 +345,8 @@ class BaseSimpleSystem(ArmSystem):
size_in_range = min(mem_size, mem_range.size())
mem_ranges.append(
AddrRange(start=mem_range.start, size=size_in_range))
AddrRange(start=mem_range.start, size=size_in_range)
)
mem_size -= size_in_range
if mem_size == 0:
@@ -340,8 +381,9 @@ class BaseSimpleSystem(ArmSystem):
for cluster in self._clusters:
cluster.addL2(cluster.clk_domain)
if last_cache_level > 2:
max_clock_cluster = max(self._clusters,
key=lambda c: c.clk_domain.clock[0])
max_clock_cluster = max(
self._clusters, key=lambda c: c.clk_domain.clock[0]
)
self.l3 = L3(clk_domain=max_clock_cluster.clk_domain)
self.toL3Bus = L2XBar(width=64)
self.toL3Bus.mem_side_ports = self.l3.cpu_side
@@ -352,23 +394,24 @@ class BaseSimpleSystem(ArmSystem):
for cluster in self._clusters:
cluster.connectMemSide(cluster_mem_bus)
class SimpleSystem(BaseSimpleSystem):
"""
Meant to be used with the classic memory model
"""
def __init__(self, caches, mem_size, platform=None, **kwargs):
super(SimpleSystem, self).__init__(mem_size, platform, **kwargs)
self.membus = MemBus()
# CPUs->PIO
self.iobridge = Bridge(delay='50ns')
self.iobridge = Bridge(delay="50ns")
self._caches = caches
if self._caches:
self.iocache = IOCache(addr_ranges=self.mem_ranges)
else:
self.dmabridge = Bridge(delay='50ns',
ranges=self.mem_ranges)
self.dmabridge = Bridge(delay="50ns", ranges=self.mem_ranges)
def connect(self):
self.iobridge.mem_side_port = self.iobus.cpu_side_ports
@@ -381,7 +424,7 @@ class SimpleSystem(BaseSimpleSystem):
self.dmabridge.mem_side_port = self.membus.cpu_side_ports
self.dmabridge.cpu_side_port = self.iobus.mem_side_ports
if hasattr(self.realview.gic, 'cpu_addr'):
if hasattr(self.realview.gic, "cpu_addr"):
self.gic_cpu_addr = self.realview.gic.cpu_addr
self.realview.attachOnChipIO(self.membus, self.iobridge)
self.realview.attachIO(self.iobus)
@@ -390,18 +433,21 @@ class SimpleSystem(BaseSimpleSystem):
def attach_pci(self, dev):
self.realview.attachPciDevice(dev, self.iobus)
class ArmRubySystem(BaseSimpleSystem):
"""
Meant to be used with ruby
"""
def __init__(self, mem_size, platform=None, **kwargs):
super(ArmRubySystem, self).__init__(mem_size, platform, **kwargs)
self._dma_ports = []
self._mem_ports = []
def connect(self):
self.realview.attachOnChipIO(self.iobus,
dma_ports=self._dma_ports, mem_ports=self._mem_ports)
self.realview.attachOnChipIO(
self.iobus, dma_ports=self._dma_ports, mem_ports=self._mem_ports
)
self.realview.attachIO(self.iobus, dma_ports=self._dma_ports)
@@ -410,5 +456,6 @@ class ArmRubySystem(BaseSimpleSystem):
self.ruby._cpu_ports[i].connectCpuPorts(cpu)
def attach_pci(self, dev):
self.realview.attachPciDevice(dev, self.iobus,
dma_ports=self._dma_ports)
self.realview.attachPciDevice(
dev, self.iobus, dma_ports=self._dma_ports
)

View File

@@ -43,55 +43,96 @@ import m5
from m5.objects import *
import fs_bigLITTLE as bL
m5.util.addToPath("../../dist")
import sw
def addOptions(parser):
# Options for distributed simulation (i.e. dist-gem5)
parser.add_argument("--dist", action="store_true", help="Distributed gem5"\
" simulation.")
parser.add_argument("--is-switch", action="store_true",
help="Select the network switch simulator process for"\
" a distributed gem5 run.")
parser.add_argument("--dist-rank", default=0, action="store", type=int,
help="Rank of this system within the dist gem5 run.")
parser.add_argument("--dist-size", default=0, action="store", type=int,
help="Number of gem5 processes within the dist gem5"\
" run.")
parser.add_argument("--dist-server-name",
default="127.0.0.1",
action="store", type=str,
help="Name of the message server host\nDEFAULT:"\
" localhost")
parser.add_argument("--dist-server-port",
default=2200,
action="store", type=int,
help="Message server listen port\nDEFAULT: 2200")
parser.add_argument("--dist-sync-repeat",
default="0us",
action="store", type=str,
help="Repeat interval for synchronisation barriers"\
" among dist-gem5 processes\nDEFAULT:"\
" --ethernet-linkdelay")
parser.add_argument("--dist-sync-start",
default="1000000000000t",
action="store", type=str,
help="Time to schedule the first dist synchronisation"\
" barrier\nDEFAULT:1000000000000t")
parser.add_argument("--ethernet-linkspeed", default="10Gbps",
action="store", type=str,
help="Link speed in bps\nDEFAULT: 10Gbps")
parser.add_argument("--ethernet-linkdelay", default="10us",
action="store", type=str,
help="Link delay in seconds\nDEFAULT: 10us")
parser.add_argument("--etherdump", action="store", type=str, default="",
help="Specify the filename to dump a pcap capture of"\
" the ethernet traffic")
# Options for distributed simulation (i.e. dist-gem5)
parser.add_argument(
"--dist", action="store_true", help="Distributed gem5" " simulation."
)
parser.add_argument(
"--is-switch",
action="store_true",
help="Select the network switch simulator process for"
" a distributed gem5 run.",
)
parser.add_argument(
"--dist-rank",
default=0,
action="store",
type=int,
help="Rank of this system within the dist gem5 run.",
)
parser.add_argument(
"--dist-size",
default=0,
action="store",
type=int,
help="Number of gem5 processes within the dist gem5" " run.",
)
parser.add_argument(
"--dist-server-name",
default="127.0.0.1",
action="store",
type=str,
help="Name of the message server host\nDEFAULT:" " localhost",
)
parser.add_argument(
"--dist-server-port",
default=2200,
action="store",
type=int,
help="Message server listen port\nDEFAULT: 2200",
)
parser.add_argument(
"--dist-sync-repeat",
default="0us",
action="store",
type=str,
help="Repeat interval for synchronisation barriers"
" among dist-gem5 processes\nDEFAULT:"
" --ethernet-linkdelay",
)
parser.add_argument(
"--dist-sync-start",
default="1000000000000t",
action="store",
type=str,
help="Time to schedule the first dist synchronisation"
" barrier\nDEFAULT:1000000000000t",
)
parser.add_argument(
"--ethernet-linkspeed",
default="10Gbps",
action="store",
type=str,
help="Link speed in bps\nDEFAULT: 10Gbps",
)
parser.add_argument(
"--ethernet-linkdelay",
default="10us",
action="store",
type=str,
help="Link delay in seconds\nDEFAULT: 10us",
)
parser.add_argument(
"--etherdump",
action="store",
type=str,
default="",
help="Specify the filename to dump a pcap capture of"
" the ethernet traffic",
)
# Used by util/dist/gem5-dist.sh
parser.add_argument("--checkpoint-dir", type=str,
default=m5.options.outdir,
help="Directory to save/read checkpoints")
parser.add_argument(
"--checkpoint-dir",
type=str,
default=m5.options.outdir,
help="Directory to save/read checkpoints",
)
def addEthernet(system, options):
@@ -101,14 +142,16 @@ def addEthernet(system, options):
system.ethernet = dev
# create distributed ethernet link
system.etherlink = DistEtherLink(speed = options.ethernet_linkspeed,
delay = options.ethernet_linkdelay,
dist_rank = options.dist_rank,
dist_size = options.dist_size,
server_name = options.dist_server_name,
server_port = options.dist_server_port,
sync_start = options.dist_sync_start,
sync_repeat = options.dist_sync_repeat)
system.etherlink = DistEtherLink(
speed=options.ethernet_linkspeed,
delay=options.ethernet_linkdelay,
dist_rank=options.dist_rank,
dist_size=options.dist_size,
server_name=options.dist_server_name,
server_port=options.dist_server_port,
sync_start=options.dist_sync_start,
sync_repeat=options.dist_sync_repeat,
)
system.etherlink.int0 = Parent.system.ethernet.interface
if options.etherdump:
system.etherdump = EtherDump(file=options.etherdump)
@@ -117,15 +160,15 @@ def addEthernet(system, options):
def main():
parser = argparse.ArgumentParser(
description="Generic ARM big.LITTLE configuration with "\
"dist-gem5 support")
description="Generic ARM big.LITTLE configuration with "
"dist-gem5 support"
)
bL.addOptions(parser)
addOptions(parser)
options = parser.parse_args()
if options.is_switch:
root = Root(full_system = True,
system = sw.build_switch(options))
root = Root(full_system=True, system=sw.build_switch(options))
else:
root = bL.build(options)
addEthernet(root.system, options)

View File

@@ -55,84 +55,121 @@ import devices
from devices import AtomicCluster, KvmCluster, FastmodelCluster
default_disk = 'aarch64-ubuntu-trusty-headless.img'
default_disk = "aarch64-ubuntu-trusty-headless.img"
default_mem_size = "2GB"
default_mem_size= "2GB"
def _to_ticks(value):
"""Helper function to convert a latency from string format to Ticks"""
return m5.ticks.fromSeconds(m5.util.convert.anyToLatency(value))
def _using_pdes(root):
"""Determine if the simulator is using multiple parallel event queues"""
for obj in root.descendants():
if not m5.proxy.isproxy(obj.eventq_index) and \
obj.eventq_index != root.eventq_index:
if (
not m5.proxy.isproxy(obj.eventq_index)
and obj.eventq_index != root.eventq_index
):
return True
return False
class BigCluster(devices.CpuCluster):
def __init__(self, system, num_cpus, cpu_clock,
cpu_voltage="1.0V"):
cpu_config = [ ObjectList.cpu_list.get("O3_ARM_v7a_3"),
devices.L1I, devices.L1D, devices.L2 ]
super(BigCluster, self).__init__(system, num_cpus, cpu_clock,
cpu_voltage, *cpu_config)
def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
cpu_config = [
ObjectList.cpu_list.get("O3_ARM_v7a_3"),
devices.L1I,
devices.L1D,
devices.L2,
]
super(BigCluster, self).__init__(
system, num_cpus, cpu_clock, cpu_voltage, *cpu_config
)
class LittleCluster(devices.CpuCluster):
def __init__(self, system, num_cpus, cpu_clock,
cpu_voltage="1.0V"):
cpu_config = [ ObjectList.cpu_list.get("MinorCPU"), devices.L1I,
devices.L1D, devices.L2 ]
super(LittleCluster, self).__init__(system, num_cpus, cpu_clock,
cpu_voltage, *cpu_config)
def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
cpu_config = [
ObjectList.cpu_list.get("MinorCPU"),
devices.L1I,
devices.L1D,
devices.L2,
]
super(LittleCluster, self).__init__(
system, num_cpus, cpu_clock, cpu_voltage, *cpu_config
)
class Ex5BigCluster(devices.CpuCluster):
def __init__(self, system, num_cpus, cpu_clock,
cpu_voltage="1.0V"):
cpu_config = [ ObjectList.cpu_list.get("ex5_big"), ex5_big.L1I,
ex5_big.L1D, ex5_big.L2 ]
super(Ex5BigCluster, self).__init__(system, num_cpus, cpu_clock,
cpu_voltage, *cpu_config)
def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
cpu_config = [
ObjectList.cpu_list.get("ex5_big"),
ex5_big.L1I,
ex5_big.L1D,
ex5_big.L2,
]
super(Ex5BigCluster, self).__init__(
system, num_cpus, cpu_clock, cpu_voltage, *cpu_config
)
class Ex5LittleCluster(devices.CpuCluster):
def __init__(self, system, num_cpus, cpu_clock,
cpu_voltage="1.0V"):
cpu_config = [ ObjectList.cpu_list.get("ex5_LITTLE"),
ex5_LITTLE.L1I, ex5_LITTLE.L1D,
ex5_LITTLE.L2 ]
super(Ex5LittleCluster, self).__init__(system, num_cpus, cpu_clock,
cpu_voltage, *cpu_config)
def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
cpu_config = [
ObjectList.cpu_list.get("ex5_LITTLE"),
ex5_LITTLE.L1I,
ex5_LITTLE.L1D,
ex5_LITTLE.L2,
]
super(Ex5LittleCluster, self).__init__(
system, num_cpus, cpu_clock, cpu_voltage, *cpu_config
)
def createSystem(caches, kernel, bootscript, machine_type="VExpress_GEM5",
disks=[], mem_size=default_mem_size, bootloader=None):
def createSystem(
caches,
kernel,
bootscript,
machine_type="VExpress_GEM5",
disks=[],
mem_size=default_mem_size,
bootloader=None,
):
platform = ObjectList.platform_list.get(machine_type)
m5.util.inform("Simulated platform: %s", platform.__name__)
sys = devices.SimpleSystem(caches, mem_size, platform(),
workload=ArmFsLinux(
object_file=SysPaths.binary(kernel)),
readfile=bootscript)
sys = devices.SimpleSystem(
caches,
mem_size,
platform(),
workload=ArmFsLinux(object_file=SysPaths.binary(kernel)),
readfile=bootscript,
)
sys.mem_ctrls = [ SimpleMemory(range=r, port=sys.membus.mem_side_ports)
for r in sys.mem_ranges ]
sys.mem_ctrls = [
SimpleMemory(range=r, port=sys.membus.mem_side_ports)
for r in sys.mem_ranges
]
sys.connect()
# Attach disk images
if disks:
def cow_disk(image_file):
image = CowDiskImage()
image.child.image_file = SysPaths.disk(image_file)
return image
sys.disk_images = [ cow_disk(f) for f in disks ]
sys.pci_vio_block = [ PciVirtIO(vio=VirtIOBlock(image=img))
for img in sys.disk_images ]
sys.disk_images = [cow_disk(f) for f in disks]
sys.pci_vio_block = [
PciVirtIO(vio=VirtIOBlock(image=img)) for img in sys.disk_images
]
for dev in sys.pci_vio_block:
sys.attach_pci(dev)
@@ -140,10 +177,11 @@ def createSystem(caches, kernel, bootscript, machine_type="VExpress_GEM5",
return sys
cpu_types = {
"atomic" : (AtomicCluster, AtomicCluster),
"timing" : (BigCluster, LittleCluster),
"exynos" : (Ex5BigCluster, Ex5LittleCluster),
"atomic": (AtomicCluster, AtomicCluster),
"timing": (BigCluster, LittleCluster),
"exynos": (Ex5BigCluster, Ex5LittleCluster),
}
# Only add the KVM CPU if it has been compiled into gem5
@@ -154,66 +192,143 @@ if devices.have_kvm:
if devices.have_fastmodel:
cpu_types["fastmodel"] = (FastmodelCluster, FastmodelCluster)
def addOptions(parser):
parser.add_argument("--restore-from", type=str, default=None,
help="Restore from checkpoint")
parser.add_argument("--dtb", type=str, default=None,
help="DTB file to load")
parser.add_argument("--kernel", type=str, required=True,
help="Linux kernel")
parser.add_argument("--root", type=str, default="/dev/vda1",
help="Specify the kernel CLI root= argument")
parser.add_argument("--machine-type", type=str,
choices=ObjectList.platform_list.get_names(),
default="VExpress_GEM5",
help="Hardware platform class")
parser.add_argument("--disk", action="append", type=str, default=[],
help="Disks to instantiate")
parser.add_argument("--bootscript", type=str, default="",
help="Linux bootscript")
parser.add_argument("--cpu-type", type=str, choices=list(cpu_types.keys()),
default="timing",
help="CPU simulation mode. Default: %(default)s")
parser.add_argument("--kernel-init", type=str, default="/sbin/init",
help="Override init")
parser.add_argument("--big-cpus", type=int, default=1,
help="Number of big CPUs to instantiate")
parser.add_argument("--little-cpus", type=int, default=1,
help="Number of little CPUs to instantiate")
parser.add_argument("--caches", action="store_true", default=False,
help="Instantiate caches")
parser.add_argument("--last-cache-level", type=int, default=2,
help="Last level of caches (e.g. 3 for L3)")
parser.add_argument("--big-cpu-clock", type=str, default="2GHz",
help="Big CPU clock frequency")
parser.add_argument("--little-cpu-clock", type=str, default="1GHz",
help="Little CPU clock frequency")
parser.add_argument("--sim-quantum", type=str, default="1ms",
help="Simulation quantum for parallel simulation. " \
"Default: %(default)s")
parser.add_argument("--mem-size", type=str, default=default_mem_size,
help="System memory size")
parser.add_argument("--kernel-cmd", type=str, default=None,
help="Custom Linux kernel command")
parser.add_argument("--bootloader", action="append",
help="executable file that runs before the --kernel")
parser.add_argument("--kvm-userspace-gic", action="store_true",
default=False,
help="Use the gem5 GIC in a KVM simulation")
parser.add_argument("-P", "--param", action="append", default=[],
parser.add_argument(
"--restore-from",
type=str,
default=None,
help="Restore from checkpoint",
)
parser.add_argument(
"--dtb", type=str, default=None, help="DTB file to load"
)
parser.add_argument(
"--kernel", type=str, required=True, help="Linux kernel"
)
parser.add_argument(
"--root",
type=str,
default="/dev/vda1",
help="Specify the kernel CLI root= argument",
)
parser.add_argument(
"--machine-type",
type=str,
choices=ObjectList.platform_list.get_names(),
default="VExpress_GEM5",
help="Hardware platform class",
)
parser.add_argument(
"--disk",
action="append",
type=str,
default=[],
help="Disks to instantiate",
)
parser.add_argument(
"--bootscript", type=str, default="", help="Linux bootscript"
)
parser.add_argument(
"--cpu-type",
type=str,
choices=list(cpu_types.keys()),
default="timing",
help="CPU simulation mode. Default: %(default)s",
)
parser.add_argument(
"--kernel-init", type=str, default="/sbin/init", help="Override init"
)
parser.add_argument(
"--big-cpus",
type=int,
default=1,
help="Number of big CPUs to instantiate",
)
parser.add_argument(
"--little-cpus",
type=int,
default=1,
help="Number of little CPUs to instantiate",
)
parser.add_argument(
"--caches",
action="store_true",
default=False,
help="Instantiate caches",
)
parser.add_argument(
"--last-cache-level",
type=int,
default=2,
help="Last level of caches (e.g. 3 for L3)",
)
parser.add_argument(
"--big-cpu-clock",
type=str,
default="2GHz",
help="Big CPU clock frequency",
)
parser.add_argument(
"--little-cpu-clock",
type=str,
default="1GHz",
help="Little CPU clock frequency",
)
parser.add_argument(
"--sim-quantum",
type=str,
default="1ms",
help="Simulation quantum for parallel simulation. "
"Default: %(default)s",
)
parser.add_argument(
"--mem-size",
type=str,
default=default_mem_size,
help="System memory size",
)
parser.add_argument(
"--kernel-cmd",
type=str,
default=None,
help="Custom Linux kernel command",
)
parser.add_argument(
"--bootloader",
action="append",
help="executable file that runs before the --kernel",
)
parser.add_argument(
"--kvm-userspace-gic",
action="store_true",
default=False,
help="Use the gem5 GIC in a KVM simulation",
)
parser.add_argument(
"-P",
"--param",
action="append",
default=[],
help="Set a SimObject parameter relative to the root node. "
"An extended Python multi range slicing syntax can be used "
"for arrays. For example: "
"'system.cpu[0,1,3:8:2].max_insts_all_threads = 42' "
"sets max_insts_all_threads for cpus 0, 1, 3, 5 and 7 "
"Direct parameters of the root object are not accessible, "
"only parameters of its children.")
parser.add_argument("--vio-9p", action="store_true",
help=Options.vio_9p_help)
parser.add_argument("--dtb-gen", action="store_true",
help="Doesn't run simulation, it generates a DTB only")
"An extended Python multi range slicing syntax can be used "
"for arrays. For example: "
"'system.cpu[0,1,3:8:2].max_insts_all_threads = 42' "
"sets max_insts_all_threads for cpus 0, 1, 3, 5 and 7 "
"Direct parameters of the root object are not accessible, "
"only parameters of its children.",
)
parser.add_argument(
"--vio-9p", action="store_true", help=Options.vio_9p_help
)
parser.add_argument(
"--dtb-gen",
action="store_true",
help="Doesn't run simulation, it generates a DTB only",
)
return parser
def build(options):
m5.ticks.fixGlobalFrequency()
@@ -234,13 +349,15 @@ def build(options):
root = Root(full_system=True)
disks = [default_disk] if len(options.disk) == 0 else options.disk
system = createSystem(options.caches,
options.kernel,
options.bootscript,
options.machine_type,
disks=disks,
mem_size=options.mem_size,
bootloader=options.bootloader)
system = createSystem(
options.caches,
options.kernel,
options.bootscript,
options.machine_type,
disks=disks,
mem_size=options.mem_size,
bootloader=options.bootloader,
)
root.system = system
if options.kernel_cmd:
@@ -256,24 +373,28 @@ def build(options):
all_cpus = []
# big cluster
if options.big_cpus > 0:
system.bigCluster = big_model(system, options.big_cpus,
options.big_cpu_clock)
system.bigCluster = big_model(
system, options.big_cpus, options.big_cpu_clock
)
system.mem_mode = system.bigCluster.memoryMode()
all_cpus += system.bigCluster.cpus
# little cluster
if options.little_cpus > 0:
system.littleCluster = little_model(system, options.little_cpus,
options.little_cpu_clock)
system.littleCluster = little_model(
system, options.little_cpus, options.little_cpu_clock
)
system.mem_mode = system.littleCluster.memoryMode()
all_cpus += system.littleCluster.cpus
# Figure out the memory mode
if options.big_cpus > 0 and options.little_cpus > 0 and \
system.bigCluster.memoryMode() != system.littleCluster.memoryMode():
if (
options.big_cpus > 0
and options.little_cpus > 0
and system.bigCluster.memoryMode() != system.littleCluster.memoryMode()
):
m5.util.panic("Memory mode missmatch among CPU clusters")
# create caches
system.addCaches(options.caches, options.last_cache_level)
if not options.caches:
@@ -290,26 +411,31 @@ def build(options):
if options.dtb is not None:
system.workload.dtb_filename = SysPaths.binary(options.dtb)
else:
system.workload.dtb_filename = \
os.path.join(m5.options.outdir, 'system.dtb')
system.workload.dtb_filename = os.path.join(
m5.options.outdir, "system.dtb"
)
system.generateDtb(system.workload.dtb_filename)
if devices.have_fastmodel and issubclass(big_model, FastmodelCluster):
from m5 import arm_fast_model as fm, systemc as sc
# setup FastModels for simulation
fm.setup_simulation("cortexa76")
# setup SystemC
root.systemc_kernel = m5.objects.SystemC_Kernel()
m5.tlm.tlm_global_quantum_instance().set(
sc.sc_time(10000.0 / 100000000.0, sc.sc_time.SC_SEC))
sc.sc_time(10000.0 / 100000000.0, sc.sc_time.SC_SEC)
)
if options.vio_9p:
FSConfig.attach_9p(system.realview, system.iobus)
return root
def _build_kvm(options, system, cpus):
system.kvm_vm = KvmVM()
system.release = ArmDefaultRelease.for_kvm()
if options.kvm_userspace_gic:
# We will use the simulated GIC.
@@ -335,14 +461,15 @@ def _build_kvm(options, system, cpus):
cpu.eventq_index = first_cpu_eq + idx
def instantiate(options, checkpoint_dir=None):
# Setup the simulation quantum if we are running in PDES-mode
# (e.g., when using KVM)
root = Root.getInstance()
if root and _using_pdes(root):
m5.util.inform("Running in PDES mode with a %s simulation quantum.",
options.sim_quantum)
m5.util.inform(
"Running in PDES mode with a %s simulation quantum.",
options.sim_quantum,
)
root.sim_quantum = _to_ticks(options.sim_quantum)
# Get and load from the chkpt or simpoint checkpoint
@@ -381,16 +508,17 @@ def generateDtb(root):
def main():
parser = argparse.ArgumentParser(
description="Generic ARM big.LITTLE configuration")
description="Generic ARM big.LITTLE configuration"
)
addOptions(parser)
options = parser.parse_args()
root = build(options)
root.apply_config(options.param)
instantiate(options)
if options.dtb_gen:
generateDtb(root)
generateDtb(root)
else:
run()
run()
if __name__ == "__m5_main__":

View File

@@ -50,25 +50,29 @@ class CpuPowerOn(MathExprPowerModel):
super(CpuPowerOn, self).__init__(**kwargs)
# 2A per IPC, 3pA per cache miss
# and then convert to Watt
self.dyn = "voltage * (2 * {}.ipc + 3 * 0.000000001 * " \
"{}.dcache.overallMisses / simSeconds)".format(cpu_path,
cpu_path)
self.dyn = (
"voltage * (2 * {}.ipc + 3 * 0.000000001 * "
"{}.dcache.overallMisses / simSeconds)".format(cpu_path, cpu_path)
)
self.st = "4 * temp"
class CpuPowerOff(MathExprPowerModel):
dyn = "0"
st = "0"
class CpuPowerModel(PowerModel):
def __init__(self, cpu_path, **kwargs):
super(CpuPowerModel, self).__init__(**kwargs)
self.pm = [
CpuPowerOn(cpu_path), # ON
CpuPowerOff(), # CLK_GATED
CpuPowerOff(), # SRAM_RETENTION
CpuPowerOff(), # OFF
CpuPowerOn(cpu_path), # ON
CpuPowerOff(), # CLK_GATED
CpuPowerOff(), # SRAM_RETENTION
CpuPowerOff(), # OFF
]
class L2PowerOn(MathExprPowerModel):
def __init__(self, l2_path, **kwargs):
super(L2PowerOn, self).__init__(**kwargs)
@@ -78,26 +82,29 @@ class L2PowerOn(MathExprPowerModel):
self.dyn = "{}.overallAccesses * 0.000018000".format(l2_path)
self.st = "(voltage * 3)/10"
class L2PowerOff(MathExprPowerModel):
dyn = "0"
st = "0"
class L2PowerModel(PowerModel):
def __init__(self, l2_path, **kwargs):
super(L2PowerModel, self).__init__(**kwargs)
# Choose a power model for every power state
self.pm = [
L2PowerOn(l2_path), # ON
L2PowerOff(), # CLK_GATED
L2PowerOff(), # SRAM_RETENTION
L2PowerOff(), # OFF
L2PowerOn(l2_path), # ON
L2PowerOff(), # CLK_GATED
L2PowerOff(), # SRAM_RETENTION
L2PowerOff(), # OFF
]
def main():
parser = argparse.ArgumentParser(
description="Generic ARM big.LITTLE configuration with "\
"example power models")
description="Generic ARM big.LITTLE configuration with "
"example power models"
)
bL.addOptions(parser)
options = parser.parse_args()
@@ -125,13 +132,15 @@ def main():
bL.instantiate(options)
print("*" * 70)
print("WARNING: The power numbers generated by this script are "
print(
"WARNING: The power numbers generated by this script are "
"examples. They are not representative of any particular "
"implementation or process.")
"implementation or process."
)
print("*" * 70)
# Dumping stats periodically
m5.stats.periodicStatDump(m5.ticks.fromSeconds(0.1E-3))
m5.stats.periodicStatDump(m5.ticks.fromSeconds(0.1e-3))
bL.run()

View File

@@ -40,7 +40,7 @@ from m5.objects import *
from m5.options import *
import argparse
m5.util.addToPath('../..')
m5.util.addToPath("../..")
from common import MemConfig
from common import ObjectList
@@ -52,19 +52,20 @@ from ruby import Ruby
import devices
default_kernel = 'vmlinux.arm64'
default_disk = 'linaro-minimal-aarch64.img'
default_root_device = '/dev/vda1'
default_kernel = "vmlinux.arm64"
default_disk = "linaro-minimal-aarch64.img"
default_root_device = "/dev/vda1"
# Pre-defined CPU configurations.
cpu_types = {
"noncaching" : NonCachingSimpleCPU,
"minor" : MinorCPU,
"hpi" : HPI.HPI,
"o3" : O3_ARM_v7a.O3_ARM_v7a_3,
"noncaching": NonCachingSimpleCPU,
"minor": MinorCPU,
"hpi": HPI.HPI,
"o3": O3_ARM_v7a.O3_ARM_v7a_3,
}
def create_cow_image(name):
"""Helper function to create a Copy-on-Write disk image"""
image = CowDiskImage()
@@ -72,23 +73,31 @@ def create_cow_image(name):
return image
def config_ruby(system, args):
cpus = []
for cluster in system.cpu_cluster:
for cpu in cluster.cpus:
cpus.append(cpu)
Ruby.create_system(args, True, system, system.iobus,
system._dma_ports, system.realview.bootmem,
cpus)
Ruby.create_system(
args,
True,
system,
system.iobus,
system._dma_ports,
system.realview.bootmem,
cpus,
)
# Create a seperate clock domain for Ruby
system.ruby.clk_domain = SrcClockDomain(
clock = args.ruby_clock,
voltage_domain = system.voltage_domain)
clock=args.ruby_clock, voltage_domain=system.voltage_domain
)
def create(args):
''' Create and configure the system object. '''
"""Create and configure the system object."""
if args.script and not os.path.isfile(args.script):
print("Error: Bootscript %s does not exist" % args.script)
@@ -97,19 +106,25 @@ def create(args):
cpu_class = cpu_types[args.cpu]
mem_mode = cpu_class.memory_mode()
system = devices.ArmRubySystem(args.mem_size,
mem_mode=mem_mode,
workload=ArmFsLinux(
object_file=
SysPaths.binary(args.kernel)),
readfile=args.script)
system = devices.ArmRubySystem(
args.mem_size,
mem_mode=mem_mode,
workload=ArmFsLinux(object_file=SysPaths.binary(args.kernel)),
readfile=args.script,
)
# Add CPU clusters to the system
system.cpu_cluster = [
devices.CpuCluster(system,
args.num_cpus,
args.cpu_freq, "1.0V",
cpu_class, None, None, None),
devices.CpuCluster(
system,
args.num_cpus,
args.cpu_freq,
"1.0V",
cpu_class,
None,
None,
None,
)
]
# Add the PCI devices we need for this system. The base system
@@ -120,7 +135,7 @@ def create(args):
# disk. Attach the disk image using gem5's Copy-on-Write
# functionality to avoid writing changes to the stored copy of
# the disk image.
PciVirtIO(vio=VirtIOBlock(image=create_cow_image(args.disk_image))),
PciVirtIO(vio=VirtIOBlock(image=create_cow_image(args.disk_image)))
]
# Attach the PCI devices to the system. The helper method in the
@@ -141,8 +156,9 @@ def create(args):
system.workload.dtb_filename = args.dtb
else:
# No DTB specified: autogenerate DTB
system.workload.dtb_filename = \
os.path.join(m5.options.outdir, 'system.dtb')
system.workload.dtb_filename = os.path.join(
m5.options.outdir, "system.dtb"
)
system.generateDtb(system.workload.dtb_filename)
# Linux boot command flags
@@ -189,41 +205,73 @@ def run(args):
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--dtb", type=str, default=None,
help="DTB file to load")
parser.add_argument("--kernel", type=str, default=default_kernel,
help="Linux kernel")
parser.add_argument("--disk-image", type=str,
default=default_disk,
help="Disk to instantiate")
parser.add_argument("--root-device", type=str,
default=default_root_device,
help="OS device name for root partition (default: {})"
.format(default_root_device))
parser.add_argument("--script", type=str, default="",
help = "Linux bootscript")
parser.add_argument("--cpu", choices=list(cpu_types.keys()),
default="minor",
help="CPU model to use")
parser.add_argument(
"--dtb", type=str, default=None, help="DTB file to load"
)
parser.add_argument(
"--kernel", type=str, default=default_kernel, help="Linux kernel"
)
parser.add_argument(
"--disk-image",
type=str,
default=default_disk,
help="Disk to instantiate",
)
parser.add_argument(
"--root-device",
type=str,
default=default_root_device,
help="OS device name for root partition (default: {})".format(
default_root_device
),
)
parser.add_argument(
"--script", type=str, default="", help="Linux bootscript"
)
parser.add_argument(
"--cpu",
choices=list(cpu_types.keys()),
default="minor",
help="CPU model to use",
)
parser.add_argument("--cpu-freq", type=str, default="4GHz")
parser.add_argument("-n", "--num-cpus", type=int, default=1)
parser.add_argument("--checkpoint", action="store_true")
parser.add_argument("--restore", type=str, default=None)
parser.add_argument("--mem-type", default="DDR3_1600_8x8",
choices=ObjectList.mem_list.get_names(),
help = "type of memory to use")
parser.add_argument("--mem-channels", type=int, default=1,
help = "number of memory channels")
parser.add_argument("--mem-ranks", type=int, default=None,
help = "number of memory ranks per channel")
parser.add_argument(
"--mem-size", action="store", type=str, default="2GiB",
help="Specify the physical memory size (single memory)")
parser.add_argument("--enable-dram-powerdown", action="store_true",
help="Enable low-power states in DRAMInterface")
parser.add_argument("--mem-channels-intlv", type=int, default=0,
help="Memory channels interleave")
"--mem-type",
default="DDR3_1600_8x8",
choices=ObjectList.mem_list.get_names(),
help="type of memory to use",
)
parser.add_argument(
"--mem-channels", type=int, default=1, help="number of memory channels"
)
parser.add_argument(
"--mem-ranks",
type=int,
default=None,
help="number of memory ranks per channel",
)
parser.add_argument(
"--mem-size",
action="store",
type=str,
default="2GiB",
help="Specify the physical memory size (single memory)",
)
parser.add_argument(
"--enable-dram-powerdown",
action="store_true",
help="Enable low-power states in DRAMInterface",
)
parser.add_argument(
"--mem-channels-intlv",
type=int,
default=0,
help="Memory channels interleave",
)
parser.add_argument("--num-dirs", type=int, default=1)
parser.add_argument("--num-l2caches", type=int, default=1)

View File

@@ -45,7 +45,7 @@ from m5.objects import *
from m5.options import *
import argparse
m5.util.addToPath('../..')
m5.util.addToPath("../..")
from common import SysPaths
from common import ObjectList
@@ -55,37 +55,37 @@ from common.cores.arm import O3_ARM_v7a, HPI
import devices
default_kernel = 'vmlinux.arm64'
default_disk = 'linaro-minimal-aarch64.img'
default_root_device = '/dev/vda1'
default_kernel = "vmlinux.arm64"
default_disk = "linaro-minimal-aarch64.img"
default_root_device = "/dev/vda1"
# Pre-defined CPU configurations. Each tuple must be ordered as : (cpu_class,
# l1_icache_class, l1_dcache_class, l2_Cache_class). Any of
# the cache class may be 'None' if the particular cache is not present.
cpu_types = {
"atomic" : (AtomicSimpleCPU, None, None, None),
"minor" : (MinorCPU,
devices.L1I, devices.L1D,
devices.L2),
"hpi" : (HPI.HPI,
HPI.HPI_ICache, HPI.HPI_DCache,
HPI.HPI_L2),
"o3" : (O3_ARM_v7a.O3_ARM_v7a_3,
O3_ARM_v7a.O3_ARM_v7a_ICache, O3_ARM_v7a.O3_ARM_v7a_DCache,
O3_ARM_v7a.O3_ARM_v7aL2),
"atomic": (AtomicSimpleCPU, None, None, None),
"minor": (MinorCPU, devices.L1I, devices.L1D, devices.L2),
"hpi": (HPI.HPI, HPI.HPI_ICache, HPI.HPI_DCache, HPI.HPI_L2),
"o3": (
O3_ARM_v7a.O3_ARM_v7a_3,
O3_ARM_v7a.O3_ARM_v7a_ICache,
O3_ARM_v7a.O3_ARM_v7a_DCache,
O3_ARM_v7a.O3_ARM_v7aL2,
),
}
def create_cow_image(name):
"""Helper function to create a Copy-on-Write disk image"""
image = CowDiskImage()
image.child.image_file = SysPaths.disk(name)
return image;
return image
def create(args):
''' Create and configure the system object. '''
"""Create and configure the system object."""
if args.script and not os.path.isfile(args.script):
print("Error: Bootscript %s does not exist" % args.script)
@@ -96,13 +96,13 @@ def create(args):
# Only simulate caches when using a timing CPU (e.g., the HPI model)
want_caches = True if mem_mode == "timing" else False
system = devices.SimpleSystem(want_caches,
args.mem_size,
mem_mode=mem_mode,
workload=ArmFsLinux(
object_file=
SysPaths.binary(args.kernel)),
readfile=args.script)
system = devices.SimpleSystem(
want_caches,
args.mem_size,
mem_mode=mem_mode,
workload=ArmFsLinux(object_file=SysPaths.binary(args.kernel)),
readfile=args.script,
)
MemConfig.config_mem(args, system)
@@ -114,7 +114,7 @@ def create(args):
# disk. Attach the disk image using gem5's Copy-on-Write
# functionality to avoid writing changes to the stored copy of
# the disk image.
PciVirtIO(vio=VirtIOBlock(image=create_cow_image(args.disk_image))),
PciVirtIO(vio=VirtIOBlock(image=create_cow_image(args.disk_image)))
]
# Attach the PCI devices to the system. The helper method in the
@@ -128,10 +128,9 @@ def create(args):
# Add CPU clusters to the system
system.cpu_cluster = [
devices.CpuCluster(system,
args.num_cores,
args.cpu_freq, "1.0V",
*cpu_types[args.cpu]),
devices.CpuCluster(
system, args.num_cores, args.cpu_freq, "1.0V", *cpu_types[args.cpu]
)
]
# Create a cache hierarchy for the cluster. We are assuming that
@@ -146,8 +145,9 @@ def create(args):
system.workload.dtb_filename = args.dtb
else:
# No DTB specified: autogenerate DTB
system.workload.dtb_filename = \
os.path.join(m5.options.outdir, 'system.dtb')
system.workload.dtb_filename = os.path.join(
m5.options.outdir, "system.dtb"
)
system.generateDtb(system.workload.dtb_filename)
if args.initrd:
@@ -197,41 +197,71 @@ def run(args):
def main():
parser = argparse.ArgumentParser(epilog=__doc__)
parser.add_argument("--dtb", type=str, default=None,
help="DTB file to load")
parser.add_argument("--kernel", type=str, default=default_kernel,
help="Linux kernel")
parser.add_argument("--initrd", type=str, default=None,
help="initrd/initramfs file to load")
parser.add_argument("--disk-image", type=str,
default=default_disk,
help="Disk to instantiate")
parser.add_argument("--root-device", type=str,
default=default_root_device,
help="OS device name for root partition (default: {})"
.format(default_root_device))
parser.add_argument("--script", type=str, default="",
help = "Linux bootscript")
parser.add_argument("--cpu", type=str, choices=list(cpu_types.keys()),
default="atomic",
help="CPU model to use")
parser.add_argument(
"--dtb", type=str, default=None, help="DTB file to load"
)
parser.add_argument(
"--kernel", type=str, default=default_kernel, help="Linux kernel"
)
parser.add_argument(
"--initrd",
type=str,
default=None,
help="initrd/initramfs file to load",
)
parser.add_argument(
"--disk-image",
type=str,
default=default_disk,
help="Disk to instantiate",
)
parser.add_argument(
"--root-device",
type=str,
default=default_root_device,
help="OS device name for root partition (default: {})".format(
default_root_device
),
)
parser.add_argument(
"--script", type=str, default="", help="Linux bootscript"
)
parser.add_argument(
"--cpu",
type=str,
choices=list(cpu_types.keys()),
default="atomic",
help="CPU model to use",
)
parser.add_argument("--cpu-freq", type=str, default="4GHz")
parser.add_argument("--num-cores", type=int, default=1,
help="Number of CPU cores")
parser.add_argument("--mem-type", default="DDR3_1600_8x8",
choices=ObjectList.mem_list.get_names(),
help = "type of memory to use")
parser.add_argument("--mem-channels", type=int, default=1,
help = "number of memory channels")
parser.add_argument("--mem-ranks", type=int, default=None,
help = "number of memory ranks per channel")
parser.add_argument("--mem-size", action="store", type=str,
default="2GB",
help="Specify the physical memory size")
parser.add_argument(
"--num-cores", type=int, default=1, help="Number of CPU cores"
)
parser.add_argument(
"--mem-type",
default="DDR3_1600_8x8",
choices=ObjectList.mem_list.get_names(),
help="type of memory to use",
)
parser.add_argument(
"--mem-channels", type=int, default=1, help="number of memory channels"
)
parser.add_argument(
"--mem-ranks",
type=int,
default=None,
help="number of memory ranks per channel",
)
parser.add_argument(
"--mem-size",
action="store",
type=str,
default="2GB",
help="Specify the physical memory size",
)
parser.add_argument("--checkpoint", action="store_true")
parser.add_argument("--restore", type=str, default=None)
args = parser.parse_args()
root = Root(full_system=True)

View File

@@ -45,7 +45,7 @@ from m5.objects import *
import argparse
import shlex
m5.util.addToPath('../..')
m5.util.addToPath("../..")
from common import ObjectList
from common import MemConfig
@@ -54,25 +54,20 @@ from common.cores.arm import HPI
import devices
# Pre-defined CPU configurations. Each tuple must be ordered as : (cpu_class,
# l1_icache_class, l1_dcache_class, walk_cache_class, l2_Cache_class). Any of
# the cache class may be 'None' if the particular cache is not present.
cpu_types = {
"atomic" : ( AtomicSimpleCPU, None, None, None),
"minor" : (MinorCPU,
devices.L1I, devices.L1D,
devices.L2),
"hpi" : ( HPI.HPI,
HPI.HPI_ICache, HPI.HPI_DCache,
HPI.HPI_L2)
"atomic": (AtomicSimpleCPU, None, None, None),
"minor": (MinorCPU, devices.L1I, devices.L1D, devices.L2),
"hpi": (HPI.HPI, HPI.HPI_ICache, HPI.HPI_DCache, HPI.HPI_L2),
}
class SimpleSeSystem(System):
'''
"""
Example system class for syscall emulation mode
'''
"""
# Use a fixed cache line size of 64 bytes
cache_line_size = 64
@@ -87,8 +82,9 @@ class SimpleSeSystem(System):
# Create a voltage and clock domain for system components
self.voltage_domain = VoltageDomain(voltage="3.3V")
self.clk_domain = SrcClockDomain(clock="1GHz",
voltage_domain=self.voltage_domain)
self.clk_domain = SrcClockDomain(
clock="1GHz", voltage_domain=self.voltage_domain
)
# Create the off-chip memory bus.
self.membus = SystemXBar()
@@ -97,13 +93,11 @@ class SimpleSeSystem(System):
# and to perform debug accesses.
self.system_port = self.membus.cpu_side_ports
# Add CPUs to the system. A cluster of CPUs typically have
# private L1 caches and a shared L2 cache.
self.cpu_cluster = devices.CpuCluster(self,
args.num_cores,
args.cpu_freq, "1.2V",
*cpu_types[args.cpu])
self.cpu_cluster = devices.CpuCluster(
self, args.num_cores, args.cpu_freq, "1.2V", *cpu_types[args.cpu]
)
# Create a cache hierarchy (unless we are simulating a
# functional CPU in atomic memory mode) for the CPU cluster
@@ -129,6 +123,7 @@ class SimpleSeSystem(System):
def numCpus(self):
return self._num_cpus
def get_processes(cmd):
"""Interprets commands to run and returns a list of processes"""
@@ -147,14 +142,14 @@ def get_processes(cmd):
def create(args):
''' Create and configure the system object. '''
"""Create and configure the system object."""
system = SimpleSeSystem(args)
# Tell components about the expected physical memory ranges. This
# is, for example, used by the MemConfig helper to determine where
# to map DRAMs in the physical address space.
system.mem_ranges = [ AddrRange(start=0, size=args.mem_size) ]
system.mem_ranges = [AddrRange(start=0, size=args.mem_size)]
# Configure the off-chip memory system.
MemConfig.config_mem(args, system)
@@ -163,8 +158,10 @@ def create(args):
# that we can pass to gem5.
processes = get_processes(args.commands_to_run)
if len(processes) != args.num_cores:
print("Error: Cannot map %d command(s) onto %d CPU(s)" %
(len(processes), args.num_cores))
print(
"Error: Cannot map %d command(s) onto %d CPU(s)"
% (len(processes), args.num_cores)
)
sys.exit(1)
system.workload = SEWorkload.init_compatible(processes[0].executable)
@@ -179,24 +176,45 @@ def create(args):
def main():
parser = argparse.ArgumentParser(epilog=__doc__)
parser.add_argument("commands_to_run", metavar="command(s)", nargs='*',
help="Command(s) to run")
parser.add_argument("--cpu", type=str, choices=list(cpu_types.keys()),
default="atomic",
help="CPU model to use")
parser.add_argument(
"commands_to_run",
metavar="command(s)",
nargs="*",
help="Command(s) to run",
)
parser.add_argument(
"--cpu",
type=str,
choices=list(cpu_types.keys()),
default="atomic",
help="CPU model to use",
)
parser.add_argument("--cpu-freq", type=str, default="4GHz")
parser.add_argument("--num-cores", type=int, default=1,
help="Number of CPU cores")
parser.add_argument("--mem-type", default="DDR3_1600_8x8",
choices=ObjectList.mem_list.get_names(),
help = "type of memory to use")
parser.add_argument("--mem-channels", type=int, default=2,
help = "number of memory channels")
parser.add_argument("--mem-ranks", type=int, default=None,
help = "number of memory ranks per channel")
parser.add_argument("--mem-size", action="store", type=str,
default="2GB",
help="Specify the physical memory size")
parser.add_argument(
"--num-cores", type=int, default=1, help="Number of CPU cores"
)
parser.add_argument(
"--mem-type",
default="DDR3_1600_8x8",
choices=ObjectList.mem_list.get_names(),
help="type of memory to use",
)
parser.add_argument(
"--mem-channels", type=int, default=2, help="number of memory channels"
)
parser.add_argument(
"--mem-ranks",
type=int,
default=None,
help="number of memory ranks per channel",
)
parser.add_argument(
"--mem-size",
action="store",
type=str,
default="2GB",
help="Specify the physical memory size",
)
args = parser.parse_args()

View File

@@ -42,8 +42,10 @@ from m5.options import *
from common.ObjectList import ObjectList
from common.SysPaths import binary, disk
class ArmBaremetal(ArmFsWorkload):
""" Baremetal workload """
"""Baremetal workload"""
dtb_addr = 0
def __init__(self, obj, system, **kwargs):
@@ -51,6 +53,7 @@ class ArmBaremetal(ArmFsWorkload):
self.object_file = obj
class ArmTrustedFirmware(ArmFsWorkload):
"""
Arm Trusted Firmware (TFA) workload.
@@ -69,20 +72,22 @@ class ArmTrustedFirmware(ArmFsWorkload):
https://github.com/ARM-software/arm-trusted-firmware
"""
dtb_addr = 0
def __init__(self, obj, system, **kwargs):
super(ArmTrustedFirmware, self).__init__(**kwargs)
self.extras = [ binary('bl1.bin'), binary('fip.bin'), ]
self.extras = [binary("bl1.bin"), binary("fip.bin")]
self.extras_addrs = [
system.realview.bootmem.range.start,
system.realview.flash0.range.start
system.realview.flash0.range.start,
]
# Arm Trusted Firmware will provide a PSCI implementation
system._have_psci = True
class _WorkloadList(ObjectList):
def _add_objects(self):
"""Add all sub-classes of the base class in the object hierarchy."""
@@ -90,4 +95,5 @@ class _WorkloadList(ObjectList):
for name, cls in inspect.getmembers(modname, self._is_obj_class):
self._sub_classes[name] = cls
workload_list = _WorkloadList(getattr(m5.objects, 'ArmFsWorkload', None))
workload_list = _WorkloadList(getattr(m5.objects, "ArmFsWorkload", None))

View File

@@ -39,7 +39,7 @@ import argparse
from m5.util import addToPath, fatal
addToPath('../')
addToPath("../")
from common import Options
from common import Simulation
@@ -50,9 +50,11 @@ from common.Caches import *
parser = argparse.ArgumentParser()
Options.addCommonOptions(parser)
if '--ruby' in sys.argv:
print("This script does not support Ruby configuration, mainly"
" because Trace CPU has been tested only with classic memory system")
if "--ruby" in sys.argv:
print(
"This script does not support Ruby configuration, mainly"
" because Trace CPU has been tested only with classic memory system"
)
sys.exit(1)
args = parser.parse_args()
@@ -60,8 +62,10 @@ args = parser.parse_args()
numThreads = 1
if args.cpu_type != "TraceCPU":
fatal("This is a script for elastic trace replay simulation, use "\
"--cpu-type=TraceCPU\n");
fatal(
"This is a script for elastic trace replay simulation, use "
"--cpu-type=TraceCPU\n"
)
if args.num_cpus > 1:
fatal("This script does not support multi-processor trace replay.\n")
@@ -71,27 +75,30 @@ if args.num_cpus > 1:
(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args)
CPUClass.numThreads = numThreads
system = System(cpu = CPUClass(cpu_id=0),
mem_mode = test_mem_mode,
mem_ranges = [AddrRange(args.mem_size)],
cache_line_size = args.cacheline_size)
system = System(
cpu=CPUClass(cpu_id=0),
mem_mode=test_mem_mode,
mem_ranges=[AddrRange(args.mem_size)],
cache_line_size=args.cacheline_size,
)
# Create a top-level voltage domain
system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
system.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
# Create a source clock for the system. This is used as the clock period for
# xbar and memory
system.clk_domain = SrcClockDomain(clock = args.sys_clock,
voltage_domain = system.voltage_domain)
system.clk_domain = SrcClockDomain(
clock=args.sys_clock, voltage_domain=system.voltage_domain
)
# Create a CPU voltage domain
system.cpu_voltage_domain = VoltageDomain()
# Create a separate clock domain for the CPUs. In case of Trace CPUs this clock
# is actually used only by the caches connected to the CPU.
system.cpu_clk_domain = SrcClockDomain(clock = args.cpu_clock,
voltage_domain =
system.cpu_voltage_domain)
system.cpu_clk_domain = SrcClockDomain(
clock=args.cpu_clock, voltage_domain=system.cpu_voltage_domain
)
# All cpus belong to a common cpu_clk_domain, therefore running at a common
# frequency.
@@ -104,8 +111,8 @@ for cpu in system.cpu:
cpu.createThreads()
# Assign input trace files to the Trace CPU
system.cpu.instTraceFile=args.inst_trace_file
system.cpu.dataTraceFile=args.data_trace_file
system.cpu.instTraceFile = args.inst_trace_file
system.cpu.dataTraceFile = args.data_trace_file
# Configure the classic memory system args
MemClass = Simulation.setMemClass(args)
@@ -114,5 +121,5 @@ system.system_port = system.membus.cpu_side_ports
CacheConfig.config_cache(args, system)
MemConfig.config_mem(args, system)
root = Root(full_system = False, system = system)
root = Root(full_system=False, system=system)
Simulation.run(args, root, system, FutureClass)

View File

@@ -47,8 +47,10 @@ from m5.defines import buildEnv
from m5.objects import *
from m5.util import addToPath, fatal, warn
from m5.util.fdthelper import *
from gem5.isas import ISA
from gem5.runtime import get_runtime_isa
addToPath('../')
addToPath("../")
from ruby import Ruby
@@ -63,10 +65,13 @@ from common import ObjectList
from common.Caches import *
from common import Options
def cmd_line_template():
if args.command_line and args.command_line_file:
print("Error: --command-line and --command-line-file are "
"mutually exclusive")
print(
"Error: --command-line and --command-line-file are "
"mutually exclusive"
)
sys.exit(1)
if args.command_line:
return args.command_line
@@ -74,19 +79,23 @@ def cmd_line_template():
return open(args.command_line_file).read().strip()
return None
def build_test_system(np):
cmdline = cmd_line_template()
if buildEnv['TARGET_ISA'] == "mips":
isa = get_runtime_isa()
if isa == ISA.MIPS:
test_sys = makeLinuxMipsSystem(test_mem_mode, bm[0], cmdline=cmdline)
elif buildEnv['TARGET_ISA'] == "sparc":
elif isa == ISA.SPARC:
test_sys = makeSparcSystem(test_mem_mode, bm[0], cmdline=cmdline)
elif buildEnv['TARGET_ISA'] == "riscv":
test_sys = makeBareMetalRiscvSystem(test_mem_mode, bm[0],
cmdline=cmdline)
elif buildEnv['TARGET_ISA'] == "x86":
test_sys = makeLinuxX86System(test_mem_mode, np, bm[0], args.ruby,
cmdline=cmdline)
elif buildEnv['TARGET_ISA'] == "arm":
elif isa == ISA.RISCV:
test_sys = makeBareMetalRiscvSystem(
test_mem_mode, bm[0], cmdline=cmdline
)
elif isa == ISA.X86:
test_sys = makeLinuxX86System(
test_mem_mode, np, bm[0], args.ruby, cmdline=cmdline
)
elif isa == ISA.ARM:
test_sys = makeArmSystem(
test_mem_mode,
args.machine_type,
@@ -103,27 +112,28 @@ def build_test_system(np):
if args.enable_context_switch_stats_dump:
test_sys.enable_context_switch_stats_dump = True
else:
fatal("Incapable of building %s full system!", buildEnv['TARGET_ISA'])
fatal("Incapable of building %s full system!", isa.name)
# Set the cache line size for the entire system
test_sys.cache_line_size = args.cacheline_size
# Create a top-level voltage domain
test_sys.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
test_sys.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
# Create a source clock for the system and set the clock period
test_sys.clk_domain = SrcClockDomain(clock = args.sys_clock,
voltage_domain = test_sys.voltage_domain)
test_sys.clk_domain = SrcClockDomain(
clock=args.sys_clock, voltage_domain=test_sys.voltage_domain
)
# Create a CPU voltage domain
test_sys.cpu_voltage_domain = VoltageDomain()
# Create a source clock for the CPUs and set the clock period
test_sys.cpu_clk_domain = SrcClockDomain(clock = args.cpu_clock,
voltage_domain =
test_sys.cpu_voltage_domain)
test_sys.cpu_clk_domain = SrcClockDomain(
clock=args.cpu_clock, voltage_domain=test_sys.cpu_voltage_domain
)
if buildEnv['TARGET_ISA'] == 'riscv':
if buildEnv["USE_RISCV_ISA"]:
test_sys.workload.bootloader = args.kernel
elif args.kernel is not None:
test_sys.workload.object_file = binary(args.kernel)
@@ -134,17 +144,21 @@ def build_test_system(np):
test_sys.init_param = args.init_param
# For now, assign all the CPUs to the same clock domain
test_sys.cpu = [TestCPUClass(clk_domain=test_sys.cpu_clk_domain, cpu_id=i)
for i in range(np)]
test_sys.cpu = [
TestCPUClass(clk_domain=test_sys.cpu_clk_domain, cpu_id=i)
for i in range(np)
]
if args.ruby:
bootmem = getattr(test_sys, '_bootmem', None)
Ruby.create_system(args, True, test_sys, test_sys.iobus,
test_sys._dma_ports, bootmem)
bootmem = getattr(test_sys, "_bootmem", None)
Ruby.create_system(
args, True, test_sys, test_sys.iobus, test_sys._dma_ports, bootmem
)
# Create a seperate clock domain for Ruby
test_sys.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
voltage_domain = test_sys.voltage_domain)
test_sys.ruby.clk_domain = SrcClockDomain(
clock=args.ruby_clock, voltage_domain=test_sys.voltage_domain
)
# Connect the ruby io port to the PIO bus,
# assuming that there is just one such port.
@@ -163,11 +177,13 @@ def build_test_system(np):
else:
if args.caches or args.l2cache:
# By default the IOCache runs at the system clock
test_sys.iocache = IOCache(addr_ranges = test_sys.mem_ranges)
test_sys.iocache = IOCache(addr_ranges=test_sys.mem_ranges)
test_sys.iocache.cpu_side = test_sys.iobus.mem_side_ports
test_sys.iocache.mem_side = test_sys.membus.cpu_side_ports
elif not args.external_memory_system:
test_sys.iobridge = Bridge(delay='50ns', ranges = test_sys.mem_ranges)
test_sys.iobridge = Bridge(
delay="50ns", ranges=test_sys.mem_ranges
)
test_sys.iobridge.cpu_side_port = test_sys.iobus.mem_side_ports
test_sys.iobridge.mem_side_port = test_sys.membus.cpu_side_ports
@@ -176,7 +192,9 @@ def build_test_system(np):
if not ObjectList.is_noncaching_cpu(TestCPUClass):
fatal("SimPoint generation should be done with atomic cpu")
if np > 1:
fatal("SimPoint generation not supported with more than one CPUs")
fatal(
"SimPoint generation not supported with more than one CPUs"
)
for i in range(np):
if args.simpoint_profile:
@@ -189,9 +207,11 @@ def build_test_system(np):
test_sys.cpu[i].branchPred = bpClass()
if args.indirect_bp_type:
IndirectBPClass = ObjectList.indirect_bp_list.get(
args.indirect_bp_type)
test_sys.cpu[i].branchPred.indirectBranchPred = \
IndirectBPClass()
args.indirect_bp_type
)
test_sys.cpu[
i
].branchPred.indirectBranchPred = IndirectBPClass()
test_sys.cpu[i].createThreads()
# If elastic tracing is enabled when not restoring from checkpoint and
@@ -201,20 +221,24 @@ def build_test_system(np):
# If restoring from checkpoint or fast forwarding, the code that does this for
# FutureCPUClass is in the Simulation module. If the check passes then the
# elastic trace probe is attached to the switch CPUs.
if args.elastic_trace_en and args.checkpoint_restore == None and \
not args.fast_forward:
if (
args.elastic_trace_en
and args.checkpoint_restore == None
and not args.fast_forward
):
CpuConfig.config_etrace(TestCPUClass, test_sys.cpu, args)
CacheConfig.config_cache(args, test_sys)
MemConfig.config_mem(args, test_sys)
if ObjectList.is_kvm_cpu(TestCPUClass) or \
ObjectList.is_kvm_cpu(FutureClass):
if ObjectList.is_kvm_cpu(TestCPUClass) or ObjectList.is_kvm_cpu(
FutureClass
):
# Assign KVM CPUs to their own event queues / threads. This
# has to be done after creating caches and other child objects
# since these mustn't inherit the CPU event queue.
for i,cpu in enumerate(test_sys.cpu):
for i, cpu in enumerate(test_sys.cpu):
# Child objects usually inherit the parent's event
# queue. Override that and use the same event queue for
# all devices.
@@ -225,42 +249,52 @@ def build_test_system(np):
return test_sys
def build_drive_system(np):
# driver system CPU is always simple, so is the memory
# Note this is an assignment of a class, not an instance.
DriveCPUClass = AtomicSimpleCPU
drive_mem_mode = 'atomic'
drive_mem_mode = "atomic"
DriveMemClass = SimpleMemory
cmdline = cmd_line_template()
if buildEnv['TARGET_ISA'] == 'mips':
if buildEnv["USE_MIPS_ISA"]:
drive_sys = makeLinuxMipsSystem(drive_mem_mode, bm[1], cmdline=cmdline)
elif buildEnv['TARGET_ISA'] == 'sparc':
elif buildEnv["USE_SPARC_ISA"]:
drive_sys = makeSparcSystem(drive_mem_mode, bm[1], cmdline=cmdline)
elif buildEnv['TARGET_ISA'] == 'x86':
drive_sys = makeLinuxX86System(drive_mem_mode, np, bm[1],
cmdline=cmdline)
elif buildEnv['TARGET_ISA'] == 'arm':
drive_sys = makeArmSystem(drive_mem_mode, args.machine_type, np,
bm[1], args.dtb_filename, cmdline=cmdline)
elif buildEnv["USE_X86_ISA"]:
drive_sys = makeLinuxX86System(
drive_mem_mode, np, bm[1], cmdline=cmdline
)
elif buildEnv["USE_ARM_ISA"]:
drive_sys = makeArmSystem(
drive_mem_mode,
args.machine_type,
np,
bm[1],
args.dtb_filename,
cmdline=cmdline,
)
# Create a top-level voltage domain
drive_sys.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
drive_sys.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
# Create a source clock for the system and set the clock period
drive_sys.clk_domain = SrcClockDomain(clock = args.sys_clock,
voltage_domain = drive_sys.voltage_domain)
drive_sys.clk_domain = SrcClockDomain(
clock=args.sys_clock, voltage_domain=drive_sys.voltage_domain
)
# Create a CPU voltage domain
drive_sys.cpu_voltage_domain = VoltageDomain()
# Create a source clock for the CPUs and set the clock period
drive_sys.cpu_clk_domain = SrcClockDomain(clock = args.cpu_clock,
voltage_domain =
drive_sys.cpu_voltage_domain)
drive_sys.cpu_clk_domain = SrcClockDomain(
clock=args.cpu_clock, voltage_domain=drive_sys.cpu_voltage_domain
)
drive_sys.cpu = DriveCPUClass(clk_domain=drive_sys.cpu_clk_domain,
cpu_id=0)
drive_sys.cpu = DriveCPUClass(
clk_domain=drive_sys.cpu_clk_domain, cpu_id=0
)
drive_sys.cpu.createThreads()
drive_sys.cpu.createInterruptController()
drive_sys.cpu.connectBus(drive_sys.membus)
@@ -270,15 +304,15 @@ def build_drive_system(np):
if ObjectList.is_kvm_cpu(DriveCPUClass):
drive_sys.kvm_vm = KvmVM()
drive_sys.iobridge = Bridge(delay='50ns',
ranges = drive_sys.mem_ranges)
drive_sys.iobridge = Bridge(delay="50ns", ranges=drive_sys.mem_ranges)
drive_sys.iobridge.cpu_side_port = drive_sys.iobus.mem_side_ports
drive_sys.iobridge.mem_side_port = drive_sys.membus.cpu_side_ports
# Create the appropriate memory controllers and connect them to the
# memory bus
drive_sys.mem_ctrls = [DriveMemClass(range = r)
for r in drive_sys.mem_ranges]
drive_sys.mem_ctrls = [
DriveMemClass(range=r) for r in drive_sys.mem_ranges
]
for i in range(len(drive_sys.mem_ctrls)):
drive_sys.mem_ctrls[i].port = drive_sys.membus.mem_side_ports
@@ -286,13 +320,14 @@ def build_drive_system(np):
return drive_sys
# Add args
parser = argparse.ArgumentParser()
Options.addCommonOptions(parser)
Options.addFSOptions(parser)
# Add the ruby specific and protocol specific args
if '--ruby' in sys.argv:
if "--ruby" in sys.argv:
Ruby.define_options(parser)
args = parser.parse_args()
@@ -312,13 +347,29 @@ if args.benchmark:
sys.exit(1)
else:
if args.dual:
bm = [SysConfig(disks=args.disk_image, rootdev=args.root_device,
mem=args.mem_size, os_type=args.os_type),
SysConfig(disks=args.disk_image, rootdev=args.root_device,
mem=args.mem_size, os_type=args.os_type)]
bm = [
SysConfig(
disks=args.disk_image,
rootdev=args.root_device,
mem=args.mem_size,
os_type=args.os_type,
),
SysConfig(
disks=args.disk_image,
rootdev=args.root_device,
mem=args.mem_size,
os_type=args.os_type,
),
]
else:
bm = [SysConfig(disks=args.disk_image, rootdev=args.root_device,
mem=args.mem_size, os_type=args.os_type)]
bm = [
SysConfig(
disks=args.disk_image,
rootdev=args.root_device,
mem=args.mem_size,
os_type=args.os_type,
)
]
np = args.num_cpus
@@ -329,28 +380,29 @@ if len(bm) == 2:
root = makeDualRoot(True, test_sys, drive_sys, args.etherdump)
elif len(bm) == 1 and args.dist:
# This system is part of a dist-gem5 simulation
root = makeDistRoot(test_sys,
args.dist_rank,
args.dist_size,
args.dist_server_name,
args.dist_server_port,
args.dist_sync_repeat,
args.dist_sync_start,
args.ethernet_linkspeed,
args.ethernet_linkdelay,
args.etherdump);
root = makeDistRoot(
test_sys,
args.dist_rank,
args.dist_size,
args.dist_server_name,
args.dist_server_port,
args.dist_sync_repeat,
args.dist_sync_start,
args.ethernet_linkspeed,
args.ethernet_linkdelay,
args.etherdump,
)
elif len(bm) == 1:
root = Root(full_system=True, system=test_sys)
else:
print("Error I don't know how to create more than 2 systems.")
sys.exit(1)
if ObjectList.is_kvm_cpu(TestCPUClass) or \
ObjectList.is_kvm_cpu(FutureClass):
if ObjectList.is_kvm_cpu(TestCPUClass) or ObjectList.is_kvm_cpu(FutureClass):
# Required for running kvm on multiple host cores.
# Uses gem5's parallel event queue feature
# Note: The simulator is quite picky about this number!
root.sim_quantum = int(1e9) # 1 ms
root.sim_quantum = int(1e9) # 1 ms
if args.timesync:
root.time_sync_enable = True
@@ -358,22 +410,26 @@ if args.timesync:
if args.frame_capture:
VncServer.frame_capture = True
if buildEnv['TARGET_ISA'] == "arm" and not args.bare_metal \
and not args.dtb_filename:
if args.machine_type not in ["VExpress_GEM5",
"VExpress_GEM5_V1",
"VExpress_GEM5_V2",
"VExpress_GEM5_Foundation"]:
warn("Can only correctly generate a dtb for VExpress_GEM5_* " \
"platforms, unless custom hardware models have been equipped "\
"with generation functionality.")
if buildEnv["USE_ARM_ISA"] and not args.bare_metal and not args.dtb_filename:
if args.machine_type not in [
"VExpress_GEM5",
"VExpress_GEM5_V1",
"VExpress_GEM5_V2",
"VExpress_GEM5_Foundation",
]:
warn(
"Can only correctly generate a dtb for VExpress_GEM5_* "
"platforms, unless custom hardware models have been equipped "
"with generation functionality."
)
# Generate a Device Tree
for sysname in ('system', 'testsys', 'drivesys'):
for sysname in ("system", "testsys", "drivesys"):
if hasattr(root, sysname):
sys = getattr(root, sysname)
sys.workload.dtb_filename = \
os.path.join(m5.options.outdir, '%s.dtb' % sysname)
sys.workload.dtb_filename = os.path.join(
m5.options.outdir, "%s.dtb" % sysname
)
sys.generateDtb(sys.workload.dtb_filename)
if args.wait_gdb:

View File

@@ -32,7 +32,7 @@ from m5.defines import buildEnv
from m5.util import addToPath
import os, argparse, sys
addToPath('../')
addToPath("../")
from common import Options
from ruby import Ruby
@@ -45,41 +45,77 @@ m5_root = os.path.dirname(config_root)
parser = argparse.ArgumentParser()
Options.addNoISAOptions(parser)
parser.add_argument("--synthetic", default="uniform_random",
choices=['uniform_random', 'tornado', 'bit_complement', \
'bit_reverse', 'bit_rotation', 'neighbor', \
'shuffle', 'transpose'])
parser.add_argument(
"--synthetic",
default="uniform_random",
choices=[
"uniform_random",
"tornado",
"bit_complement",
"bit_reverse",
"bit_rotation",
"neighbor",
"shuffle",
"transpose",
],
)
parser.add_argument("-i", "--injectionrate", type=float, default=0.1,
metavar="I",
help="Injection rate in packets per cycle per node. \
parser.add_argument(
"-i",
"--injectionrate",
type=float,
default=0.1,
metavar="I",
help="Injection rate in packets per cycle per node. \
Takes decimal value between 0 to 1 (eg. 0.225). \
Number of digits after 0 depends upon --precision.")
Number of digits after 0 depends upon --precision.",
)
parser.add_argument("--precision", type=int, default=3,
help="Number of digits of precision after decimal point\
for injection rate")
parser.add_argument(
"--precision",
type=int,
default=3,
help="Number of digits of precision after decimal point\
for injection rate",
)
parser.add_argument("--sim-cycles", type=int, default=1000,
help="Number of simulation cycles")
parser.add_argument(
"--sim-cycles", type=int, default=1000, help="Number of simulation cycles"
)
parser.add_argument("--num-packets-max", type=int, default=-1,
help="Stop injecting after --num-packets-max.\
Set to -1 to disable.")
parser.add_argument(
"--num-packets-max",
type=int,
default=-1,
help="Stop injecting after --num-packets-max.\
Set to -1 to disable.",
)
parser.add_argument("--single-sender-id", type=int, default=-1,
help="Only inject from this sender.\
Set to -1 to disable.")
parser.add_argument(
"--single-sender-id",
type=int,
default=-1,
help="Only inject from this sender.\
Set to -1 to disable.",
)
parser.add_argument("--single-dest-id", type=int, default=-1,
help="Only send to this destination.\
Set to -1 to disable.")
parser.add_argument(
"--single-dest-id",
type=int,
default=-1,
help="Only send to this destination.\
Set to -1 to disable.",
)
parser.add_argument("--inj-vnet", type=int, default=-1,
choices=[-1,0,1,2],
help="Only inject in this vnet (0, 1 or 2).\
parser.add_argument(
"--inj-vnet",
type=int,
default=-1,
choices=[-1, 0, 1, 2],
help="Only inject in this vnet (0, 1 or 2).\
0 and 1 are 1-flit, 2 is 5-flit.\
Set to -1 to inject randomly in all vnets.")
Set to -1 to inject randomly in all vnets.",
)
#
# Add the ruby specific and protocol specific options
@@ -88,51 +124,56 @@ Ruby.define_options(parser)
args = parser.parse_args()
cpus = [ GarnetSyntheticTraffic(
num_packets_max=args.num_packets_max,
single_sender=args.single_sender_id,
single_dest=args.single_dest_id,
sim_cycles=args.sim_cycles,
traffic_type=args.synthetic,
inj_rate=args.injectionrate,
inj_vnet=args.inj_vnet,
precision=args.precision,
num_dest=args.num_dirs) \
for i in range(args.num_cpus) ]
cpus = [
GarnetSyntheticTraffic(
num_packets_max=args.num_packets_max,
single_sender=args.single_sender_id,
single_dest=args.single_dest_id,
sim_cycles=args.sim_cycles,
traffic_type=args.synthetic,
inj_rate=args.injectionrate,
inj_vnet=args.inj_vnet,
precision=args.precision,
num_dest=args.num_dirs,
)
for i in range(args.num_cpus)
]
# create the desired simulated system
system = System(cpu = cpus, mem_ranges = [AddrRange(args.mem_size)])
system = System(cpu=cpus, mem_ranges=[AddrRange(args.mem_size)])
# Create a top-level voltage domain and clock domain
system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
system.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
system.clk_domain = SrcClockDomain(clock = args.sys_clock,
voltage_domain = system.voltage_domain)
system.clk_domain = SrcClockDomain(
clock=args.sys_clock, voltage_domain=system.voltage_domain
)
Ruby.create_system(args, False, system)
# Create a separate clock domain for Ruby
system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
voltage_domain = system.voltage_domain)
system.ruby.clk_domain = SrcClockDomain(
clock=args.ruby_clock, voltage_domain=system.voltage_domain
)
i = 0
for ruby_port in system.ruby._cpu_ports:
#
# Tie the cpu test ports to the ruby cpu port
#
cpus[i].test = ruby_port.in_ports
i += 1
#
# Tie the cpu test ports to the ruby cpu port
#
cpus[i].test = ruby_port.in_ports
i += 1
# -----------------------
# run simulation
# -----------------------
root = Root(full_system = False, system = system)
root.system.mem_mode = 'timing'
root = Root(full_system=False, system=system)
root.system.mem_mode = "timing"
# Not much point in this being higher than the L1 latency
m5.ticks.setGlobalFrequency('1ps')
m5.ticks.setGlobalFrequency("1ps")
# instantiate configuration
m5.instantiate()
@@ -140,4 +181,4 @@ m5.instantiate()
# simulate until program terminates
exit_event = m5.simulate(args.abs_max_tick)
print('Exiting @ tick', m5.curTick(), 'because', exit_event.getCause())
print("Exiting @ tick", m5.curTick(), "because", exit_event.getCause())

View File

@@ -93,7 +93,6 @@ simulator.run()
print(
"Exiting @ tick {} because {}.".format(
simulator.get_current_tick(),
simulator.get_last_exit_event_cause(),
simulator.get_current_tick(), simulator.get_last_exit_event_cause()
)
)

View File

@@ -25,18 +25,17 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
This script shows an example of booting an ARM based full system Ubuntu
disk image using the gem5's standard library. This simulation boots the disk
image using 2 TIMING CPU cores. The simulation ends when the startup is
completed successfully (i.e. when an `m5_exit instruction is reached on
successful boot).
This script further shows an example of booting an ARM based full system Ubuntu
disk image. This simulation boots the disk image using 2 TIMING CPU cores. The
simulation ends when the startup is completed successfully (i.e. when an
`m5_exit instruction is reached on successful boot).
Usage
-----
```
scons build/ARM/gem5.opt -j<NUM_CPUS>
./build/ARM/gem5.opt configs/example/gem5_library/arm-ubuntu-boot-exit.py
./build/ARM/gem5.opt configs/example/gem5_library/arm-ubuntu-run.py
```
"""
@@ -44,59 +43,44 @@ scons build/ARM/gem5.opt -j<NUM_CPUS>
from gem5.isas import ISA
from m5.objects import ArmDefaultRelease
from gem5.utils.requires import requires
from gem5.resources.resource import Resource
from gem5.resources.workload import Workload
from gem5.simulate.simulator import Simulator
from m5.objects import VExpress_GEM5_Foundation
from gem5.coherence_protocol import CoherenceProtocol
from gem5.components.boards.arm_board import ArmBoard
from gem5.components.memory import DualChannelDDR4_2400
from gem5.components.processors.cpu_types import CPUTypes
from gem5.components.processors.simple_processor import SimpleProcessor
# This runs a check to ensure the gem5 binary is compiled for ARM.
requires(
isa_required=ISA.ARM,
)
# This runs a check to ensure the gem5 binary is compiled for ARM and the
# protocol is CHI.
# With ARM, we use simple caches.
requires(isa_required=ISA.ARM)
from gem5.components.cachehierarchies.classic\
.private_l1_private_l2_cache_hierarchy import (
from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
PrivateL1PrivateL2CacheHierarchy,
)
# Here we setup the parameters of the l1 and l2 caches.
cache_hierarchy = PrivateL1PrivateL2CacheHierarchy(
l1d_size="16kB",
l1i_size="16kB",
l2_size="256kB",
l1d_size="16kB", l1i_size="16kB", l2_size="256kB"
)
# Memory: Dual Channel DDR4 2400 DRAM device.
memory = DualChannelDDR4_2400(size = "2GB")
memory = DualChannelDDR4_2400(size="2GB")
# Here we setup the processor. We use a simple TIMING processor. The config
# script was also tested with ATOMIC processor.
processor = SimpleProcessor(
cpu_type=CPUTypes.TIMING,
num_cores=2,
)
processor = SimpleProcessor(cpu_type=CPUTypes.TIMING, num_cores=2, isa=ISA.ARM)
# The ArmBoard requires a `release` to be specified. This adds all the
# extensions or features to the system. We are setting this to Armv8
# (ArmDefaultRelease) in this example config script. However, the ArmBoard
# currently does not support SECURITY extension.
# (ArmDefaultRelease) in this example config script.
release = ArmDefaultRelease()
# Removing the SECURITY extension.
release.extensions.remove(release.extensions[2])
# The platform sets up the memory ranges of all the on-chip and off-chip
# devices present on the ARM system.
@@ -105,44 +89,22 @@ platform = VExpress_GEM5_Foundation()
# Here we setup the board. The ArmBoard allows for Full-System ARM simulations.
board = ArmBoard(
clk_freq = "3GHz",
processor = processor,
memory = memory,
cache_hierarchy = cache_hierarchy,
release = release,
platform = platform
clk_freq="3GHz",
processor=processor,
memory=memory,
cache_hierarchy=cache_hierarchy,
release=release,
platform=platform,
)
# Here we set the Full System workload.
# Here we set a full system workload. The "arm64-ubuntu-20.04-boot" boots
# Ubuntu 20.04.
# The `set_kernel_disk_workload` function on the ArmBoard accepts an ARM
# kernel, a disk image, and, path to the bootloader.
board.set_kernel_disk_workload(
# The ARM kernel will be automatically downloaded to the `~/.cache/gem5`
# directory if not already present. The arm-ubuntu-boot-exit was tested
# with `vmlinux.arm64`
kernel = Resource("arm64-linux-kernel-5.4.49"),
# The ARM ubuntu image will be automatically downloaded to the
# `~/.cache/gem5` directory if not already present.
disk_image = Resource("arm64-ubuntu-18.04-img"),
# We need to specify the path for the bootloader file.
bootloader = Resource("arm64-bootloader-foundation"),
# For the arm64-ubuntu-18.04.img, we need to specify the readfile content
readfile_contents = "m5 exit"
)
board.set_workload(Workload("arm64-ubuntu-20.04-boot"))
# We define the system with the aforementioned system defined.
simulator = Simulator(board = board)
simulator = Simulator(board=board)
# Once the system successfully boots, it encounters an
# `m5_exit instruction encountered`. We stop the simulation then. When the

View File

@@ -67,8 +67,9 @@ cache_hierarchy = NoCache()
memory = SingleChannelDDR3_1600(size="32MB")
# We use a simple Timing processor with one core.
processor = SimpleProcessor(cpu_type=CPUTypes.TIMING, isa=ISA.RISCV,
num_cores=1)
processor = SimpleProcessor(
cpu_type=CPUTypes.TIMING, isa=ISA.RISCV, num_cores=1
)
# The gem5 library simple board which can be used to run simple SE-mode
# simulations.
@@ -83,30 +84,23 @@ board = SimpleBoard(
# program compiled to the RISCV ISA. The `Resource` class will automatically
# download the binary from the gem5 Resources cloud bucket if it's not already
# present.
board.set_se_binary_workload(
# the workload should be the same as the save-checkpoint script
Resource("riscv-hello")
)
# Getting the pre-taken checkpoint from gem5-resources. This checkpoint
# We get the pre-taken checkpoint from gem5-resources. This checkpoint
# was taken from running this gem5 configuration script,
# configs/example/gem5_library/checkpoints/riscv-hello-save-checkpoint.py
checkpoint_resource = Resource("riscv-hello-example-checkpoint")
board.set_se_binary_workload(
# the workload should be the same as the save-checkpoint script
Resource("riscv-hello"),
checkpoint=Resource("riscv-hello-example-checkpoint-v22-1"),
)
# Now we restore the checkpoint by passing the path to the checkpoint to
# the Simulator object. The checkpoint_path could be a string containing
# the path to the checkpoint folder. However, here, we use gem5 resources
# to automatically download the checkpoint folder, and use .get_local_path()
# to obtain the path to that folder.
checkpoint_path = checkpoint_resource.get_local_path()
print("Restore a checkpoint at", checkpoint_path)
simulator = Simulator(board=board, full_system=False,
checkpoint_path=checkpoint_path)
simulator = Simulator(
board=board,
full_system=False,
)
simulator.run()
print(
"Exiting @ tick {} because {}.".format(
simulator.get_current_tick(),
simulator.get_last_exit_event_cause(),
simulator.get_current_tick(), simulator.get_last_exit_event_cause()
)
)

View File

@@ -43,6 +43,7 @@ scons build/RISCV/gem5.opt
```
"""
import argparse
from gem5.isas import ISA
from gem5.utils.requires import requires
from gem5.resources.resource import Resource
@@ -53,6 +54,18 @@ from gem5.components.cachehierarchies.classic.no_cache import NoCache
from gem5.components.processors.simple_processor import SimpleProcessor
from gem5.simulate.simulator import Simulator
parser = argparse.ArgumentParser()
parser.add_argument(
"--checkpoint-path",
type=str,
required=False,
default="riscv-hello-checkpoint/",
help="The directory to store the checkpoint.",
)
args = parser.parse_args()
# This check ensures the gem5 binary is compiled to the RISCV ISA target.
# If not, an exception will be thrown.
requires(isa_required=ISA.RISCV)
@@ -64,8 +77,9 @@ cache_hierarchy = NoCache()
memory = SingleChannelDDR3_1600(size="32MB")
# We use a simple Timing processor with one core.
processor = SimpleProcessor(cpu_type=CPUTypes.TIMING, isa=ISA.RISCV,
num_cores=1)
processor = SimpleProcessor(
cpu_type=CPUTypes.TIMING, isa=ISA.RISCV, num_cores=1
)
# The gem5 library simple board which can be used to run simple SE-mode
# simulations.
@@ -93,16 +107,14 @@ board.set_se_binary_workload(
# Lastly we run the simulation.
max_ticks = 10**6
simulator = Simulator(board=board, full_system=False)
simulator.run(max_ticks = max_ticks)
simulator.run(max_ticks=max_ticks)
print(
"Exiting @ tick {} because {}.".format(
simulator.get_current_tick(),
simulator.get_last_exit_event_cause(),
simulator.get_current_tick(), simulator.get_last_exit_event_cause()
)
)
checkpoint_path = "riscv-hello-checkpoint/"
print("Taking a checkpoint at", checkpoint_path)
simulator.save_checkpoint(checkpoint_path)
print("Taking a checkpoint at", args.checkpoint_path)
simulator.save_checkpoint(args.checkpoint_path)
print("Done taking a checkpoint")

View File

@@ -0,0 +1,125 @@
# Copyright (c) 2022 The Regents of the University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
This configuration script shows an example of how to take checkpoints for
SimPoints using the gem5 stdlib. Simpoints are set via a Workload and the
gem5 SimPoint module will calculate where to take checkpoints based on the
SimPoints, SimPoints interval length, and the warmup instruction length.
This script builds a simple board with the gem5 stdlib with no cache and a
simple memory structure to take checkpoints. Some of the components, such as
cache hierarchy, can be changed when restoring checkpoints.
Usage
-----
```
scons build/X86/gem5.opt
./build/X86/gem5.opt \
configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py
./build/X86/gem5.opt \
configs/example/gem5_library/checkpoints/simpoints-se-restore.py
```
"""
import argparse
from gem5.simulate.exit_event import ExitEvent
from gem5.simulate.simulator import Simulator
from gem5.utils.requires import requires
from gem5.components.boards.simple_board import SimpleBoard
from gem5.components.memory.single_channel import SingleChannelDDR3_1600
from gem5.components.processors.simple_processor import SimpleProcessor
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.resources.workload import Workload
from pathlib import Path
from gem5.components.cachehierarchies.classic.no_cache import NoCache
from gem5.simulate.exit_event_generators import (
save_checkpoint_generator,
)
requires(isa_required=ISA.X86)
parser = argparse.ArgumentParser(
description="An example simpoint workload file path"
)
# The lone argument is a file path to a directory to store the checkpoints.
parser.add_argument(
"--checkpoint-path",
type=str,
required=False,
default="se_checkpoint_folder/",
help="The directory to store the checkpoint.",
)
args = parser.parse_args()
# When taking a checkpoint, the cache state is not saved, so the cache
# hierarchy can be changed completely when restoring from a checkpoint.
# By using NoCache() to take checkpoints, it can slightly improve the
# performance when running in atomic mode, and it will not put any restrictions
# on what people can do with the checkpoints.
cache_hierarchy = NoCache()
# Using simple memory to take checkpoints might slightly improve the
# performance in atomic mode. The memory structure can be changed when
# restoring from a checkpoint, but the size of the memory must be maintained.
memory = SingleChannelDDR3_1600(size="2GB")
processor = SimpleProcessor(
cpu_type=CPUTypes.ATOMIC,
isa=ISA.X86,
# SimPoints only works with one core
num_cores=1,
)
board = SimpleBoard(
clk_freq="3GHz",
processor=processor,
memory=memory,
cache_hierarchy=cache_hierarchy,
)
board.set_workload(Workload("x86-print-this-15000-with-simpoints"))
# Resolve the directory the checkpoints are written to. The local is named
# `checkpoint_dir` so it does not shadow the builtin `dir`, and `parents=True`
# allows a nested --checkpoint-path (e.g. "out/run1/ckpts") to be created
# instead of failing when an intermediate directory is missing.
checkpoint_dir = Path(args.checkpoint_path)
checkpoint_dir.mkdir(parents=True, exist_ok=True)

simulator = Simulator(
    board=board,
    on_exit_event={
        # using the SimPoints event generator in the standard library to take
        # checkpoints
        ExitEvent.SIMPOINT_BEGIN: save_checkpoint_generator(checkpoint_dir)
    },
)

simulator.run()

View File

@@ -0,0 +1,136 @@
# Copyright (c) 2022 The Regents of the University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
This configuration script shows an example of how to restore a checkpoint that
was taken for SimPoints in the
configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py.
The SimPoints, SimPoints interval length, and the warmup instruction length
are passed into the SimPoint module, so the SimPoint object will store and
calculate the warmup instruction length for each SimPoints based on the
available instructions before reaching the start of the SimPoint. With the
Simulator module, exit event will be generated to stop when the warmup session
ends and the SimPoints interval ends.
This script builds a more complex board than the board used for taking
checkpoint.
Usage
-----
```
scons build/X86/gem5.opt
./build/X86/gem5.opt \
configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py
./build/X86/gem5.opt \
configs/example/gem5_library/checkpoints/simpoints-se-restore.py
```
"""
from gem5.simulate.exit_event import ExitEvent
from gem5.simulate.simulator import Simulator
from gem5.utils.requires import requires
from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
PrivateL1PrivateL2CacheHierarchy,
)
from gem5.components.boards.simple_board import SimpleBoard
from gem5.components.memory import DualChannelDDR4_2400
from gem5.components.processors.simple_processor import SimpleProcessor
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.resources.resource import Resource
from gem5.resources.workload import Workload
from pathlib import Path
from m5.stats import reset, dump
requires(isa_required=ISA.X86)
# The cache hierarchy can be different from the cache hierarchy used in taking
# the checkpoints
cache_hierarchy = PrivateL1PrivateL2CacheHierarchy(
l1d_size="32kB",
l1i_size="32kB",
l2_size="256kB",
)
# The memory structure can be different from the memory structure used in
# taking the checkpoints, but the size of the memory must be maintained
memory = DualChannelDDR4_2400(size="2GB")
processor = SimpleProcessor(
cpu_type=CPUTypes.TIMING,
isa=ISA.X86,
num_cores=1,
)
board = SimpleBoard(
clk_freq="3GHz",
processor=processor,
memory=memory,
cache_hierarchy=cache_hierarchy,
)
# Here we obtain the workload from gem5 resources. The checkpoint in this
# workload was generated from
# `configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py`.
board.set_workload(
Workload("x86-print-this-15000-with-simpoints-and-checkpoint")
)
def max_inst():
    """Generator that reacts to successive MAX_INSTS exit events.

    It is registered below as the handler for ``ExitEvent.MAX_INSTS``.

    * First event (end of warmup): schedules another MAX_INSTS exit one
      SimPoint interval ahead, dumps and resets the statistics, then yields
      ``False``.
    * Every later event (end of the SimPoint interval): yields ``True``.

    NOTE(review): the yielded boolean is presumably the Simulator's
    "stop simulating" flag (``True`` ends the run) -- confirm against the
    Simulator's ``on_exit_event`` contract.
    """
    warmed_up = False
    while True:
        if warmed_up:
            print("end of SimPoint interval")
            yield True
        else:
            print("end of warmup, starting to simulate SimPoint")
            warmed_up = True
            # Schedule a MAX_INSTS exit event during the simulation
            simulator.schedule_max_insts(
                board.get_simpoint().get_simpoint_interval()
            )
            # Dump, then reset, the statistics at the end of warmup.
            dump()
            reset()
            yield False
simulator = Simulator(
board=board,
on_exit_event={ExitEvent.MAX_INSTS: max_inst()},
)
# Schedule a MAX_INSTS exit event before the simulation begins. The
# schedule_max_insts function only schedules an event when the instruction
# length is greater than 0.
# Here, it schedules an exit event for the first SimPoint's warmup
# instructions
simulator.schedule_max_insts(board.get_simpoint().get_warmup_list()[0])
simulator.run()

View File

@@ -0,0 +1,114 @@
# Copyright (c) 2021 The Regents of the University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
This script is used for running a traffic generator connected to a memory
device. It supports linear and random accesses with a configurable amount
of write traffic.
By default, this script runs with one channel (two pseudo channels) of HBM2
and this channel is driven with 32GiB/s of traffic for 1ms.
"""
import argparse
from m5.objects import MemorySize
from gem5.components.boards.test_board import TestBoard
from gem5.components.processors.linear_generator import LinearGenerator
from gem5.components.processors.random_generator import RandomGenerator
from gem5.components.memory.hbm import HighBandwidthMemory
from gem5.components.memory.dram_interfaces.hbm import HBM_2000_4H_1x64
from gem5.simulate.simulator import Simulator
def generator_factory(
    generator_class: str, rd_perc: int, mem_size: MemorySize
):
    """Build the traffic generator selected on the command line.

    :param generator_class: Name of the generator to build; either
        "LinearGenerator" or "RandomGenerator".
    :param rd_perc: Percentage of read requests in the generated traffic.
        It is converted with ``int()`` and must lie in the range 0 to 100.
    :param mem_size: Passed to the generator as ``max_addr``.

    :returns: A ``LinearGenerator`` or ``RandomGenerator`` configured with a
        duration of "1ms" and a rate of "32GiB/s".

    :raises ValueError: If ``rd_perc`` is outside 0 to 100, or if
        ``generator_class`` is not one of the two supported names.
    """
    rd_perc = int(rd_perc)
    if not 0 <= rd_perc <= 100:
        raise ValueError(
            "Read percentage has to be an integer number between 0 and 100."
        )

    # Pick the class first so the shared constructor arguments are written
    # only once. The class names are looked up only in the branch that is
    # taken.
    if generator_class == "LinearGenerator":
        generator_type = LinearGenerator
    elif generator_class == "RandomGenerator":
        generator_type = RandomGenerator
    else:
        raise ValueError(f"Unknown generator class {generator_class}")

    return generator_type(
        duration="1ms", rate="32GiB/s", max_addr=mem_size, rd_perc=rd_perc
    )
parser = argparse.ArgumentParser(
description="A traffic generator that can be used to test a gem5 "
"memory component."
)
parser.add_argument(
"generator_class",
type=str,
help="The class of generator to use.",
choices=[
"LinearGenerator",
"RandomGenerator",
],
)
parser.add_argument(
"read_percentage",
type=int,
help="Percentage of read requests in the generated traffic.",
)
args = parser.parse_args()
# Single pair of HBM2 pseudo channels. This can be replaced with any
# single ported memory device
memory = HighBandwidthMemory(HBM_2000_4H_1x64, 1, 128)
generator = generator_factory(
args.generator_class, args.read_percentage, memory.get_size()
)
# We use the Test Board. This is a special board to run traffic generation
# tasks. Can replace the cache_hierarchy with any hierarchy to simulate the
# cache as well as the memory
board = TestBoard(
clk_freq="1GHz", # Ignored for these generators
generator=generator, # We pass the traffic generator as the processor.
memory=memory,
# With no cache hierarchy the test board will directly connect the
# generator to the memory
cache_hierarchy=None,
)
simulator = Simulator(board=board)
simulator.run()

View File

@@ -42,10 +42,9 @@ Characteristics
from gem5.components.boards.riscv_board import RiscvBoard
from gem5.components.memory import SingleChannelDDR3_1600
from gem5.components.processors.simple_processor import SimpleProcessor
from gem5.components.cachehierarchies.classic.\
private_l1_private_l2_cache_hierarchy import (
PrivateL1PrivateL2CacheHierarchy,
)
from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
PrivateL1PrivateL2CacheHierarchy,
)
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.utils.requires import requires
@@ -80,8 +79,8 @@ board = RiscvBoard(
# Set the Full System workload.
board.set_kernel_disk_workload(
kernel=Resource("riscv-bootloader-vmlinux-5.10"),
disk_image=Resource("riscv-disk-img"),
kernel=Resource("riscv-bootloader-vmlinux-5.10"),
disk_image=Resource("riscv-disk-img"),
)
simulator = Simulator(board=board)
@@ -90,4 +89,4 @@ print("Beginning simulation!")
# using m5term (`./util/term`): `./m5term localhost <port>`. Note the `<port>`
# value is obtained from the gem5 terminal stdout. Look out for
# "system.platform.terminal: Listening for connections on port <port>".
simulator.run()
simulator.run()

View File

@@ -46,42 +46,33 @@ from m5.objects import Root
from gem5.utils.requires import requires
from gem5.components.boards.riscv_board import RiscvBoard
from gem5.components.memory import DualChannelDDR4_2400
from gem5.components.processors.simple_processor import (
SimpleProcessor,
)
from gem5.components.processors.simple_processor import SimpleProcessor
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.coherence_protocol import CoherenceProtocol
from gem5.resources.resource import Resource
from gem5.simulate.simulator import Simulator
from gem5.resources.workload import Workload
# This runs a check to ensure the gem5 binary is compiled for RISCV.
requires(
isa_required=ISA.RISCV,
)
requires(isa_required=ISA.RISCV)
# With RISCV, we use simple caches.
from gem5.components.cachehierarchies.classic\
.private_l1_private_l2_cache_hierarchy import (
from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
PrivateL1PrivateL2CacheHierarchy,
)
# Here we setup the parameters of the l1 and l2 caches.
cache_hierarchy = PrivateL1PrivateL2CacheHierarchy(
l1d_size="16kB",
l1i_size="16kB",
l2_size="256kB",
l1d_size="16kB", l1i_size="16kB", l2_size="256kB"
)
# Memory: Dual Channel DDR4 2400 DRAM device.
memory = DualChannelDDR4_2400(size = "3GB")
memory = DualChannelDDR4_2400(size="3GB")
# Here we setup the processor. We use a simple processor.
processor = SimpleProcessor(
cpu_type=CPUTypes.TIMING,
isa=ISA.RISCV,
num_cores=2,
cpu_type=CPUTypes.TIMING, isa=ISA.RISCV, num_cores=2
)
# Here we setup the board. The RiscvBoard allows for Full-System RISCV
@@ -93,27 +84,11 @@ board = RiscvBoard(
cache_hierarchy=cache_hierarchy,
)
# Here we set the Full System workload.
# The `set_kernel_disk_workload` function for the RiscvBoard accepts a
# RISCV bootloader and a disk image. Once the system successfully boots, it
# encounters an `m5_exit instruction encountered`. We stop the simulation then.
# When the simulation has ended you may inspect `m5out/system.pc.com_1.device`
# to see the stdout.
board.set_kernel_disk_workload(
# The RISCV bootloader will be automatically downloaded to the
# `~/.cache/gem5` directory if not already present.
# The riscv-ubuntu boot-test was tested with riscv-bootloader-5.10
kernel=Resource(
"riscv-bootloader-vmlinux-5.10",
),
# The RISCV ubuntu image will be automatically downloaded to the
# `~/.cache/gem5` directory if not already present.
disk_image=Resource(
"riscv-ubuntu-20.04-img",
),
)
# Here we set a full system workload: "riscv-ubuntu-20.04-boot" which boots
# Ubuntu 20.04. Once the system successfully boots it encounters an `m5_exit`
# instruction which stops the simulation. When the simulation has ended you may
# inspect `m5out/system.pc.com_1.device` to see the stdout.
board.set_workload(Workload("riscv-ubuntu-20.04-boot"))
simulator = Simulator(board=board)
simulator.run()

View File

@@ -0,0 +1,87 @@
# Copyright (c) 2022 The Regents of the University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
This gem5 configuration script runs the RISCVMatchedBoard in FS mode with
an Ubuntu 20.04 image and calls m5 exit after the simulation has booted the OS.
Usage
---
```
scons build/RISCV/gem5.opt
./build/RISCV/gem5.opt configs/example/gem5_library/riscvmatched-fs.py
```
"""
import argparse

from gem5.prebuilt.riscvmatched.riscvmatched_board import RISCVMatchedBoard
from gem5.utils.requires import requires
from gem5.isas import ISA
from gem5.simulate.simulator import Simulator
from gem5.resources.workload import Workload

# Request a gem5 build with RISCV as the required ISA before doing anything
# else.
requires(isa_required=ISA.RISCV)

parser = argparse.ArgumentParser(
    description="A script which uses the RISCVMatchedBoard in FS mode."
)

parser.add_argument(
    "-i",
    "--to-init",
    action="store_true",
    help="Exit the simulation after the Linux Kernel boot.",
)

args = parser.parse_args()

# Instantiate the RISCVMatchedBoard for full-system simulation, explicitly
# setting the clock frequency and the L2 cache size.
board = RISCVMatchedBoard(
    clk_freq="1.2GHz",
    l2_size="2MB",
    is_fs=True,
)

# Here we set a full system workload: "riscv-ubuntu-20.04-boot" which boots
# Ubuntu 20.04. Once the system successfully boots it encounters an `m5_exit`
# instruction which stops the simulation. When the simulation has ended you may
# inspect `m5out/system.pc.com_1.device` to see the stdout.
#
# In the case where the `-i` flag is passed, we add the kernel argument
# `init=/root/exit.sh`. This means the simulation will exit after the Linux
# Kernel has booted.
workload = Workload("riscv-ubuntu-20.04-boot")

# Start from the board's default kernel arguments so that the optional `init=`
# argument is appended to them rather than replacing them.
kernel_args = board.get_default_kernel_args()
if args.to_init:
    kernel_args.append("init=/root/exit.sh")
workload.set_parameter("kernel_args", kernel_args)

board.set_workload(workload)

simulator = Simulator(board=board)
simulator.run()

View File

@@ -0,0 +1,65 @@
# Copyright (c) 2022 The Regents of the University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
This gem5 configuration script runs a "hello world" binary on the
RISCVMatched prebuilt board found in src/python/gem5/prebuilt/riscvmatched/
Usage
-----
```
scons build/RISCV/gem5.opt
./build/RISCV/gem5.opt \
configs/example/gem5_library/riscvmatched-hello.py
```
"""
from gem5.resources.resource import Resource
from gem5.simulate.simulator import Simulator

# The board is imported through the `gem5` package root, like every other
# gem5 import in this script, instead of a `python.`-prefixed module path.
from gem5.prebuilt.riscvmatched.riscvmatched_board import (
    RISCVMatchedBoard,
)
from gem5.isas import ISA
from gem5.utils.requires import requires

# Request a gem5 build with RISCV as the required ISA before doing anything
# else.
requires(isa_required=ISA.RISCV)

# instantiate the riscv matched board with default parameters
board = RISCVMatchedBoard()

# set the hello world riscv binary as the board workload
board.set_se_binary_workload(Resource("riscv-hello"))

# run the simulation with the RISCV Matched board
simulator = Simulator(board=board, full_system=False)
simulator.run()

# Report the tick at which the simulation stopped and the cause of the last
# exit event.
print(
    "Exiting @ tick {} because {}.".format(
        simulator.get_current_tick(),
        simulator.get_last_exit_event_cause(),
    )
)

View File

@@ -64,8 +64,8 @@ from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.coherence_protocol import CoherenceProtocol
from gem5.resources.resource import Resource
from m5.stats.gem5stats import get_simstat
from gem5.simulate.simulator import Simulator
from gem5.simulate.exit_event import ExitEvent
requires(
isa_required=ISA.X86,
@@ -79,8 +79,25 @@ benchmark_choices = ["cc", "bc", "tc", "pr", "bfs"]
synthetic_choices = ["0", "1"]
size_choices = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12",
"13", "14", "15", "16", "USA-road-d.NY.gr"]
size_choices = [
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8",
"9",
"10",
"11",
"12",
"13",
"14",
"15",
"16",
"USA-road-d.NY.gr",
]
parser = argparse.ArgumentParser(
description="An example configuration script to run the gapbs benchmarks."
@@ -118,8 +135,7 @@ args = parser.parse_args()
# Setting up all the fixed system parameters here
# Caches: MESI Two Level Cache Hierarchy
from gem5.components.cachehierarchies.ruby.\
mesi_two_level_cache_hierarchy import(
from gem5.components.cachehierarchies.ruby.mesi_two_level_cache_hierarchy import (
MESITwoLevelCacheHierarchy,
)
@@ -173,8 +189,10 @@ board = X86Board(
if args.synthetic == "1":
if args.size == "USA-road-d.NY.gr":
print("fatal: cannot use a real graph with --synthetic 1",
file=sys.stderr)
print(
"fatal: cannot use a real graph with --synthetic 1",
file=sys.stderr,
)
exit(-1)
command = "./{} -g {}\n".format(args.benchmark, args.size)
@@ -185,24 +203,37 @@ board.set_kernel_disk_workload(
# The x86 linux kernel will be automatically downloaded to the
# `~/.cache/gem5` directory if not already present.
# gapbs benchmarks were tested with kernel version 4.19.83
kernel=Resource(
"x86-linux-kernel-4.19.83",
),
kernel=Resource("x86-linux-kernel-4.19.83"),
# The x86-gapbs image will be automatically downloaded to the
# `~/.cache/gem5` directory if not already present.
disk_image=Resource(
"x86-gapbs",
),
disk_image=Resource("x86-gapbs"),
readfile_contents=command,
)
root = Root(full_system=True, system=board)
# sim_quantum must be set when KVM cores are used.
def handle_workbegin():
print("Done booting Linux")
print("Resetting stats at the start of ROI!")
m5.stats.reset()
global start_tick
start_tick = m5.curTick()
processor.switch()
yield False # E.g., continue the simulation.
root.sim_quantum = int(1e9)
m5.instantiate()
def handle_workend():
print("Dump stats at the end of the ROI!")
m5.stats.dump()
yield True # Stop the simulation. We're done.
simulator = Simulator(
board=board,
on_exit_event={
ExitEvent.WORKBEGIN: handle_workbegin(),
ExitEvent.WORKEND: handle_workend(),
},
)
# We maintain the wall clock time.
@@ -217,75 +248,8 @@ print("Using KVM cpu")
# the first ROI annotation in details. The X86Board currently does not support
# `work items started count reached`.
exit_event = m5.simulate()
# The first exit_event ends with a `workbegin` cause. This means that the
# system started successfully and the execution on the program started. The
# ROI begin is encountered.
if exit_event.getCause() == "workbegin":
print("Done booting Linux")
print("Resetting stats at the start of ROI!")
m5.stats.reset()
start_tick = m5.curTick()
# We have completed up to this step using KVM cpu. Now we switch to timing
# cpu for detailed simulation.
processor.switch()
else:
print("Unexpected termination of simulation before ROI was reached!")
print(
"Exiting @ tick {} because {}.".format(
m5.curTick(),
exit_event.getCause()
)
)
exit(-1)
# The next exit_event is to simulate the ROI. It should be exited with a cause
# marked by `workend`. This implies that the first annotation is successfully
# completed.
exit_event = m5.simulate()
# Reached the end of first ROI.
# We dump the stats here.
# We exepect that ROI ends with `workend`. Otherwise the simulation ended
# unexpectedly.
if exit_event.getCause() == "workend":
print("Dump stats at the end of the ROI!")
m5.stats.dump()
end_tick = m5.curTick()
else:
print("Unexpected termination of simulation while ROI was being executed!")
print(
"Exiting @ tick {} because {}.".format(
m5.curTick(),
exit_event.getCause()
)
)
exit(-1)
# We get simInsts using get_simstat and output it in the final print statement.
gem5stats = get_simstat(root)
# We get the number of committed instructions from the timing cores. We then
# sum and print them at the end.
roi_insts = float(\
gem5stats.to_json()\
["system"]["processor"]["cores2"]["core"]["exec_context.thread_0"]\
["numInsts"]["value"]) + float(\
gem5stats.to_json()\
["system"]["processor"]["cores3"]["core"]["exec_context.thread_0"]\
["numInsts"]["value"]\
)
simulator.run()
end_tick = m5.curTick()
# Since we simulated the ROI in details, therefore, simulation is over at this
# point.
@@ -299,8 +263,9 @@ print()
print("Performance statistics:")
print("Simulated time in ROI: %.2fs" % ((end_tick - start_tick) / 1e12))
print("Instructions executed in ROI: %d" % ((roi_insts)))
print("Ran a total of", m5.curTick() / 1e12, "simulated seconds")
print(
"Ran a total of", simulator.get_current_tick() / 1e12, "simulated seconds"
)
print(
"Total wallclock time: %.2fs, %.2f min"
% (time.time() - globalStart, (time.time() - globalStart) / 60)

View File

@@ -54,19 +54,21 @@ from m5.objects import Root
from gem5.utils.requires import requires
from gem5.components.boards.x86_board import X86Board
from gem5.components.memory import DualChannelDDR4_2400
from gem5.components.processors.simple_switchable_processor import(
from gem5.components.processors.simple_switchable_processor import (
SimpleSwitchableProcessor,
)
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.coherence_protocol import CoherenceProtocol
from gem5.resources.resource import Resource
from gem5.simulate.simulator import Simulator
from gem5.simulate.simulator import ExitEvent
from m5.stats.gem5stats import get_simstat
from m5.util import warn
requires(
isa_required = ISA.X86,
isa_required=ISA.X86,
coherence_protocol_required=CoherenceProtocol.MESI_TWO_LEVEL,
kvm_required=True,
)
@@ -93,25 +95,25 @@ parser = argparse.ArgumentParser(
parser.add_argument(
"--benchmark",
type = str,
type=str,
required=True,
help = "Input the benchmark program to execute.",
choices = benchmark_choices,
help="Input the benchmark program to execute.",
choices=benchmark_choices,
)
parser.add_argument(
"--size",
type = str,
type=str,
required=True,
help = "Input the class of the program to simulate.",
choices = size_choices,
help="Input the class of the program to simulate.",
choices=size_choices,
)
parser.add_argument(
"--ticks",
type = int,
help = "Optionally put the maximum number of ticks to execute during the "\
"ROI. It accepts an integer value."
type=int,
help="Optionally put the maximum number of ticks to execute during the "
"ROI. It accepts an integer value.",
)
args = parser.parse_args()
@@ -121,28 +123,31 @@ args = parser.parse_args()
# We warn the user here.
if args.benchmark == "mg" and args.size == "C":
warn("mg.C uses 3.3 GB of memory. Currently we are simulating 3 GB\
of main memory in the system.")
warn(
"mg.C uses 3.3 GB of memory. Currently we are simulating 3 GB\
of main memory in the system."
)
# The simulation will fail in the case of `ft` with class C. We warn the user
# here.
elif args.benchmark == "ft" and args.size == "C":
warn("There is not enough memory for ft.C. Currently we are\
simulating 3 GB of main memory in the system.")
warn(
"There is not enough memory for ft.C. Currently we are\
simulating 3 GB of main memory in the system."
)
# Checking for the maximum number of instructions, if provided by the user.
# Setting up all the fixed system parameters here
# Caches: MESI Two Level Cache Hierarchy
from gem5.components.cachehierarchies.ruby.\
mesi_two_level_cache_hierarchy import(
from gem5.components.cachehierarchies.ruby.mesi_two_level_cache_hierarchy import (
MESITwoLevelCacheHierarchy,
)
cache_hierarchy = MESITwoLevelCacheHierarchy(
l1d_size = "32kB",
l1d_assoc = 8,
l1d_size="32kB",
l1d_assoc=8,
l1i_size="32kB",
l1i_assoc=8,
l2_size="256kB",
@@ -152,7 +157,7 @@ cache_hierarchy = MESITwoLevelCacheHierarchy(
# Memory: Dual Channel DDR4 2400 DRAM device.
# The X86 board only supports 3 GB of main memory.
memory = DualChannelDDR4_2400(size = "3GB")
memory = DualChannelDDR4_2400(size="3GB")
# Here we setup the processor. This is a special switchable processor in which
# a starting core type and a switch core type must be specified. Once a
@@ -189,35 +194,63 @@ board = X86Board(
# Also, we sleep the system for some time so that the output is printed
# properly.
command="/home/gem5/NPB3.3-OMP/bin/{}.{}.x;".format(args.benchmark,args.size)\
+ "sleep 5;" \
command = (
"/home/gem5/NPB3.3-OMP/bin/{}.{}.x;".format(args.benchmark, args.size)
+ "sleep 5;"
+ "m5 exit;"
)
board.set_kernel_disk_workload(
# The x86 linux kernel will be automatically downloaded to the
# `~/.cache/gem5` directory if not already present.
# npb benchmarks were tested with kernel version 4.19.83
kernel=Resource(
"x86-linux-kernel-4.19.83",
),
kernel=Resource("x86-linux-kernel-4.19.83"),
# The x86-npb image will be automatically downloaded to the
# `~/.cache/gem5` directory if not already present.
disk_image=Resource(
"x86-npb",
),
disk_image=Resource("x86-npb"),
readfile_contents=command,
)
# We need this for long running processes.
m5.disableAllListeners()
# The first exit_event ends with a `workbegin` cause. This means that the
# system started successfully and the execution on the program started.
def handle_workbegin():
print("Done booting Linux")
print("Resetting stats at the start of ROI!")
root = Root(full_system = True, system = board)
m5.stats.reset()
# sim_quantum must be set when KVM cores are used.
# We have completed up to this step using KVM cpu. Now we switch to timing
# cpu for detailed simulation.
root.sim_quantum = int(1e9)
# Next, we need to check if the user passed a value for --ticks. If yes,
# then we limit our execution to this number of ticks during the ROI.
# Otherwise, we simulate until the ROI ends.
processor.switch()
if args.ticks:
# schedule an exit event for this amount of ticks in the future.
# The simulation will then continue.
m5.scheduleTickExitFromCurrent(args.ticks)
yield False
m5.instantiate()
# The next exit_event is to simulate the ROI. It should be exited with a cause
# marked by `workend`.
# We expect that ROI ends with `workend` or `simulate() limit reached`.
def handle_workend():
print("Dump stats at the end of the ROI!")
m5.stats.dump()
yield True
simulator = Simulator(
board=board,
on_exit_event={
ExitEvent.WORKBEGIN: handle_workbegin(),
ExitEvent.WORKEND: handle_workend(),
},
)
# We maintain the wall clock time.
@@ -227,96 +260,12 @@ print("Running the simulation")
print("Using KVM cpu")
# We start the simulation.
exit_event = m5.simulate()
# The first exit_event ends with a `workbegin` cause. This means that the
# system started successfully and the execution on the program started.
if exit_event.getCause() == "workbegin":
print("Done booting Linux")
print("Resetting stats at the start of ROI!")
m5.stats.reset()
start_tick = m5.curTick()
# We have completed up to this step using KVM cpu. Now we switch to timing
# cpu for detailed simulation.
processor.switch()
else:
# `workbegin` call was never encountered.
print("Unexpected termination of simulation before ROI was reached!")
print(
"Exiting @ tick {} because {}.".format(
m5.curTick(),
exit_event.getCause()
)
)
exit(-1)
# The next exit_event is to simulate the ROI. It should be exited with a cause
# marked by `workend`.
# Next, we need to check if the user passed a value for --ticks. If yes,
# then we limit out execution to this number of ticks during the ROI.
# Otherwise, we simulate until the ROI ends.
if args.ticks:
exit_event = m5.simulate(args.ticks)
else:
exit_event = m5.simulate()
# Reached the end of ROI.
# We dump the stats here.
# We exepect that ROI ends with `workend` or `simulate() limit reached`.
# Otherwise the simulation ended unexpectedly.
if exit_event.getCause() == "workend":
print("Dump stats at the end of the ROI!")
m5.stats.dump()
end_tick = m5.curTick()
elif exit_event.getCause() == "simulate() limit reached" and \
args.ticks is not None:
print("Dump stats at the end of {} ticks in the ROI".format(args.ticks))
m5.stats.dump()
end_tick = m5.curTick()
else:
print("Unexpected termination of simulation while ROI was being executed!")
print(
"Exiting @ tick {} because {}.".format(
m5.curTick(),
exit_event.getCause()
)
)
exit(-1)
simulator.run()
# We need to note that the benchmark is not executed completely till this
# point, but, the ROI has. We collect the essential statistics here before
# resuming the simulation again.
# We get simInsts using get_simstat and output it in the final
# print statement.
gem5stats = get_simstat(root)
# We get the number of committed instructions from the timing
# cores. We then sum and print them at the end.
roi_insts = float(\
gem5stats.to_json()\
["system"]["processor"]["cores2"]["core"]["exec_context.thread_0"]\
["numInsts"]["value"]
) + float(\
gem5stats.to_json()\
["system"]["processor"]["cores3"]["core"]["exec_context.thread_0"]\
["numInsts"]["value"]\
)
# Simulation is over at this point. We acknowledge that all the simulation
# events were successful.
print("All simulation events were successful.")
@@ -326,8 +275,17 @@ print("Done with the simulation")
print()
print("Performance statistics:")
print("Simulated time in ROI: %.2fs" % ((end_tick-start_tick)/1e12))
print("Instructions executed in ROI: %d" % ((roi_insts)))
print("Ran a total of", m5.curTick()/1e12, "simulated seconds")
print("Total wallclock time: %.2fs, %.2f min" % \
(time.time()-globalStart, (time.time()-globalStart)/60))
# manually calculate ROI time if ticks arg is used in case the
# entire ROI wasn't simulated
if args.ticks:
print(f"Simulated time in ROI (to tick): {args.ticks/ 1e12}s")
else:
print(f"Simulated time in ROI: {simulator.get_roi_ticks()[0] / 1e12}s")
print(
f"Ran a total of {simulator.get_current_tick() / 1e12} simulated seconds"
)
print(
"Total wallclock time: %.2fs, %.2f min"
% (time.time() - globalStart, (time.time() - globalStart) / 60)
)

View File

@@ -53,33 +53,45 @@ from m5.objects import Root
from gem5.utils.requires import requires
from gem5.components.boards.x86_board import X86Board
from gem5.components.memory import DualChannelDDR4_2400
from gem5.components.processors.simple_switchable_processor import(
from gem5.components.processors.simple_switchable_processor import (
SimpleSwitchableProcessor,
)
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.coherence_protocol import CoherenceProtocol
from gem5.resources.resource import Resource
from m5.stats.gem5stats import get_simstat
from gem5.simulate.simulator import Simulator
from gem5.simulate.exit_event import ExitEvent
# We check for the required gem5 build.
requires(
isa_required = ISA.X86,
isa_required=ISA.X86,
coherence_protocol_required=CoherenceProtocol.MESI_TWO_LEVEL,
kvm_required=True,
)
# Following are the list of benchmark programs for parsec.
benchmark_choices = ["blackscholes", "bodytrack", "canneal", "dedup",
"facesim", "ferret", "fluidanimate", "freqmine",
"raytrace", "streamcluster", "swaptions", "vips", "x264"]
benchmark_choices = [
"blackscholes",
"bodytrack",
"canneal",
"dedup",
"facesim",
"ferret",
"fluidanimate",
"freqmine",
"raytrace",
"streamcluster",
"swaptions",
"vips",
"x264",
]
# Following are the input size.
size_choices=["simsmall", "simmedium", "simlarge"]
size_choices = ["simsmall", "simmedium", "simlarge"]
parser = argparse.ArgumentParser(
description="An example configuration script to run the npb benchmarks."
@@ -89,32 +101,31 @@ parser = argparse.ArgumentParser(
parser.add_argument(
"--benchmark",
type = str,
type=str,
required=True,
help = "Input the benchmark program to execute.",
choices = benchmark_choices,
help="Input the benchmark program to execute.",
choices=benchmark_choices,
)
parser.add_argument(
"--size",
type = str,
type=str,
required=True,
help = "Simulation size the benchmark program.",
choices = size_choices,
help="Simulation size the benchmark program.",
choices=size_choices,
)
args = parser.parse_args()
# Setting up all the fixed system parameters here
# Caches: MESI Two Level Cache Hierarchy
from gem5.components.cachehierarchies.ruby.\
mesi_two_level_cache_hierarchy import(
from gem5.components.cachehierarchies.ruby.mesi_two_level_cache_hierarchy import (
MESITwoLevelCacheHierarchy,
)
cache_hierarchy = MESITwoLevelCacheHierarchy(
l1d_size = "32kB",
l1d_assoc = 8,
l1d_size="32kB",
l1d_assoc=8,
l1i_size="32kB",
l1i_assoc=8,
l2_size="256kB",
@@ -125,7 +136,7 @@ cache_hierarchy = MESITwoLevelCacheHierarchy(
# Memory: Dual Channel DDR4 2400 DRAM device.
# The X86 board only supports 3 GB of main memory.
memory = DualChannelDDR4_2400(size = "3GB")
memory = DualChannelDDR4_2400(size="3GB")
# Here we setup the processor. This is a special switchable processor in which
# a starting core type and a switch core type must be specified. Once a
@@ -163,38 +174,49 @@ board = X86Board(
# properly.
command = "cd /home/gem5/parsec-benchmark;".format(args.benchmark) \
+ "source env.sh;" \
command = (
"cd /home/gem5/parsec-benchmark;".format(args.benchmark)
+ "source env.sh;"
+ "parsecmgmt -a run -p {} -c gcc-hooks -i {} \
-n {};".format(args.benchmark, args.size, "2") \
+ "sleep 5;" \
+ "m5 exit;" \
-n {};".format(
args.benchmark, args.size, "2"
)
+ "sleep 5;"
+ "m5 exit;"
)
board.set_kernel_disk_workload(
# The x86 linux kernel will be automatically downloaded to the
# `~/.cache/gem5` directory if not already present.
# PARSEC benchmarks were tested with kernel version 4.19.83
kernel=Resource(
"x86-linux-kernel-4.19.83",
),
kernel=Resource("x86-linux-kernel-4.19.83"),
# The x86-parsec image will be automatically downloaded to the
# `~/.cache/gem5` directory if not already present.
disk_image=Resource(
"x86-parsec",
),
disk_image=Resource("x86-parsec"),
readfile_contents=command,
)
# We need this for long running processes.
m5.disableAllListeners()
# functions to handle different exit events during the simulation
def handle_workbegin():
print("Done booting Linux")
print("Resetting stats at the start of ROI!")
m5.stats.reset()
processor.switch()
yield False
root = Root(full_system = True, system = board)
# sim_quantum must be set if KVM cores are used.
def handle_workend():
print("Dump stats at the end of the ROI!")
m5.stats.dump()
yield True
root.sim_quantum = int(1e9)
m5.instantiate()
simulator = Simulator(
board=board,
on_exit_event={
ExitEvent.WORKBEGIN: handle_workbegin(),
ExitEvent.WORKEND: handle_workend(),
},
)
# We maintain the wall clock time.
@@ -203,84 +225,11 @@ globalStart = time.time()
print("Running the simulation")
print("Using KVM cpu")
start_tick = m5.curTick()
end_tick = m5.curTick()
m5.stats.reset()
# We start the simulation
simulator.run()
exit_event = m5.simulate()
# The first exit_event ends with a `workbegin` cause. This means that the
# system booted successfully and the execution on the program started.
if exit_event.getCause() == "workbegin":
print("Done booting Linux")
print("Resetting stats at the start of ROI!")
m5.stats.reset()
start_tick = m5.curTick()
# We have completed up to this step using KVM cpu. Now we switch to timing
# cpu for detailed simulation.
processor.switch()
else:
# `workbegin` call was never encountered.
print("Unexpected termination of simulation before ROI was reached!")
print(
"Exiting @ tick {} because {}.".format(
m5.curTick(),
exit_event.getCause()
)
)
exit(-1)
# The next exit_event is to simulate the ROI. It should be exited with a cause
# marked by `workend`.
exit_event = m5.simulate()
# Reached the end of ROI.
# We dump the stats here.
# We exepect that ROI ends with `workend`. Otherwise the simulation ended
# unexpectedly.
if exit_event.getCause() == "workend":
print("Dump stats at the end of the ROI!")
m5.stats.dump()
end_tick = m5.curTick()
else:
print("Unexpected termination of simulation while ROI was being executed!")
print(
"Exiting @ tick {} because {}.".format(
m5.curTick(),
exit_event.getCause()
)
)
exit(-1)
# ROI has ended here, and we get `simInsts` using get_simstat and print it in
# the final print statement.
gem5stats = get_simstat(root)
# We get the number of committed instructions from the timing
# cores. We then sum and print them at the end.
roi_insts = float(\
gem5stats.to_json()\
["system"]["processor"]["cores2"]["core"]["exec_context.thread_0"]\
["numInsts"]["value"]) + float(\
gem5stats.to_json()\
["system"]["processor"]["cores3"]["core"]["exec_context.thread_0"]\
["numInsts"]["value"]\
)
# Simulation is over at this point. We acknowledge that all the simulation
# events were successful.
print("All simulation events were successful.")
# We print the final simulation statistics.
@@ -289,8 +238,11 @@ print("Done with the simulation")
print()
print("Performance statistics:")
print("Simulated time in ROI: %.2fs" % ((end_tick-start_tick)/1e12))
print("Instructions executed in ROI: %d" % ((roi_insts)))
print("Ran a total of", m5.curTick()/1e12, "simulated seconds")
print("Total wallclock time: %.2fs, %.2f min" % \
(time.time()-globalStart, (time.time()-globalStart)/60))
print("Simulated time in ROI: " + ((str(simulator.get_roi_ticks()[0]))))
print(
"Ran a total of", simulator.get_current_tick() / 1e12, "simulated seconds"
)
print(
"Total wallclock time: %.2fs, %.2f min"
% (time.time() - globalStart, (time.time() - globalStart) / 60)
)

View File

@@ -59,13 +59,15 @@ from m5.objects import Root
from gem5.utils.requires import requires
from gem5.components.boards.x86_board import X86Board
from gem5.components.memory import DualChannelDDR4_2400
from gem5.components.processors.simple_switchable_processor import(
from gem5.components.processors.simple_switchable_processor import (
SimpleSwitchableProcessor,
)
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.coherence_protocol import CoherenceProtocol
from gem5.resources.resource import Resource, CustomDiskImageResource
from gem5.simulate.simulator import Simulator
from gem5.simulate.exit_event import ExitEvent
from m5.stats.gem5stats import get_simstat
from m5.util import warn
@@ -84,14 +86,38 @@ requires(
# have build errors, and, therefore cannot be executed. More information is
# available at: https://www.gem5.org/documentation/benchmark_status/gem5-20
benchmark_choices = ['400.perlbench', '401.bzip2', '403.gcc', '410.bwaves',
'416.gamess', '429.mcf', '433.milc', '435.gromacs',
'436.cactusADM', '437.leslie3d', '444.namd', '445.gobmk',
'447.dealII', '450.soplex', '453.povray', '454.calculix',
'456.hmmer', '458.sjeng', '459.GemsFDTD',
'462.libquantum', '464.h264ref', '465.tonto', '470.lbm',
'471.omnetpp', '473.astar', '481.wrf', '482.sphinx3',
'483.xalancbmk', '998.specrand', '999.specrand']
benchmark_choices = [
"400.perlbench",
"401.bzip2",
"403.gcc",
"410.bwaves",
"416.gamess",
"429.mcf",
"433.milc",
"435.gromacs",
"436.cactusADM",
"437.leslie3d",
"444.namd",
"445.gobmk",
"447.dealII",
"450.soplex",
"453.povray",
"454.calculix",
"456.hmmer",
"458.sjeng",
"459.GemsFDTD",
"462.libquantum",
"464.h264ref",
"465.tonto",
"470.lbm",
"471.omnetpp",
"473.astar",
"481.wrf",
"482.sphinx3",
"483.xalancbmk",
"998.specrand",
"999.specrand",
]
# Following are the input size.
@@ -109,34 +135,34 @@ parser = argparse.ArgumentParser(
parser.add_argument(
"--image",
type = str,
required = True,
help = "Input the full path to the built spec-2006 disk-image."
type=str,
required=True,
help="Input the full path to the built spec-2006 disk-image.",
)
parser.add_argument(
"--partition",
type = str,
required = False,
type=str,
required=False,
default=None,
help = "Input the root partition of the SPEC disk-image. If the disk is \
not partitioned, then pass \"\"."
help='Input the root partition of the SPEC disk-image. If the disk is \
not partitioned, then pass "".',
)
parser.add_argument(
"--benchmark",
type = str,
type=str,
required=True,
help = "Input the benchmark program to execute.",
help="Input the benchmark program to execute.",
choices=benchmark_choices,
)
parser.add_argument(
"--size",
type = str,
type=str,
required=True,
help = "Sumulation size the benchmark program.",
choices = size_choices,
help="Sumulation size the benchmark program.",
choices=size_choices,
)
args = parser.parse_args()
@@ -151,21 +177,20 @@ if not os.path.exists(args.image):
warn("Disk image not found!")
print("Instructions on building the disk image can be found at: ")
print(
"https://gem5art.readthedocs.io/en/latest/tutorials/spec-tutorial.html"
"https://gem5art.readthedocs.io/en/latest/tutorials/spec-tutorial.html"
)
fatal("The disk-image is not found at {}".format(args.image))
# Setting up all the fixed system parameters here
# Caches: MESI Two Level Cache Hierarchy
from gem5.components.cachehierarchies.ruby.\
mesi_two_level_cache_hierarchy import(
from gem5.components.cachehierarchies.ruby.mesi_two_level_cache_hierarchy import (
MESITwoLevelCacheHierarchy,
)
cache_hierarchy = MESITwoLevelCacheHierarchy(
l1d_size = "32kB",
l1d_assoc = 8,
l1d_size="32kB",
l1d_assoc=8,
l1i_size="32kB",
l1i_assoc=8,
l2_size="256kB",
@@ -175,7 +200,7 @@ cache_hierarchy = MESITwoLevelCacheHierarchy(
# Memory: Dual Channel DDR4 2400 DRAM device.
# The X86 board only supports 3 GB of main memory.
memory = DualChannelDDR4_2400(size = "3GB")
memory = DualChannelDDR4_2400(size="3GB")
# Here we setup the processor. This is a special switchable processor in which
# a starting core type and a switch core type must be specified. Once a
@@ -205,8 +230,8 @@ board = X86Board(
# m5.options.outdir and the output from the disk-image folder is copied to
# this folder.
output_dir = "speclogs_" + ''.join(x.strip() for x in time.asctime().split())
output_dir = output_dir.replace(":","")
output_dir = "speclogs_" + "".join(x.strip() for x in time.asctime().split())
output_dir = output_dir.replace(":", "")
# We create this folder if it is absent.
try:
@@ -234,27 +259,31 @@ board.set_kernel_disk_workload(
# `~/.cache/gem5` directory if not already present.
# SPEC CPU2006 benchmarks were tested with kernel version 4.19.83 and
# 5.4.49
kernel=Resource(
"x86-linux-kernel-4.19.83",
),
kernel=Resource("x86-linux-kernel-4.19.83"),
# The location of the x86 SPEC CPU 2006 image
disk_image=CustomDiskImageResource(
args.image,
disk_root_partition=args.partition,
args.image, disk_root_partition=args.partition
),
readfile_contents=command,
)
# We need this for long running processes.
m5.disableAllListeners()
root = Root(full_system = True, system = board)
def handle_exit():
    """Yield the simulator's response to each successive exit event.

    This is a generator: each time it is resumed it handles one exit event
    and yields a boolean telling the simulator whether to stop.

    * First exit event: Linux has finished booting and the region of
      interest (ROI) begins, so the statistics are reset and ``False`` is
      yielded to keep the simulation running.
    * Second exit event: the ROI has ended, so the statistics are dumped and
      ``True`` is yielded to stop the simulation.
    """
    # First exit event: boot is complete; start measuring from a clean slate.
    print("Done booting Linux")
    print("Resetting stats at the start of ROI!")
    m5.stats.reset()
    yield False  # I.e., continue the simulation.

    # Second exit event: the ROI has finished; record what was measured.
    print("Dump stats at the end of the ROI!")
    m5.stats.dump()
    yield True  # Stop the simulation. We're done.
# sim_quantum must be set when KVM cores are used.
root.sim_quantum = int(1e9)
m5.instantiate()
simulator = Simulator(
board=board,
on_exit_event={
ExitEvent.EXIT: handle_exit(),
},
)
# We maintain the wall clock time.
@@ -263,94 +292,10 @@ globalStart = time.time()
print("Running the simulation")
print("Using KVM cpu")
start_tick = m5.curTick()
end_tick = m5.curTick()
m5.stats.reset()
exit_event = m5.simulate()
if exit_event.getCause() == "m5_exit instruction encountered":
# We have completed booting the OS using KVM cpu
# Reached the start of ROI
print("Done booting Linux")
print("Resetting stats at the start of ROI!")
m5.stats.reset()
start_tick = m5.curTick()
# We switch to timing cpu for detailed simulation.
processor.switch()
else:
# `m5_exit instruction encountered` was never reached
print("Unexpected termination of simulation before ROI was reached!")
print(
"Exiting @ tick {} because {}.".format(
m5.curTick(),
exit_event.getCause()
)
)
exit(-1)
# Simulate the ROI
exit_event = m5.simulate()
# Reached the end of ROI
gem5stats = get_simstat(root)
# We get the number of committed instructions from the timing
# cores. We then sum and print them at the end.
roi_insts = float(\
json.loads(gem5stats.dumps())\
["system"]["processor"]["cores2"]["core"]["exec_context.thread_0"]\
["numInsts"]["value"]) + float(\
json.loads(gem5stats.dumps())\
["system"]["processor"]["cores3"]["core"]["exec_context.thread_0"]\
["numInsts"]["value"]\
)
if exit_event.getCause() == "m5_exit instruction encountered":
print("Dump stats at the end of the ROI!")
m5.stats.dump()
end_tick = m5.curTick()
m5.stats.reset()
else:
# `m5_exit instruction encountered` was never reached
print("Unexpected termination of simulation while ROI was being executed!")
print(
"Exiting @ tick {} because {}.".format(
m5.curTick(),
exit_event.getCause()
)
)
exit(-1)
# We need to copy back the contents of the `speclogs' directory to
# m5.options.outdir
exit_event = m5.simulate()
if exit_event.getCause() == "m5_exit instruction encountered":
print("Output logs copied!")
else:
print("Unexpected termination of simulation while copying speclogs!")
print(
"Exiting @ tick {} because {}.".format(
m5.curTick(),
exit_event.getCause()
)
)
exit(-1)
m5.stats.dump()
end_tick = m5.curTick()
m5.stats.reset()
# We start the simulation
simulator.run()
# Simulation is over at this point. We acknowledge that all the simulation
# events were successful.
@@ -359,8 +304,11 @@ print("All simulation events were successful.")
print("Performance statistics:")
print("Simulated time: %.2fs" % ((end_tick-start_tick)/1e12))
print("Instructions executed: %d" % ((roi_insts)))
print("Ran a total of", m5.curTick()/1e12, "simulated seconds")
print("Total wallclock time: %.2fs, %.2f min" % \
(time.time()-globalStart, (time.time()-globalStart)/60))
print("Simulated time: " + ((str(simulator.get_roi_ticks()[0]))))
print(
"Ran a total of", simulator.get_current_tick() / 1e12, "simulated seconds"
)
print(
"Total wallclock time: %.2fs, %.2f min"
% (time.time() - globalStart, (time.time() - globalStart) / 60)
)

View File

@@ -57,13 +57,15 @@ from m5.objects import Root
from gem5.utils.requires import requires
from gem5.components.boards.x86_board import X86Board
from gem5.components.memory import DualChannelDDR4_2400
from gem5.components.processors.simple_switchable_processor import(
from gem5.components.processors.simple_switchable_processor import (
SimpleSwitchableProcessor,
)
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.coherence_protocol import CoherenceProtocol
from gem5.resources.resource import Resource, CustomDiskImageResource
from gem5.simulate.simulator import Simulator
from gem5.simulate.exit_event import ExitEvent
from m5.stats.gem5stats import get_simstat
from m5.util import warn
@@ -81,22 +83,54 @@ requires(
# More information is available at:
# https://www.gem5.org/documentation/benchmark_status/gem5-20
benchmark_choices =["500.perlbench_r", "502.gcc_r", "503.bwaves_r",
"505.mcf_r", "507.cactusBSSN_r", "508.namd_r",
"510.parest_r", "511.povray_r", "519.lbm_r",
"520.omnetpp_r", "521.wrf_r", "523.xalancbmk_r",
"525.x264_r", "527.cam4_r", "531.deepsjeng_r",
"538.imagick_r", "541.leela_r", "544.nab_r",
"548.exchange2_r", "549.fotonik3d_r", "554.roms_r",
"557.xz_r", "600.perlbench_s", "602.gcc_s",
"603.bwaves_s", "605.mcf_s", "607.cactusBSSN_s",
"608.namd_s", "610.parest_s", "611.povray_s",
"619.lbm_s", "620.omnetpp_s", "621.wrf_s",
"623.xalancbmk_s", "625.x264_s", "627.cam4_s",
"631.deepsjeng_s", "638.imagick_s", "641.leela_s",
"644.nab_s", "648.exchange2_s", "649.fotonik3d_s",
"654.roms_s", "996.specrand_fs", "997.specrand_fr",
"998.specrand_is", "999.specrand_ir"
benchmark_choices = [
"500.perlbench_r",
"502.gcc_r",
"503.bwaves_r",
"505.mcf_r",
"507.cactusBSSN_r",
"508.namd_r",
"510.parest_r",
"511.povray_r",
"519.lbm_r",
"520.omnetpp_r",
"521.wrf_r",
"523.xalancbmk_r",
"525.x264_r",
"527.cam4_r",
"531.deepsjeng_r",
"538.imagick_r",
"541.leela_r",
"544.nab_r",
"548.exchange2_r",
"549.fotonik3d_r",
"554.roms_r",
"557.xz_r",
"600.perlbench_s",
"602.gcc_s",
"603.bwaves_s",
"605.mcf_s",
"607.cactusBSSN_s",
"608.namd_s",
"610.parest_s",
"611.povray_s",
"619.lbm_s",
"620.omnetpp_s",
"621.wrf_s",
"623.xalancbmk_s",
"625.x264_s",
"627.cam4_s",
"631.deepsjeng_s",
"638.imagick_s",
"641.leela_s",
"644.nab_s",
"648.exchange2_s",
"649.fotonik3d_s",
"654.roms_s",
"996.specrand_fs",
"997.specrand_fr",
"998.specrand_is",
"999.specrand_ir",
]
# The following are the input sizes.
@@ -115,34 +149,34 @@ parser = argparse.ArgumentParser(
parser.add_argument(
"--image",
type = str,
required = True,
help = "Input the full path to the built spec-2017 disk-image."
type=str,
required=True,
help="Input the full path to the built spec-2017 disk-image.",
)
parser.add_argument(
"--partition",
type = str,
required = False,
type=str,
required=False,
default=None,
help = "Input the root partition of the SPEC disk-image. If the disk is \
not partitioned, then pass \"\"."
help='Input the root partition of the SPEC disk-image. If the disk is \
not partitioned, then pass "".',
)
parser.add_argument(
"--benchmark",
type = str,
required = True,
help = "Input the benchmark program to execute.",
type=str,
required=True,
help="Input the benchmark program to execute.",
choices=benchmark_choices,
)
parser.add_argument(
"--size",
type = str,
required = True,
help = "Sumulation size the benchmark program.",
choices = size_choices,
type=str,
required=True,
help="Sumulation size the benchmark program.",
choices=size_choices,
)
args = parser.parse_args()
@@ -157,21 +191,20 @@ if not os.path.exists(args.image):
warn("Disk image not found!")
print("Instructions on building the disk image can be found at: ")
print(
"https://gem5art.readthedocs.io/en/latest/tutorials/spec-tutorial.html"
"https://gem5art.readthedocs.io/en/latest/tutorials/spec-tutorial.html"
)
fatal("The disk-image is not found at {}".format(args.image))
# Setting up all the fixed system parameters here
# Caches: MESI Two Level Cache Hierarchy
from gem5.components.cachehierarchies.ruby.\
mesi_two_level_cache_hierarchy import(
from gem5.components.cachehierarchies.ruby.mesi_two_level_cache_hierarchy import (
MESITwoLevelCacheHierarchy,
)
cache_hierarchy = MESITwoLevelCacheHierarchy(
l1d_size = "32kB",
l1d_assoc = 8,
l1d_size="32kB",
l1d_assoc=8,
l1i_size="32kB",
l1i_assoc=8,
l2_size="256kB",
@@ -181,7 +214,7 @@ cache_hierarchy = MESITwoLevelCacheHierarchy(
# Memory: Dual Channel DDR4 2400 DRAM device.
# The X86 board only supports 3 GB of main memory.
memory = DualChannelDDR4_2400(size = "3GB")
memory = DualChannelDDR4_2400(size="3GB")
# Here we setup the processor. This is a special switchable processor in which
# a starting core type and a switch core type must be specified. Once a
@@ -211,8 +244,8 @@ board = X86Board(
# m5.options.outdir and the output from the disk-image folder is copied to
# this folder.
output_dir = "speclogs_" + ''.join(x.strip() for x in time.asctime().split())
output_dir = output_dir.replace(":","")
output_dir = "speclogs_" + "".join(x.strip() for x in time.asctime().split())
output_dir = output_dir.replace(":", "")
# We create this folder if it is absent.
try:
@@ -242,27 +275,31 @@ board.set_kernel_disk_workload(
# The x86 linux kernel will be automatically downloaded to the
# `~/.cache/gem5` directory if not already present.
# SPEC CPU2017 benchmarks were tested with kernel version 4.19.83
kernel=Resource(
"x86-linux-kernel-4.19.83",
),
kernel=Resource("x86-linux-kernel-4.19.83"),
# The location of the x86 SPEC CPU 2017 image
disk_image=CustomDiskImageResource(
args.image,
disk_root_partition=args.partition,
args.image, disk_root_partition=args.partition
),
readfile_contents=command,
)
# We need this for long running processes.
m5.disableAllListeners()
root = Root(full_system = True, system = board)
def handle_exit():
    """Yield the simulator's response to each successive exit event.

    This is a generator: each time it is resumed it handles one exit event
    and yields a boolean telling the simulator whether to stop.

    * First exit event: Linux has finished booting and the region of
      interest (ROI) begins, so the statistics are reset and ``False`` is
      yielded to keep the simulation running.
    * Second exit event: the ROI has ended, so the statistics are dumped and
      ``True`` is yielded to stop the simulation.
    """
    # First exit event: boot is complete; start measuring from a clean slate.
    print("Done booting Linux")
    print("Resetting stats at the start of ROI!")
    m5.stats.reset()
    yield False  # I.e., continue the simulation.

    # Second exit event: the ROI has finished; record what was measured.
    print("Dump stats at the end of the ROI!")
    m5.stats.dump()
    yield True  # Stop the simulation. We're done.
# sim_quantum must be set when KVM cores are used.
root.sim_quantum = int(1e9)
m5.instantiate()
simulator = Simulator(
board=board,
on_exit_event={
ExitEvent.EXIT: handle_exit(),
},
)
# We maintain the wall clock time.
@@ -271,94 +308,22 @@ globalStart = time.time()
print("Running the simulation")
print("Using KVM cpu")
start_tick = m5.curTick()
end_tick = m5.curTick()
m5.stats.reset()
exit_event = m5.simulate()
# We start the simulation
simulator.run()
if exit_event.getCause() == "m5_exit instruction encountered":
# We have completed booting the OS using KVM cpu
# Reached the start of ROI
print("Done booting Linux")
print("Resetting stats at the start of ROI!")
m5.stats.reset()
start_tick = m5.curTick()
# We switch to timing cpu for detailed simulation.
processor.switch()
else:
print("Unexpected termination of simulation before ROI was reached!")
print(
"Exiting @ tick {} because {}.".format(
m5.curTick(),
exit_event.getCause()
)
)
exit(-1)
# Simulate the ROI
exit_event = m5.simulate()
# Reached the end of ROI
gem5stats = get_simstat(root)
# We get the number of committed instructions from the timing
# cores. We then sum and print them at the end.
roi_insts = float(\
json.loads(gem5stats.dumps())\
["system"]["processor"]["cores2"]["core"]["exec_context.thread_0"]\
["numInsts"]["value"]
) + float(\
json.loads(gem5stats.dumps())\
["system"]["processor"]["cores3"]["core"]["exec_context.thread_0"]\
["numInsts"]["value"]\
)
if exit_event.getCause() == "m5_exit instruction encountered":
print("Dump stats at the end of the ROI!")
m5.stats.dump()
end_tick = m5.curTick()
m5.stats.reset()
else:
print("Unexpected termination of simulation while ROI was being executed!")
print(
"Exiting @ tick {} because {}.".format(
m5.curTick(),
exit_event.getCause()
)
)
exit(-1)
# We need to copy back the contents of the `speclogs' directory to
# m5.options.outdir
exit_event = m5.simulate()
if exit_event.getCause() == "m5_exit instruction encountered":
print("Output logs copied!")
else:
print("Unexpected termination of simulation while copying speclogs!")
print(
"Exiting @ tick {} because {}.".format(
m5.curTick(),
exit_event.getCause()
)
)
exit(-1)
# We print the final simulation statistics.
print("Done with the simulation")
print()
print("Performance statistics:")
print("Simulated time in ROI: %.2fs" % ((end_tick-start_tick)/1e12))
print("Instructions executed in ROI: %d" % ((roi_insts)))
print("Ran a total of", m5.curTick()/1e12, "simulated seconds")
print("Total wallclock time: %.2fs, %.2f min" % \
(time.time()-globalStart, (time.time()-globalStart)/60))
print("Simulated time in ROI: " + ((str(simulator.get_roi_ticks()[0]))))
print(
"Ran a total of", simulator.get_current_tick() / 1e12, "simulated seconds"
)
print(
"Total wallclock time: %.2fs, %.2f min"
% (time.time() - globalStart, (time.time() - globalStart) / 60)
)

View File

@@ -49,9 +49,9 @@ from gem5.components.processors.simple_switchable_processor import (
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.coherence_protocol import CoherenceProtocol
from gem5.resources.resource import Resource
from gem5.simulate.simulator import Simulator
from gem5.simulate.exit_event import ExitEvent
from gem5.resources.workload import Workload
# This runs a check to ensure the gem5 binary is compiled to X86 and to the
# MESI Two Level coherence protocol.
@@ -61,8 +61,7 @@ requires(
kvm_required=True,
)
from gem5.components.cachehierarchies.ruby.\
mesi_two_level_cache_hierarchy import (
from gem5.components.cachehierarchies.ruby.mesi_two_level_cache_hierarchy import (
MESITwoLevelCacheHierarchy,
)
@@ -111,21 +110,17 @@ board = X86Board(
# then, again, call `m5 exit` to terminate the simulation. After simulation
# has ended you may inspect `m5out/system.pc.com_1.device` to see the echo
# output.
command = "m5 exit;" \
+ "echo 'This is running on Timing CPU cores.';" \
+ "sleep 1;" \
+ "m5 exit;"
board.set_kernel_disk_workload(
# The x86 linux kernel will be automatically downloaded to the if not
# already present.
kernel=Resource("x86-linux-kernel-5.4.49"),
# The x86 ubuntu image will be automatically downloaded to the if not
# already present.
disk_image=Resource("x86-ubuntu-18.04-img"),
readfile_contents=command,
command = (
"m5 exit;"
+ "echo 'This is running on Timing CPU cores.';"
+ "sleep 1;"
+ "m5 exit;"
)
workload = Workload("x86-ubuntu-18.04-boot")
workload.set_parameter("readfile_contents", command)
board.set_workload(workload)
simulator = Simulator(
board=board,
on_exit_event={
@@ -133,7 +128,7 @@ simulator = Simulator(
# exit event. Instead of exiting the simulator, we just want to
# switch the processor. The 2nd m5 exit after will revert to using
# default behavior where the simulator run will exit.
ExitEvent.EXIT : (func() for func in [processor.switch]),
ExitEvent.EXIT: (func() for func in [processor.switch])
},
)
simulator.run()

View File

@@ -45,7 +45,7 @@ scons build/X86/gem5.opt
"""
from gem5.prebuilt.demo.x86_demo_board import X86DemoBoard
from gem5.resources.resource import Resource
from gem5.resources.workload import Workload
from gem5.simulate.simulator import Simulator
@@ -53,13 +53,10 @@ from gem5.simulate.simulator import Simulator
# simulation.
board = X86DemoBoard()
# We then set the workload. Here we use the 5.4.49 Linux kernel with an X86
# Ubuntu OS. If these cannot be found locally they will be automatically
# downloaded.
board.set_kernel_disk_workload(
kernel=Resource("x86-linux-kernel-5.4.49"),
disk_image=Resource("x86-ubuntu-18.04-img"),
)
# We then set the workload. Here we use the "x86-ubuntu-18.04-boot" workload.
# This boots Ubuntu 18.04 with Linux 5.4.49. If the required resources are not
# found locally, they will be downloaded.
board.set_workload(Workload("x86-ubuntu-18.04-boot"))
simulator = Simulator(board=board)
simulator.run()

View File

@@ -34,8 +34,8 @@ from importlib import *
from network import Network
class DisjointSimple(SimpleNetwork):
class DisjointSimple(SimpleNetwork):
def __init__(self, ruby_system):
super(DisjointSimple, self).__init__()
@@ -51,8 +51,7 @@ class DisjointSimple(SimpleNetwork):
topo_module = import_module("topologies.%s" % opts.cpu_topology)
topo_class = getattr(topo_module, opts.cpu_topology)
_topo = topo_class(controllers)
_topo.makeTopology(opts, self, SimpleIntLink,
SimpleExtLink, Switch)
_topo.makeTopology(opts, self, SimpleIntLink, SimpleExtLink, Switch)
self.initSimple(opts, self.int_links, self.ext_links)
@@ -62,12 +61,10 @@ class DisjointSimple(SimpleNetwork):
topo_module = import_module("topologies.%s" % opts.gpu_topology)
topo_class = getattr(topo_module, opts.gpu_topology)
_topo = topo_class(controllers)
_topo.makeTopology(opts, self, SimpleIntLink,
SimpleExtLink, Switch)
_topo.makeTopology(opts, self, SimpleIntLink, SimpleExtLink, Switch)
self.initSimple(opts, self.int_links, self.ext_links)
def initSimple(self, opts, int_links, ext_links):
# Attach links to network
@@ -76,8 +73,8 @@ class DisjointSimple(SimpleNetwork):
self.setup_buffers()
class DisjointGarnet(GarnetNetwork):
class DisjointGarnet(GarnetNetwork):
def __init__(self, ruby_system):
super(DisjointGarnet, self).__init__()
@@ -90,8 +87,9 @@ class DisjointGarnet(GarnetNetwork):
topo_module = import_module("topologies.%s" % opts.cpu_topology)
topo_class = getattr(topo_module, opts.cpu_topology)
_topo = topo_class(controllers)
_topo.makeTopology(opts, self, GarnetIntLink,
GarnetExtLink, GarnetRouter)
_topo.makeTopology(
opts, self, GarnetIntLink, GarnetExtLink, GarnetRouter
)
Network.init_network(opts, self, GarnetNetworkInterface)
@@ -101,7 +99,8 @@ class DisjointGarnet(GarnetNetwork):
topo_module = import_module("topologies.%s" % opts.gpu_topology)
topo_class = getattr(topo_module, opts.gpu_topology)
_topo = topo_class(controllers)
_topo.makeTopology(opts, self, GarnetIntLink,
GarnetExtLink, GarnetRouter)
_topo.makeTopology(
opts, self, GarnetIntLink, GarnetExtLink, GarnetRouter
)
Network.init_network(opts, self, GarnetNetworkInterface)

View File

@@ -36,7 +36,7 @@ from ruby.GPU_VIPER import *
from ruby import Ruby
class DummySystem():
class DummySystem:
def __init__(self, mem_ranges):
self.mem_ctrls = []
@@ -45,7 +45,7 @@ class DummySystem():
class Disjoint_VIPER(RubySystem):
def __init__(self):
if buildEnv['PROTOCOL'] != "GPU_VIPER":
if buildEnv["PROTOCOL"] != "GPU_VIPER":
fatal("This ruby config only supports the GPU_VIPER protocol")
super(Disjoint_VIPER, self).__init__()
@@ -60,29 +60,33 @@ class Disjoint_VIPER(RubySystem):
self.network_cpu = DisjointSimple(self)
self.network_gpu = DisjointSimple(self)
# Construct CPU controllers
cpu_dir_nodes = \
construct_dirs(options, system, self, self.network_cpu)
(cp_sequencers, cp_cntrl_nodes) = \
construct_corepairs(options, system, self, self.network_cpu)
cpu_dir_nodes = construct_dirs(options, system, self, self.network_cpu)
(cp_sequencers, cp_cntrl_nodes) = construct_corepairs(
options, system, self, self.network_cpu
)
# Construct GPU controllers
(tcp_sequencers, tcp_cntrl_nodes) = \
construct_tcps(options, system, self, self.network_gpu)
(sqc_sequencers, sqc_cntrl_nodes) = \
construct_sqcs(options, system, self, self.network_gpu)
(scalar_sequencers, scalar_cntrl_nodes) = \
construct_scalars(options, system, self, self.network_gpu)
tcc_cntrl_nodes = \
construct_tccs(options, system, self, self.network_gpu)
(tcp_sequencers, tcp_cntrl_nodes) = construct_tcps(
options, system, self, self.network_gpu
)
(sqc_sequencers, sqc_cntrl_nodes) = construct_sqcs(
options, system, self, self.network_gpu
)
(scalar_sequencers, scalar_cntrl_nodes) = construct_scalars(
options, system, self, self.network_gpu
)
tcc_cntrl_nodes = construct_tccs(
options, system, self, self.network_gpu
)
# Construct CPU memories
Ruby.setup_memory_controllers(system, self, cpu_dir_nodes, options)
# Construct GPU memories
(gpu_dir_nodes, gpu_mem_ctrls) = \
construct_gpudirs(options, system, self, self.network_gpu)
(gpu_dir_nodes, gpu_mem_ctrls) = construct_gpudirs(
options, system, self, self.network_gpu
)
# Configure the directories based on which network they are in
for cpu_dir_node in cpu_dir_nodes:
@@ -115,11 +119,12 @@ class Disjoint_VIPER(RubySystem):
dma_cntrls = []
for i, dma_device in enumerate(dma_devices):
dma_seq = DMASequencer(version=i, ruby_system=self)
dma_cntrl = DMA_Controller(version=i, dma_sequencer=dma_seq,
ruby_system=self)
dma_cntrl = DMA_Controller(
version=i, dma_sequencer=dma_seq, ruby_system=self
)
# Handle inconsistently named ports on various DMA devices:
if not hasattr(dma_device, 'type'):
if not hasattr(dma_device, "type"):
# IDE doesn't have a .type but seems like everything else does.
dma_seq.in_ports = dma_device
elif dma_device.type in gpu_dma_types:
@@ -127,13 +132,15 @@ class Disjoint_VIPER(RubySystem):
else:
dma_seq.in_ports = dma_device.dma
if hasattr(dma_device, 'type') and \
dma_device.type in gpu_dma_types:
if (
hasattr(dma_device, "type")
and dma_device.type in gpu_dma_types
):
dma_cntrl.requestToDir = MessageBuffer(buffer_size=0)
dma_cntrl.requestToDir.out_port = self.network_gpu.in_port
dma_cntrl.responseFromDir = MessageBuffer(buffer_size=0)
dma_cntrl.responseFromDir.in_port = self.network_gpu.out_port
dma_cntrl.mandatoryQueue = MessageBuffer(buffer_size = 0)
dma_cntrl.mandatoryQueue = MessageBuffer(buffer_size=0)
gpu_dma_ctrls.append(dma_cntrl)
else:
@@ -141,7 +148,7 @@ class Disjoint_VIPER(RubySystem):
dma_cntrl.requestToDir.out_port = self.network_cpu.in_port
dma_cntrl.responseFromDir = MessageBuffer(buffer_size=0)
dma_cntrl.responseFromDir.in_port = self.network_cpu.out_port
dma_cntrl.mandatoryQueue = MessageBuffer(buffer_size = 0)
dma_cntrl.mandatoryQueue = MessageBuffer(buffer_size=0)
cpu_dma_ctrls.append(dma_cntrl)
@@ -149,32 +156,32 @@ class Disjoint_VIPER(RubySystem):
system.dma_cntrls = dma_cntrls
# Collect CPU and GPU controllers into separate lists
cpu_cntrls = cpu_dir_nodes + cp_cntrl_nodes + cpu_dma_ctrls
gpu_cntrls = tcp_cntrl_nodes + sqc_cntrl_nodes + \
scalar_cntrl_nodes + tcc_cntrl_nodes + gpu_dma_ctrls + \
gpu_dir_nodes
gpu_cntrls = (
tcp_cntrl_nodes
+ sqc_cntrl_nodes
+ scalar_cntrl_nodes
+ tcc_cntrl_nodes
+ gpu_dma_ctrls
+ gpu_dir_nodes
)
# Setup number of vnets
self.number_of_virtual_networks = 11
self.network_cpu.number_of_virtual_networks = 11
self.network_gpu.number_of_virtual_networks = 11
# Set up the disjoint topology
self.network_cpu.connectCPU(options, cpu_cntrls)
self.network_gpu.connectGPU(options, gpu_cntrls)
# Create port proxy for connecting system port. System port is used
# for loading from outside guest, e.g., binaries like vmlinux.
system.sys_port_proxy = RubyPortProxy(ruby_system = self)
system.sys_port_proxy = RubyPortProxy(ruby_system=self)
system.sys_port_proxy.pio_request_port = piobus.cpu_side_ports
system.system_port = system.sys_port_proxy.in_ports
# Only CPU sequencers connect to PIO bus. This acts as the "default"
# destination for unknown address ranges. PCIe requests fall under
# this category.
@@ -188,9 +195,9 @@ class Disjoint_VIPER(RubySystem):
if i < options.num_cpus:
cp_sequencers[i].pio_response_port = piobus.mem_side_ports
# Setup ruby port. Both CPU and GPU are actually connected here.
all_sequencers = cp_sequencers + tcp_sequencers + \
sqc_sequencers + scalar_sequencers
all_sequencers = (
cp_sequencers + tcp_sequencers + sqc_sequencers + scalar_sequencers
)
self._cpu_ports = all_sequencers
self.num_of_sequencers = len(all_sequencers)

Some files were not shown because too many files have changed in this diff Show More