Compare commits
12 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 5d49970ac7 | |||
| 523d4ae4ec | |||
| 9ff0e4ad51 | |||
| 9349f5ca10 | |||
| 15382b3b13 | |||
| fa5890dcd7 | |||
| a098b7099f | |||
| f524a5b1a4 | |||
| e4fadc0435 | |||
| dd965da616 | |||
| fc80e7b8ec | |||
| c13b79977c |
@@ -37,9 +37,8 @@ system.mem_mode = "timing"
|
||||
system.cpu = traffic_gen
|
||||
|
||||
dramsys = DRAMSys(
|
||||
configuration="ext/dramsys/DRAMSys/DRAMSys/"
|
||||
"library/resources/simulations/ddr4-example.json",
|
||||
resource_directory="ext/dramsys/DRAMSys/DRAMSys/library/resources",
|
||||
configuration="ext/dramsys/DRAMSys/configs/ddr4-example.json",
|
||||
resource_directory="ext/dramsys/DRAMSys/configs",
|
||||
)
|
||||
|
||||
system.target = dramsys
|
||||
|
||||
@@ -43,7 +43,7 @@ scons build/ARM/gem5.opt
|
||||
|
||||
from gem5.isas import ISA
|
||||
from gem5.utils.requires import requires
|
||||
from gem5.resources.resource import Resource
|
||||
from gem5.resources.resource import BinaryResource
|
||||
from gem5.components.memory import SingleChannelDDR3_1600
|
||||
from gem5.components.processors.cpu_types import CPUTypes
|
||||
from gem5.components.boards.simple_board import SimpleBoard
|
||||
@@ -84,7 +84,7 @@ board.set_se_binary_workload(
|
||||
# Any resource specified in this file will be automatically retrieved.
|
||||
# At the time of writing, this file is a WIP and does not contain all
|
||||
# resources. Jira ticket: https://gem5.atlassian.net/browse/GEM5-1096
|
||||
Resource("arm-hello64-static")
|
||||
BinaryResource("physical")
|
||||
)
|
||||
|
||||
# Lastly we run the simulation.
|
||||
|
||||
@@ -31,16 +31,14 @@ DRAMSys simulator.
|
||||
DRAMSys simulator. Please consult 'ext/dramsys/README' on how to compile
|
||||
correctly. If this is not done correctly, this script will fail with an error.
|
||||
"""
|
||||
import m5
|
||||
from gem5.components.memory import DRAMSysMem
|
||||
|
||||
from gem5.components.memory.dramsys import DRAMSysMem
|
||||
from gem5.components.boards.test_board import TestBoard
|
||||
from gem5.components.processors.linear_generator import LinearGenerator
|
||||
from m5.objects import Root
|
||||
from gem5.simulate.simulator import Simulator
|
||||
|
||||
memory = DRAMSysMem(
|
||||
configuration="ext/dramsys/DRAMSys/DRAMSys/"
|
||||
"library/resources/simulations/ddr4-example.json",
|
||||
resource_directory="ext/dramsys/DRAMSys/DRAMSys/library/resources",
|
||||
configuration="ext/dramsys/DRAMSys/configs/ddr4-example.json",
|
||||
recordable=True,
|
||||
size="4GB",
|
||||
)
|
||||
@@ -51,12 +49,16 @@ generator = LinearGenerator(
|
||||
num_cores=1,
|
||||
max_addr=memory.get_size(),
|
||||
)
|
||||
|
||||
board = TestBoard(
|
||||
clk_freq="3GHz", generator=generator, memory=memory, cache_hierarchy=None
|
||||
)
|
||||
|
||||
root = Root(full_system=False, system=board)
|
||||
board._pre_instantiate()
|
||||
m5.instantiate()
|
||||
generator.start_traffic()
|
||||
exit_event = m5.simulate()
|
||||
simulator = Simulator(board=board)
|
||||
simulator.run()
|
||||
|
||||
print(
|
||||
"Exiting @ tick {} because {}.".format(
|
||||
simulator.get_current_tick(), simulator.get_last_exit_event_cause()
|
||||
)
|
||||
)
|
||||
|
||||
15
configs/pim_config.py
Normal file
15
configs/pim_config.py
Normal file
@@ -0,0 +1,15 @@
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
@dataclass(frozen=True)
class Configuration:
    """Immutable description of a single PIM simulation run.

    Instances are turned into JSON with ``dataclasses.asdict`` and rebuilt
    from that JSON on the gem5 side, so every field holds a plain string.
    """

    # Unique name of the run.
    name: str
    # Name of the workload being simulated.
    workload: str
    # Path to the workload binary. Kept as a string (not a Path object) so
    # the configuration stays JSON-serializable and round-trips unchanged.
    executable: str
    level: str
    system: str
    # Clock frequency string, e.g. "3GHz".
    frequency: str = "3GHz"
|
||||
|
||||
@dataclass(frozen=True)
class Statistics:
    """Immutable result record produced by one simulation run."""

    # Number of ticks measured for the run.
    ticks: int
|
||||
99
configs/pim_simulation.py
Normal file
99
configs/pim_simulation.py
Normal file
@@ -0,0 +1,99 @@
|
||||
import m5
|
||||
import json
|
||||
import dataclasses
|
||||
import sys
|
||||
|
||||
from gem5.isas import ISA
|
||||
from m5.objects import (
|
||||
ArmDefaultRelease,
|
||||
)
|
||||
from gem5.utils.requires import requires
|
||||
from gem5.resources.workload import CustomWorkload
|
||||
from gem5.resources.resource import BinaryResource
|
||||
from gem5.simulate.simulator import Simulator
|
||||
from m5.objects import VExpress_GEM5_Foundation
|
||||
from gem5.components.boards.arm_baremetal_board import ArmBareMetalBoard
|
||||
from gem5.components.memory import DRAMSysHBM2
|
||||
from gem5.components.processors.cpu_types import CPUTypes
|
||||
from gem5.components.processors.simple_processor import SimpleProcessor
|
||||
from gem5.simulate.exit_event import ExitEvent
|
||||
from dataclasses import dataclass
|
||||
|
||||
from pim_config import Configuration, Statistics
|
||||
|
||||
requires(isa_required=ISA.ARM)
|
||||
|
||||
from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
|
||||
PrivateL1PrivateL2CacheHierarchy,
|
||||
)
|
||||
from gem5.components.cachehierarchies.classic.no_cache import NoCache
|
||||
|
||||
configuration = Configuration(**json.loads(sys.argv[1]))
|
||||
|
||||
cache_hierarchy = PrivateL1PrivateL2CacheHierarchy(
|
||||
l1d_size="16kB", l1i_size="16kB", l2_size="256kB"
|
||||
)
|
||||
|
||||
memory = DRAMSysHBM2(recordable=False)
|
||||
processor = SimpleProcessor(cpu_type=CPUTypes.O3, num_cores=1, isa=ISA.ARM)
|
||||
release = ArmDefaultRelease()
|
||||
platform = VExpress_GEM5_Foundation()
|
||||
|
||||
board = ArmBareMetalBoard(
|
||||
clk_freq=configuration.frequency,
|
||||
processor=processor,
|
||||
memory=memory,
|
||||
cache_hierarchy=cache_hierarchy,
|
||||
release=release,
|
||||
platform=platform,
|
||||
)
|
||||
|
||||
# HBM2 requires line size of 32 Bytes
|
||||
board.cache_line_size = 32
|
||||
|
||||
for core in processor.get_cores():
|
||||
core.core.fetchBufferSize = 32
|
||||
|
||||
workload = CustomWorkload(
|
||||
"set_baremetal_workload",
|
||||
{
|
||||
"kernel": BinaryResource(configuration.executable),
|
||||
},
|
||||
)
|
||||
board.set_workload(workload)
|
||||
|
||||
|
||||
@dataclass
class WorkloadTime:
    """Mutable pair of tick stamps marking the start and end of a workload."""

    start: int  # tick at which the workload began
    end: int  # tick at which the workload finished
|
||||
|
||||
|
||||
workload_time = WorkloadTime(0, 0)
|
||||
|
||||
|
||||
def exit_event():
    """Generator handling successive exit events of the simulation.

    Each resumption corresponds to one exit event:

    1. Workload begin: record the start tick and reset the statistics.
    2. Workload end: record the end tick and dump the statistics.
    3. Final exit: only log the tick.

    Yields ``False`` after the first two events and ``True`` after the third.
    NOTE(review): presumably ``True`` tells the gem5 Simulator to stop and
    ``False`` to continue -- confirm against the Simulator documentation.
    """
    begin_tick = m5.curTick()
    print(f"Workload begin @{begin_tick}")
    workload_time.start = begin_tick
    # Discard everything counted before the workload started.
    m5.stats.reset()
    yield False

    end_tick = m5.curTick()
    print(f"Workload end @{end_tick}")
    workload_time.end = end_tick
    m5.stats.dump()
    yield False

    print(f"Exit simulation @{m5.curTick()}...")
    yield True
|
||||
|
||||
|
||||
simulator = Simulator(
|
||||
board=board, on_exit_event={ExitEvent.EXIT: exit_event()}
|
||||
)
|
||||
|
||||
simulator.run()
|
||||
|
||||
elapsed_ticks = workload_time.end - workload_time.start
print(f"Workload took {elapsed_ticks}")

# Emit the statistics as a single JSON object on the final output line.
statistics = Statistics(elapsed_ticks)
print(json.dumps(dataclasses.asdict(statistics)))
|
||||
38
ext/dramsys/CMakeLists.txt
Normal file
38
ext/dramsys/CMakeLists.txt
Normal file
@@ -0,0 +1,38 @@
|
||||
# Copyright (c) 2023 Fraunhofer IESE
|
||||
# All rights reserved
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met: redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer;
|
||||
# redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution;
|
||||
# neither the name of the copyright holders nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
cmake_minimum_required(VERSION 3.22.0)
|
||||
project(DRAMSys)
|
||||
|
||||
set(BUILD_SHARED_LIBS OFF)
|
||||
|
||||
add_library(systemc INTERFACE)
|
||||
|
||||
target_include_directories(systemc INTERFACE "${SCONS_SOURCE_DIR}/src/systemc/ext/systemc_home/include")
|
||||
|
||||
add_library(SystemC::systemc ALIAS systemc)
|
||||
|
||||
add_subdirectory(DRAMSys)
|
||||
@@ -1,10 +1,13 @@
|
||||
Follow these steps to get DRAMSys as part of gem5
|
||||
Follow these steps to build DRAMSys as part of gem5
|
||||
|
||||
1. Go to ext/dramsys (this directory)
|
||||
2. Clone DRAMSys: 'git clone --recursive git@github.com:tukl-msd/DRAMSys.git DRAMSys'
|
||||
3. Change directory to DRAMSys: 'cd DRAMSys'
|
||||
4. Checkout the correct commit: 'git checkout -b gem5 09f6dcbb91351e6ee7cadfc7bc8b29d97625db8f'
|
||||
2. Clone DRAMSys: 'git clone https://github.com/tukl-msd/DRAMSys --branch v5.0 --depth 1 DRAMSys'
|
||||
|
||||
The latest verified working version is v5.0, but later versions might work too.
|
||||
gem5 will automatically pick up DRAMSys as an external module when it is rebuilt.
|
||||
|
||||
If you wish to run a simulation using the gem5 processor cores, make sure to enable the storage mode in DRAMSys.
|
||||
This is done by setting the value of the "StoreMode" key to "Store" in the base configuration file.
|
||||
These configuration files can be found in 'DRAMSys/library/resources/configs/simulator'.
|
||||
|
||||
Currently, DRAMSys is only supported in conjunction with a cache. Running DRAMSys in Release mode without caches will silently fail!
|
||||
|
||||
@@ -25,72 +25,58 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
Import('env')
|
||||
Import("env")
|
||||
|
||||
build_root = Dir('../..').abspath
|
||||
src_root = Dir('DRAMSys/DRAMSys/library').srcnode().abspath
|
||||
build_root = Dir("../..").abspath
|
||||
build_current = Dir(".").abspath
|
||||
src_root = Dir(".").srcnode().abspath
|
||||
scons_root = Dir("#").abspath
|
||||
|
||||
# See if we got a cloned DRAMSys repo as a subdirectory and set the
|
||||
# HAVE_DRAMSYS flag accordingly
|
||||
if not os.path.exists(Dir('.').srcnode().abspath + '/DRAMSys'):
|
||||
env['HAVE_DRAMSYS'] = False
|
||||
if not os.path.exists(Dir(".").srcnode().abspath + "/DRAMSys"):
|
||||
env["HAVE_DRAMSYS"] = False
|
||||
Return()
|
||||
|
||||
env['HAVE_DRAMSYS'] = True
|
||||
env["HAVE_DRAMSYS"] = True
|
||||
|
||||
dramsys_files = []
|
||||
dramsys_configuration_files = []
|
||||
subprocess.run(
|
||||
[
|
||||
"cmake",
|
||||
f"-S{src_root}",
|
||||
f"-B{build_current}",
|
||||
"-DCMAKE_BUILD_TYPE=Release",
|
||||
f"-DSCONS_SOURCE_DIR:STRING={scons_root}",
|
||||
"-DDRAMSYS_BUILD_CLI=OFF",
|
||||
"-DDRAMSYS_SHARED_PIM_UNITS=ON"
|
||||
],
|
||||
check=True
|
||||
)
|
||||
|
||||
dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/controller"))
|
||||
for root, dirs, files in os.walk(f"{src_root}/src/controller", topdown=False):
|
||||
for dir in dirs:
|
||||
dramsys_files.extend(Glob("%s/*.cpp" % os.path.join(root, dir)))
|
||||
subprocess.run(
|
||||
["cmake", "--build", build_current],
|
||||
check=True
|
||||
)
|
||||
|
||||
dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/simulation"))
|
||||
for root, dirs, files in os.walk(f"{src_root}/src/simulation", topdown=False):
|
||||
for dir in dirs:
|
||||
dramsys_files.extend(Glob("%s/*.cpp" % os.path.join(root, dir)))
|
||||
env.Append(LIBS="DRAMSys_libdramsys")
|
||||
env.Append(LIBPATH=Dir("./DRAMSys/src/libdramsys").abspath)
|
||||
|
||||
dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/configuration"))
|
||||
for root, dirs, files in os.walk(f"{src_root}/src/configuration", topdown=False):
|
||||
for dir in dirs:
|
||||
dramsys_files.extend(Glob("%s/*.cpp" % os.path.join(root, dir)))
|
||||
env.Append(LIBS=["libpim_vm", "libpim-vm-cxx"])
|
||||
env.Append(LIBPATH=Dir("./DRAMSys").abspath)
|
||||
|
||||
dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/error"))
|
||||
dramsys_files.extend(Glob(f"{src_root}/src/error/ECC/Bit.cpp"))
|
||||
dramsys_files.extend(Glob(f"{src_root}/src/error/ECC/ECC.cpp"))
|
||||
dramsys_files.extend(Glob(f"{src_root}/src/error/ECC/Word.cpp"))
|
||||
env.Append(LIBS="DRAMSys_Configuration")
|
||||
env.Append(LIBPATH=Dir("./DRAMSys/src/configuration").abspath)
|
||||
|
||||
dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/common"))
|
||||
dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/common/configuration"))
|
||||
dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/common/configuration/memspec"))
|
||||
dramsys_files.extend(Glob("%s/*.c" % f"{src_root}/src/common/third_party/sqlite-amalgamation"))
|
||||
env.Append(LIBS="sqlite3")
|
||||
env.Append(LIBPATH=Dir("./DRAMSys/lib/sqlite3").abspath)
|
||||
|
||||
env.Prepend(CPPPATH=[
|
||||
src_root + "/src",
|
||||
src_root + "/src/common/configuration",
|
||||
src_root + "/src/common/third_party/nlohmann/include",
|
||||
])
|
||||
env.Append(CPPPATH=src_root + "/DRAMSys/src/libdramsys")
|
||||
env.Append(CPPPATH=src_root + "/DRAMSys/src/configuration")
|
||||
env.Append(CPPPATH=src_root + "/DRAMSys/src/util")
|
||||
env.Append(CPPPATH=src_root + "/DRAMSys/lib/nlohmann_json/include")
|
||||
|
||||
env.Prepend(CPPDEFINES=[("DRAMSysResourceDirectory", '\\"' + os.getcwd() + '/resources' + '\\"')])
|
||||
env.Prepend(CPPDEFINES=[("SYSTEMC_VERSION", 20191203)])
|
||||
|
||||
dramsys = env.Clone()
|
||||
|
||||
if '-Werror' in dramsys['CCFLAGS']:
|
||||
dramsys['CCFLAGS'].remove('-Werror')
|
||||
|
||||
dramsys.Prepend(CPPPATH=[
|
||||
src_root + "/src/common/third_party/sqlite-amalgamation",
|
||||
build_root + "/systemc/ext"
|
||||
])
|
||||
|
||||
dramsys.Prepend(CPPDEFINES=[("SQLITE_ENABLE_RTREE", "1")])
|
||||
|
||||
dramsys_configuration = env.Clone()
|
||||
|
||||
dramsys.Library('dramsys', dramsys_files)
|
||||
|
||||
env.Append(LIBS=['dramsys', 'dl'])
|
||||
env.Append(LIBPATH=[Dir('.')])
|
||||
env.Prepend(CPPDEFINES=[("DRAMSYS_RESOURCE_DIR",
|
||||
'\\"' + os.getcwd() + '/DRAMSys/configs' + '\\"')])
|
||||
|
||||
45
latex_table.py
Normal file
45
latex_table.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import polars as pl
|
||||
import numpy as np
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
workload_order = {val: idx for idx, val in enumerate(["vadd", "vmul", "haxpy", "gemv", "gemv_layers"])}
|
||||
|
||||
workload_mapping = {
|
||||
"vadd": "VADD",
|
||||
"vmul": "VMUL",
|
||||
"haxpy": "HAXPY",
|
||||
"gemv": "GEMV",
|
||||
"gemv_layers": "DNN",
|
||||
}
|
||||
|
||||
out_directory = Path("tables_out")
# Create the output directory up front: writing the CSV files below fails
# if it does not exist yet.
out_directory.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
df = pl.read_csv("pim_results.csv")
|
||||
df = df.select(["workload", "level", "system", "frequency", "ticks"])
|
||||
|
||||
for name, data in df.group_by(["frequency"], maintain_order=True):
|
||||
data = data.pivot(index=["workload", "level"], columns=["system"], values=["ticks"])
|
||||
data = data.sort(pl.col("workload").replace(workload_order))
|
||||
data = data.with_columns(pl.col("workload").replace(workload_mapping))
|
||||
data = data.rename({"HBM": "hbm", "PIM-HBM": "pim"})
|
||||
print(data)
|
||||
|
||||
data.write_csv(out_directory / f"simulations_{name[0]}.csv")
|
||||
|
||||
vega_df = pl.read_csv("vega_results.csv")
|
||||
vega_df = vega_df.with_columns(system=pl.lit("vega"))
|
||||
|
||||
tesla_df = pl.read_csv("tesla_results.csv")
|
||||
tesla_df = tesla_df.with_columns(system=pl.lit("tesla"))
|
||||
|
||||
torch_df = pl.concat([vega_df, tesla_df])
|
||||
|
||||
torch_df = torch_df.pivot(index=["workload", "level"], columns=["system"], values=["runtime"])
|
||||
torch_df = torch_df.sort(pl.col("workload").replace(workload_order))
|
||||
torch_df = torch_df.with_columns(pl.col("workload").replace(workload_mapping))
|
||||
print(torch_df)
|
||||
|
||||
torch_df.write_csv(out_directory / "torch.csv")
|
||||
64
pim_plots.py
Normal file
64
pim_plots.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import polars as pl
|
||||
import seaborn as sns
|
||||
import matplotlib.pyplot as plt
|
||||
from datetime import datetime
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
out_directory = Path("pim_plots_out")
# Create the output directory up front: writing the CSV files below fails
# if it does not exist yet.
out_directory.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
df = pl.read_csv("pim_results.csv")
|
||||
|
||||
workload_sets = {
|
||||
"vector": ["vadd", "vmul", "haxpy"],
|
||||
"matrix": ["gemv", "dnn"],
|
||||
}
|
||||
|
||||
workload_mapping = {
|
||||
"gemv_layers": "dnn",
|
||||
}
|
||||
|
||||
system_mapping = {
|
||||
"HBM": "hbm",
|
||||
"PIM-HBM": "pim"
|
||||
}
|
||||
|
||||
def calc_speedup(tick_list):
    """Return the first tick count divided by the second.

    Only the first two entries of ``tick_list`` are used.
    """
    baseline_ticks = tick_list[0]
    compared_ticks = tick_list[1]
    return baseline_ticks / compared_ticks
|
||||
|
||||
|
||||
df = df.with_columns(pl.col("workload").replace(workload_mapping))
|
||||
df = df.with_columns(pl.col("system").replace(system_mapping))
|
||||
|
||||
df = df.group_by(
|
||||
["workload", "level", "frequency"], maintain_order=True
|
||||
).agg(pl.col("ticks").map_elements(calc_speedup).alias("speedup"))
|
||||
|
||||
for name, data in df.group_by(
|
||||
"frequency",
|
||||
pl.when(pl.col("workload").is_in(workload_sets["vector"]))
|
||||
.then(pl.lit("vector"))
|
||||
.when(pl.col("workload").is_in(workload_sets["matrix"]))
|
||||
.then(pl.lit("matrix")),
|
||||
):
|
||||
plot = sns.catplot(
|
||||
data=data.to_pandas(),
|
||||
kind="bar",
|
||||
x="level",
|
||||
y="speedup",
|
||||
hue="workload",
|
||||
palette="dark",
|
||||
alpha=0.6,
|
||||
height=6,
|
||||
)
|
||||
plot.set_axis_labels("Level", "Speedup")
|
||||
plot.set(title=name[0] + name[1])
|
||||
|
||||
plot.fig.subplots_adjust(top=0.95)
|
||||
|
||||
data = data.pivot(index=["level"], columns=["workload"], values=["speedup"])
|
||||
print(data)
|
||||
|
||||
data.write_csv(out_directory / f"{name[1]}_{name[0]}.csv")
|
||||
|
||||
plt.show()
|
||||
104
simulation_script.py
Normal file
104
simulation_script.py
Normal file
@@ -0,0 +1,104 @@
|
||||
import subprocess
|
||||
import dataclasses
|
||||
import json
|
||||
import pandas as pd
|
||||
|
||||
from tqdm import tqdm
|
||||
from dataclasses import dataclass
|
||||
from threading import Thread
|
||||
from multiprocessing.pool import ThreadPool
|
||||
from pathlib import Path
|
||||
from configs.pim_config import Configuration, Statistics
|
||||
|
||||
gem5 = Path("build/ARM/gem5.opt")
|
||||
out_dir_base = Path("pim_out")
|
||||
pim_simulation = Path("configs/pim_simulation.py")
|
||||
|
||||
|
||||
@dataclass
class WorkItem:
    """A simulation to run, plus its result once one is available."""

    configuration: Configuration
    # Remains None until a result has been stored for this item.
    statistics: Statistics | None = None
|
||||
|
||||
def run_gem5_process(work_item: WorkItem):
    """Run one gem5 simulation and store its statistics on ``work_item``.

    The configuration is handed to the simulation script as a JSON string.
    The last line gem5 prints on stdout is parsed as a JSON object and used
    to build the ``Statistics`` stored in ``work_item.statistics``.

    Args:
        work_item: The item whose configuration is simulated; it is updated
            in place.

    Raises:
        RuntimeError: If stdout is empty or its last line cannot be turned
            into a ``Statistics`` object. The message carries the exit code
            and stderr of gem5 so a failed run can be diagnosed.
    """
    configuration = work_item.configuration
    serialized_configuration = json.dumps(dataclasses.asdict(configuration))

    # Every run gets its own gem5 output directory, named after the run.
    out_dir = out_dir_base / configuration.name

    out = subprocess.run(
        [
            gem5,
            "-d" + out_dir.as_posix(),
            pim_simulation,
            serialized_configuration,
        ],
        capture_output=True,
    )

    lines = out.stdout.splitlines()
    try:
        work_item.statistics = Statistics(**json.loads(lines[-1]))
    except (IndexError, ValueError, TypeError) as err:
        # Without this, a crashed run only shows up as an anonymous
        # IndexError/JSONDecodeError and gem5's own diagnostics are lost.
        stderr = out.stderr.decode(errors="replace")
        raise RuntimeError(
            f"gem5 run '{configuration.name}' produced no statistics "
            f"(exit code {out.returncode}):\n{stderr}"
        ) from err
|
||||
|
||||
workload_base_directory = Path("kernels")
|
||||
workload_sub_directory = Path("aarch64-unknown-none/release")
|
||||
|
||||
workloads = [
|
||||
"vadd",
|
||||
"vmul",
|
||||
"haxpy",
|
||||
"gemv",
|
||||
"gemv_layers",
|
||||
]
|
||||
|
||||
systems = [
|
||||
"HBM",
|
||||
"PIM-HBM",
|
||||
]
|
||||
|
||||
configurations: list[Configuration] = []
|
||||
|
||||
# Sweep every frequency / level / system / workload combination.
# Narrow the lists (e.g. to ["100GHz"] or ["X3"]) for a quick partial run.
for frequency in ["3GHz", "100GHz"]:
    for level in ["X1", "X2", "X3", "X4"]:
        for system in systems:
            for workload in workloads:
                # The "HBM" system runs the "classic_"-prefixed binary.
                binary_name = (
                    f"classic_{workload}" if system == "HBM" else workload
                )
                binary_path = (
                    workload_base_directory
                    / level
                    / workload_sub_directory
                    / binary_name
                )

                configurations.append(
                    Configuration(
                        name=f"{workload}_{level}_{system}_{frequency}",
                        workload=workload,
                        executable=binary_path.as_posix(),
                        level=level,
                        system=system,
                        frequency=frequency,
                    )
                )
|
||||
|
||||
work_items = [WorkItem(configuration) for configuration in configurations]
|
||||
|
||||
with ThreadPool() as pool:
|
||||
for _ in tqdm(pool.imap_unordered(run_gem5_process, work_items), total=len(work_items)):
|
||||
pass
|
||||
|
||||
|
||||
# One row per run: the configuration fields merged with the statistics.
results: list[dict] = [
    {
        **dataclasses.asdict(item.configuration),
        **dataclasses.asdict(item.statistics),
    }
    for item in work_items
]

dataframe = pd.DataFrame(results)
dataframe.to_csv("pim_results.csv", index=False)
|
||||
@@ -124,6 +124,7 @@ if env['HAVE_DRAMSIM3']:
|
||||
if env['HAVE_DRAMSYS']:
|
||||
SimObject('DRAMSys.py', sim_objects=['DRAMSys'])
|
||||
Source('dramsys_wrapper.cc')
|
||||
Source('dramsys.cc')
|
||||
|
||||
SimObject('MemChecker.py', sim_objects=['MemChecker', 'MemCheckerMonitor'])
|
||||
Source('mem_checker.cc')
|
||||
|
||||
143
src/mem/dramsys.cc
Normal file
143
src/mem/dramsys.cc
Normal file
@@ -0,0 +1,143 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Fraunhofer IESE
|
||||
* All rights reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "dramsys.hh"
|
||||
#include "DRAMSys/common/Deserialize.h"
|
||||
#include "DRAMSys/common/Serialize.h"
|
||||
|
||||
namespace gem5
|
||||
{
|
||||
|
||||
namespace memory
|
||||
{
|
||||
|
||||
/**
 * Build the DRAMSys memory object.
 *
 * Loads the DRAMSys configuration from params.configuration and
 * params.resource_directory, constructs the SystemC wrapper around DRAMSys
 * and exposes the wrapper's target socket through tlmWrapper.
 */
DRAMSys::DRAMSys(Params const& params) :
    AbstractMemory(params),
    tlmWrapper(dramSysWrapper.tSocket, params.name + ".tlm", InvalidPortID),
    config(::DRAMSys::Config::from_path(params.configuration,
                                        params.resource_directory)),
    dramSysWrapper(
        params.name.c_str(), config, params.recordable, params.range)
    // checker("checker")
{
    // The TLM protocol checker hookup is currently disabled:
    // checker.initiator_socket.bind(dramSysWrapper.tSocket);

    // Report drain completion whenever DRAMSys says it has become idle.
    dramSysWrapper.dramsys->registerIdleCallback(
        [this]
        {
            if (!dramSysWrapper.dramsys->idle())
            {
                return;
            }

            signalDrainDone();
        });
}
|
||||
|
||||
/**
 * Look up a port by name.
 *
 * "tlm" resolves to the TLM target wrapper; every other name is delegated
 * to AbstractMemory::getPort.
 */
gem5::Port& DRAMSys::getPort(const std::string& if_name, PortID idx)
{
    if (if_name == "tlm")
    {
        return tlmWrapper;
    }

    return AbstractMemory::getPort(if_name, idx);
}
|
||||
|
||||
/**
 * Report the drain state: Drained if DRAMSys is idle right now,
 * Draining otherwise.
 */
DrainState DRAMSys::drain()
{
    if (dramSysWrapper.dramsys->idle())
    {
        return DrainState::Drained;
    }

    return DrainState::Draining;
}
|
||||
|
||||
/**
 * Write the state of every serializable SystemC object to the checkpoint
 * directory.
 *
 * Walks all top-level SystemC objects and their descendants. Each object
 * that implements ::DRAMSys::Serialize is dumped to a binary file named
 * "<object name>.pmem" inside CheckpointIn::dir(). The cp argument is not
 * used.
 */
void DRAMSys::serialize(CheckpointOut& cp) const
{
    std::filesystem::path checkpointDir = CheckpointIn::dir();

    // Recursive visitor: dump this node if possible, then descend.
    std::function<void(sc_core::sc_object const*)> dumpSubtree;
    dumpSubtree =
        [&dumpSubtree, &checkpointDir](sc_core::sc_object const* node)
    {
        if (auto const* serializable =
                dynamic_cast<::DRAMSys::Serialize const*>(node))
        {
            std::string fileName(node->name());
            fileName += ".pmem";
            std::ofstream stream(checkpointDir / fileName, std::ios::binary);
            serializable->serialize(stream);
        }

        for (auto const* child : node->get_child_objects())
        {
            dumpSubtree(child);
        }
    };

    auto roots = sc_core::sc_get_top_level_objects();
    for (auto const* root : roots)
    {
        dumpSubtree(root);
    }
}
|
||||
|
||||
/**
 * Restore the state of every deserializable SystemC object from the
 * checkpoint directory.
 *
 * Walks all top-level SystemC objects and their descendants. Each object
 * that implements ::DRAMSys::Deserialize is restored from the binary file
 * "<object name>.pmem" inside CheckpointIn::dir(). The cp argument is not
 * used.
 */
void DRAMSys::unserialize(CheckpointIn& cp)
{
    std::filesystem::path checkpointDir = CheckpointIn::dir();

    // Recursive visitor: restore this node if possible, then descend.
    std::function<void(sc_core::sc_object*)> restoreSubtree;
    restoreSubtree =
        [&restoreSubtree, &checkpointDir](sc_core::sc_object* node)
    {
        if (auto* deserializable =
                dynamic_cast<::DRAMSys::Deserialize*>(node))
        {
            std::string fileName(node->name());
            fileName += ".pmem";
            std::ifstream stream(checkpointDir / fileName, std::ios::binary);
            deserializable->deserialize(stream);
        }

        for (auto* child : node->get_child_objects())
        {
            restoreSubtree(child);
        }
    };

    auto roots = sc_core::sc_get_top_level_objects();
    for (auto* root : roots)
    {
        restoreSubtree(root);
    }
}
|
||||
|
||||
} // namespace memory
|
||||
} // namespace gem5
|
||||
@@ -29,10 +29,11 @@
|
||||
#ifndef __MEM_DRAMSYS_H__
|
||||
#define __MEM_DRAMSYS_H__
|
||||
|
||||
#include "DRAMSysConfiguration.h"
|
||||
#include "DRAMSys/config/DRAMSysConfiguration.h"
|
||||
#include "mem/abstract_mem.hh"
|
||||
#include "mem/dramsys_wrapper.hh"
|
||||
#include "params/DRAMSys.hh"
|
||||
// #include "tlm2_base_protocol_checker.h"
|
||||
|
||||
namespace gem5
|
||||
{
|
||||
@@ -43,37 +44,22 @@ namespace memory
|
||||
class DRAMSys : public AbstractMemory
|
||||
{
|
||||
PARAMS(DRAMSys);
|
||||
sc_gem5::TlmTargetWrapper<32> tlmWrapper;
|
||||
sc_gem5::TlmTargetWrapper<> tlmWrapper;
|
||||
|
||||
public:
|
||||
DRAMSys(Params const ¶ms)
|
||||
: AbstractMemory(params),
|
||||
tlmWrapper(dramSysWrapper.tSocket,
|
||||
params.name + ".tlm",
|
||||
InvalidPortID),
|
||||
config(DRAMSysConfiguration::from_path(
|
||||
params.configuration,
|
||||
params.resource_directory)),
|
||||
dramSysWrapper(params.name.c_str(),
|
||||
config,
|
||||
params.recordable,
|
||||
params.range)
|
||||
{
|
||||
}
|
||||
DRAMSys(Params const& params);
|
||||
|
||||
gem5::Port &getPort(const std::string &if_name, PortID idx) override
|
||||
{
|
||||
if (if_name != "tlm")
|
||||
{
|
||||
return AbstractMemory::getPort(if_name, idx);
|
||||
}
|
||||
gem5::Port& getPort(const std::string& if_name, PortID idx) override;
|
||||
|
||||
return tlmWrapper;
|
||||
}
|
||||
DrainState drain() override;
|
||||
|
||||
void serialize(CheckpointOut& cp) const override;
|
||||
void unserialize(CheckpointIn& cp) override;
|
||||
|
||||
private:
|
||||
DRAMSysConfiguration::Configuration config;
|
||||
::DRAMSys::Config::Configuration config;
|
||||
DRAMSysWrapper dramSysWrapper;
|
||||
// tlm_utils::tlm2_base_protocol_checker<> checker;
|
||||
};
|
||||
|
||||
} // namespace memory
|
||||
|
||||
@@ -36,7 +36,7 @@ namespace memory
|
||||
|
||||
DRAMSysWrapper::DRAMSysWrapper(
|
||||
sc_core::sc_module_name name,
|
||||
DRAMSysConfiguration::Configuration const &config,
|
||||
::DRAMSys::Config::Configuration const &config,
|
||||
bool recordable,
|
||||
AddrRange range) :
|
||||
sc_core::sc_module(name),
|
||||
@@ -44,28 +44,41 @@ DRAMSysWrapper::DRAMSysWrapper(
|
||||
range(range)
|
||||
{
|
||||
tSocket.register_nb_transport_fw(this, &DRAMSysWrapper::nb_transport_fw);
|
||||
tSocket.register_transport_dbg(this, &DRAMSysWrapper::transport_dbg);
|
||||
iSocket.register_nb_transport_bw(this, &DRAMSysWrapper::nb_transport_bw);
|
||||
|
||||
tSocket.register_b_transport(this, &DRAMSysWrapper::b_transport);
|
||||
|
||||
tSocket.register_transport_dbg(this, &DRAMSysWrapper::transport_dbg);
|
||||
iSocket.bind(dramsys->tSocket);
|
||||
|
||||
// Register a callback to compensate for the destructor not
|
||||
// being called.
|
||||
registerExitCallback(
|
||||
[this]()
|
||||
[]()
|
||||
{
|
||||
// Workaround for BUG GEM5-1233
|
||||
sc_gem5::Kernel::stop();
|
||||
});
|
||||
}
|
||||
|
||||
std::shared_ptr<::DRAMSys>
|
||||
std::shared_ptr<::DRAMSys::DRAMSys>
|
||||
DRAMSysWrapper::instantiateDRAMSys(
|
||||
bool recordable,
|
||||
DRAMSysConfiguration::Configuration const &config)
|
||||
::DRAMSys::Config::Configuration const &config)
|
||||
{
|
||||
return recordable
|
||||
? std::make_shared<::DRAMSysRecordable>("DRAMSys", config)
|
||||
: std::make_shared<::DRAMSys>("DRAMSys", config);
|
||||
? std::make_shared<::DRAMSys::DRAMSysRecordable>("DRAMSys", config)
|
||||
: std::make_shared<::DRAMSys::DRAMSys>("DRAMSys", config);
|
||||
}
|
||||
|
||||
void DRAMSysWrapper::b_transport(
|
||||
tlm::tlm_generic_payload &payload,
|
||||
sc_core::sc_time &delay)
|
||||
{
|
||||
// Subtract base address offset
|
||||
payload.set_address(payload.get_address() - range.start());
|
||||
|
||||
iSocket->b_transport(payload, delay);
|
||||
}
|
||||
|
||||
tlm::tlm_sync_enum DRAMSysWrapper::nb_transport_fw(
|
||||
|
||||
@@ -32,13 +32,14 @@
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
|
||||
#include "DRAMSysConfiguration.h"
|
||||
#include "DRAMSys/config/DRAMSysConfiguration.h"
|
||||
#include "DRAMSys/simulation/DRAMSysRecordable.h"
|
||||
#include "mem/abstract_mem.hh"
|
||||
#include "params/DRAMSys.hh"
|
||||
#include "sim/core.hh"
|
||||
#include "simulation/DRAMSysRecordable.h"
|
||||
#include "systemc/core/kernel.hh"
|
||||
#include "systemc/ext/core/sc_module_name.hh"
|
||||
|
||||
#include "systemc/ext/systemc"
|
||||
#include "systemc/ext/tlm"
|
||||
#include "systemc/ext/tlm_utils/simple_target_socket.h"
|
||||
@@ -57,14 +58,14 @@ class DRAMSysWrapper : public sc_core::sc_module
|
||||
public:
|
||||
SC_HAS_PROCESS(DRAMSysWrapper);
|
||||
DRAMSysWrapper(sc_core::sc_module_name name,
|
||||
DRAMSysConfiguration::Configuration const &config,
|
||||
::DRAMSys::Config::Configuration const &config,
|
||||
bool recordable,
|
||||
AddrRange range);
|
||||
|
||||
private:
|
||||
static std::shared_ptr<::DRAMSys>
|
||||
static std::shared_ptr<::DRAMSys::DRAMSys>
|
||||
instantiateDRAMSys(bool recordable,
|
||||
DRAMSysConfiguration::Configuration const &config);
|
||||
::DRAMSys::Config::Configuration const &config);
|
||||
|
||||
tlm::tlm_sync_enum nb_transport_fw(tlm::tlm_generic_payload &payload,
|
||||
tlm::tlm_phase &phase,
|
||||
@@ -74,12 +75,15 @@ class DRAMSysWrapper : public sc_core::sc_module
|
||||
tlm::tlm_phase &phase,
|
||||
sc_core::sc_time &bwDelay);
|
||||
|
||||
void b_transport(tlm::tlm_generic_payload &payload,
|
||||
sc_core::sc_time &delay);
|
||||
|
||||
unsigned int transport_dbg(tlm::tlm_generic_payload &trans);
|
||||
|
||||
tlm_utils::simple_initiator_socket<DRAMSysWrapper> iSocket;
|
||||
tlm_utils::simple_target_socket<DRAMSysWrapper> tSocket;
|
||||
|
||||
std::shared_ptr<::DRAMSys> dramsys;
|
||||
std::shared_ptr<::DRAMSys::DRAMSys> dramsys;
|
||||
|
||||
AddrRange range;
|
||||
};
|
||||
|
||||
1049
src/mem/tlm2_base_protocol_checker.h
Normal file
1049
src/mem/tlm2_base_protocol_checker.h
Normal file
File diff suppressed because it is too large
Load Diff
@@ -51,6 +51,7 @@ PySource('gem5.components.boards', 'gem5/components/boards/simple_board.py')
|
||||
PySource('gem5.components.boards', 'gem5/components/boards/test_board.py')
|
||||
PySource('gem5.components.boards', 'gem5/components/boards/x86_board.py')
|
||||
PySource('gem5.components.boards', 'gem5/components/boards/arm_board.py')
|
||||
PySource('gem5.components.boards', 'gem5/components/boards/arm_baremetal_board.py')
|
||||
PySource('gem5.components.boards',
|
||||
"gem5/components/boards/kernel_disk_workload.py")
|
||||
PySource('gem5.components.boards',
|
||||
|
||||
298
src/python/gem5/components/boards/arm_baremetal_board.py
Normal file
298
src/python/gem5/components/boards/arm_baremetal_board.py
Normal file
@@ -0,0 +1,298 @@
|
||||
# Copyright (c) 2022 The Regents of the University of California
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met: redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer;
|
||||
# redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution;
|
||||
# neither the name of the copyright holders nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from m5.objects import (
|
||||
Port,
|
||||
IOXBar,
|
||||
Bridge,
|
||||
BadAddr,
|
||||
Terminal,
|
||||
AddrRange,
|
||||
ArmSystem,
|
||||
ArmRelease,
|
||||
ArmFsWorkload,
|
||||
VoltageDomain,
|
||||
SrcClockDomain,
|
||||
ArmDefaultRelease,
|
||||
VExpress_GEM5_Base,
|
||||
VExpress_GEM5_Foundation,
|
||||
SimObject,
|
||||
VncServer,
|
||||
)
|
||||
|
||||
from abc import ABCMeta
|
||||
from ...isas import ISA
|
||||
from ...utils.requires import requires
|
||||
from ...utils.override import overrides
|
||||
from typing import List, Sequence, Tuple
|
||||
from .abstract_board import AbstractBoard
|
||||
from ...resources.resource import AbstractResource, BinaryResource
|
||||
from .kernel_disk_workload import KernelDiskWorkload
|
||||
from ..cachehierarchies.classic.no_cache import NoCache
|
||||
from ..processors.abstract_processor import AbstractProcessor
|
||||
from ..memory.abstract_memory_system import AbstractMemorySystem
|
||||
from ..cachehierarchies.abstract_cache_hierarchy import AbstractCacheHierarchy
|
||||
|
||||
|
||||
class ArmBareMetalBoard(ArmSystem, AbstractBoard):
    """
    An ARM board, built on `ArmSystem` and `AbstractBoard`, that runs a
    bare-metal binary.

    The board wires a RealView platform, an incoherent I/O bus, a terminal
    and a VNC server around the given processor, memory and cache hierarchy.
    The binary to execute is provided through `set_baremetal_workload()`.
    """

    # NOTE(review): `__metaclass__` is the Python 2 spelling. Python 3 ignores
    # this attribute, so ABCMeta is not actually applied to this class.
    __metaclass__ = ABCMeta

    def __init__(
        self,
        clk_freq: str,
        processor: AbstractProcessor,
        memory: AbstractMemorySystem,
        cache_hierarchy: AbstractCacheHierarchy,
        platform: VExpress_GEM5_Base = VExpress_GEM5_Foundation(),
        release: ArmRelease = ArmDefaultRelease(),
    ) -> None:
        """
        :param clk_freq: Clock frequency of the board's clock domain.
        :param processor: The processor of the board.
        :param memory: The memory system of the board.
        :param cache_hierarchy: The cache hierarchy of the board.
        :param platform: The RealView platform the board is built on.
        :param release: The ARM release assigned to the system.

        NOTE(review): the `platform` and `release` defaults are evaluated
        once, when the class is defined, so every board relying on them
        receives the same instance.
        """
        # The platform and the clock have to be stored before calling the
        # super class constructors, as `_setup_board` reads them.
        self._platform = platform
        self._clk_freq = clk_freq

        super().__init__()
        AbstractBoard.__init__(
            self,
            clk_freq=clk_freq,
            processor=processor,
            memory=memory,
            cache_hierarchy=cache_hierarchy,
        )

        # This board requires ARM ISA to work.
        requires(isa_required=ISA.ARM)

        # Setting up ARM release here. We use the ARM default release, which
        # corresponds to an ARMv8 system.
        self.release = release

        # Setting multi_proc of ArmSystem by counting the number of
        # processors: anything other than a single core is multi-processor.
        self.multi_proc = processor.get_num_cores() != 1

    @overrides(AbstractBoard)
    def _setup_board(self) -> None:
        """
        Create the workload, clock/voltage domains, terminal, VNC server and
        I/O bus, attach the platform devices and finally compute the memory
        ranges of the board.

        :raises ValueError: If the memory does not fit in the memory regions
            offered by the platform.
        """
        # This board is expected to run full-system simulation.
        # Loading ArmFsWorkload() from `src/arch/arm/ArmFsWorkload.py`
        self.workload = ArmFsWorkload()

        # We are fixing the following variable for the ArmSystem to work. The
        # security extension is checked while generating the dtb file in
        # realview. This board does not have security extension enabled.
        self._have_psci = False

        # highest_el_is_64 is set to True. True if the register width of the
        # highest implemented exception level is 64 bits.
        self.highest_el_is_64 = True

        # Setting up the voltage and the clock domain here for the ARM board.
        # The ArmSystem/RealView expects voltage_domain to be a parameter.
        # The voltage and the clock frequency are taken from the devices.py
        # file from configs/example/arm. We set the clock to the same frequency
        # as the user specified in the config script.
        self.voltage_domain = VoltageDomain(voltage="1.0V")
        self.clk_domain = SrcClockDomain(
            clock=self._clk_freq, voltage_domain=self.voltage_domain
        )

        # The ARM board supports both Terminal and VncServer.
        self.terminal = Terminal()
        self.vncserver = VncServer()

        # Incoherent I/O Bus
        self.iobus = IOXBar()
        self.iobus.badaddr_responder = BadAddr()
        self.iobus.default = self.iobus.badaddr_responder.pio

        # The DMA ports start out unset; `get_dma_ports()` turns this into an
        # empty list when a ruby cache hierarchy is used.
        self._dma_ports = None

        # RealView sets up most of the on-chip and off-chip devices and GIC
        # for the ARM board. These devices' information is also used to
        # generate the dtb file. We then connect the I/O devices to the
        # I/O bus.
        self._setup_io_devices()

        # Once the realview is set up, we can continue setting up the memory
        # ranges. The board's memory can only be set up once realview is
        # initialized.
        memory = self.get_memory()
        mem_size = memory.get_size()

        # The following code is taken from configs/example/arm/devices.py. It
        # sets up all the memory ranges for the board by filling the
        # platform's memory regions in order until the whole memory is
        # placed.
        self.mem_ranges = []
        success = False
        for mem_range in self.realview._mem_regions:
            size_in_range = min(mem_size, mem_range.size())
            self.mem_ranges.append(
                AddrRange(start=mem_range.start, size=size_in_range)
            )

            mem_size -= size_in_range
            if mem_size == 0:
                success = True
                break

        if success:
            memory.set_memory_range(self.mem_ranges)
        else:
            raise ValueError("Memory size too big for platform capabilities")

    def _setup_io_devices(self) -> None:
        """
        This method first sets up the platform. ARM uses the `realview`
        platform. Most of the on-chip and off-chip devices are set up by the
        realview platform. Once realview is set up, we connect the I/O
        devices to the I/O bus.
        """

        # Currently, the ArmBoard supports VExpress_GEM5_V1,
        # VExpress_GEM5_V1_HDLcd and VExpress_GEM5_Foundation.
        # VExpress_GEM5_V2 and VExpress_GEM5_V2_HDLcd are not supported by the
        # ArmBoard.
        self.realview = self._platform

        # We need to set up the global interrupt controller (GIC) addr for
        # the realview system.
        if hasattr(self.realview.gic, "cpu_addr"):
            self.gic_cpu_addr = self.realview.gic.cpu_addr

        # The IO devices have to be set up before incorporating the caches in
        # the case of ruby caches. Otherwise the DMA controllers are
        # incorrectly created. The IO device has to be attached first. This
        # is done in the realview class.
        if self.get_cache_hierarchy().is_ruby():
            # All the on-chip devices are attached in this method.
            self.realview.attachOnChipIO(
                self.iobus,
                dma_ports=self.get_dma_ports(),
                mem_ports=self.get_memory().get_mem_ports(),
            )
            self.realview.attachIO(self.iobus, dma_ports=self.get_dma_ports())

        else:
            # We either have iocache or dmabridge depending upon the
            # cache_hierarchy. If we have "NoCache", then we use the
            # dmabridge. Otherwise, we use the iocache on the board.

            # We set up the iobridge for the ARM Board. The default
            # cache_hierarchy's NoCache class has an iobridge with a latency
            # of 10. We are using an iobridge with latency = 50ns, taken
            # from the configs/example/arm/devices.py.
            self.iobridge = Bridge(delay="50ns")
            self.iobridge.mem_side_port = self.iobus.cpu_side_ports
            self.iobridge.cpu_side_port = (
                self.cache_hierarchy.get_mem_side_port()
            )

            if isinstance(self.cache_hierarchy, NoCache):
                # This corresponds to a machine without caches. We have a DMA
                # bridge in this case. Parameters of this bridge are also
                # taken from the configs/example/arm/devices.py file.
                # NOTE(review): `_setup_board` calls this method before it
                # assigns `self.mem_ranges`, so the ranges used here are
                # whatever the attribute held beforehand -- confirm this is
                # intended.
                self.dmabridge = Bridge(delay="50ns", ranges=self.mem_ranges)
                self.dmabridge.mem_side_port = (
                    self.cache_hierarchy.get_cpu_side_port()
                )
                self.dmabridge.cpu_side_port = self.iobus.mem_side_ports

            # The classic caches are set up in the _setup_io_cache() method
            # defined under the cachehierarchy class. Verified it with both
            # PrivateL1PrivateL2CacheHierarchy and PrivateL1CacheHierarchy
            # classes.
            self.realview.attachOnChipIO(
                self.cache_hierarchy.membus, self.iobridge
            )
            self.realview.attachIO(self.iobus)

    @overrides(AbstractBoard)
    def has_io_bus(self) -> bool:
        """This board always has an I/O bus."""
        return True

    @overrides(AbstractBoard)
    def get_io_bus(self) -> IOXBar:
        """Return the incoherent I/O bus created in `_setup_board`."""
        return self.iobus

    @overrides(AbstractBoard)
    def has_coherent_io(self) -> bool:
        """Return False for a `NoCache` hierarchy and True otherwise."""
        # The setup of the caches gets a little tricky here. We need to
        # override the default cache_hierarchy.iobridge due to different delay
        # values (see method _setup_io_devices()). One way to do it would be to
        # prevent creating cache_hierarchy.iobridge altogether. We trick
        # NoCache() to assume that this board has no coherent_io and we
        # simply set up our own iobridge in the _setup_io_devices() method.
        # In all other cases, we use the default values set up in the
        # respective cache hierarchy class.
        return not isinstance(self.cache_hierarchy, NoCache)

    @overrides(AbstractBoard)
    def get_mem_side_coherent_io_port(self) -> Port:
        """Return the memory-side ports of the I/O bus."""
        return self.iobus.mem_side_ports

    @overrides(AbstractBoard)
    def has_dma_ports(self) -> bool:
        """This board always reports having DMA ports."""
        return True

    @overrides(AbstractBoard)
    def get_dma_ports(self) -> List[Port]:
        """
        Return the DMA ports of the board.

        With a ruby cache hierarchy the list is created empty on first use.
        With classic caches the value assigned in `_setup_board` (None) is
        returned unchanged.
        """
        # The DMA ports differ depending upon the cache hierarchy. In the
        # case of ruby caches, this method should initially return an empty
        # list.
        if self.cache_hierarchy.is_ruby():
            if self._dma_ports is None:
                self._dma_ports = []

        return self._dma_ports

    @overrides(AbstractBoard)
    def connect_system_port(self, port: Port) -> None:
        """Connect the system port of the board to `port`."""
        self.system_port = port

    @overrides(AbstractBoard)
    def _setup_memory_ranges(self) -> None:
        """
        The board's memory can only be set up after realview is set up. We
        do this in the `_setup_board` function, so nothing happens here.
        """
        pass

    def set_baremetal_workload(self, kernel: BinaryResource) -> None:
        """
        Run the board in full-system mode with `kernel` as the workload's
        object file.

        :param kernel: The binary to execute; its local path is used.
        """
        self._set_fullsystem(True)
        self.workload.object_file = kernel.get_local_path()

    @overrides(SimObject)
    def createCCObject(self):
        """We override this function as it is called in `m5.instantiate`. This
        means we can insert a check to ensure the `_connect_things` function
        has been run.
        """
        super()._connect_things_check()
        super().createCCObject()
|
||||
@@ -24,7 +24,8 @@
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import m5
|
||||
from typing import Tuple, Sequence, List, Optional
|
||||
from pathlib import Path
|
||||
|
||||
from m5.objects import (
|
||||
DRAMSys,
|
||||
@@ -40,27 +41,46 @@ from ...utils.override import overrides
|
||||
from ..boards.abstract_board import AbstractBoard
|
||||
from .abstract_memory_system import AbstractMemorySystem
|
||||
|
||||
from typing import Tuple, Sequence, List
|
||||
|
||||
DEFAULT_DRAMSYS_DIRECTORY = Path("ext/dramsys/DRAMSys")
|
||||
|
||||
|
||||
class DRAMSysMem(AbstractMemorySystem):
|
||||
"""
|
||||
A DRAMSys memory controller.
|
||||
|
||||
This class requires gem5 to be built with DRAMSys (see ext/dramsys).
|
||||
The specified memory size does not control the simulated memory size; its sole purpose is
|
||||
to notify gem5 of DRAMSys's memory size.
|
||||
Therefore it has to match the DRAMSys configuration.
|
||||
DRAMSys is configured using JSON files, whose base configuration has to be passed as a
|
||||
parameter. Sub-configs are specified relative to the optional resource directory parameter.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
configuration: str,
|
||||
size: str,
|
||||
resource_directory: str,
|
||||
recordable: bool,
|
||||
resource_directory: Optional[str] = None,
|
||||
) -> None:
|
||||
"""
|
||||
:param configuration: Path to the base configuration JSON for DRAMSys.
|
||||
:param size: Memory size of DRAMSys. Must match the size specified in JSON configuration.
|
||||
:param resource_directory: Path to the base resource directory for DRAMSys.
|
||||
:param recordable: Whether the database recording feature of DRAMSys is enabled.
|
||||
:param resource_directory: Path to the base resource directory for DRAMSys.
|
||||
"""
|
||||
super().__init__()
|
||||
|
||||
resource_directory_path = (
|
||||
DEFAULT_DRAMSYS_DIRECTORY / "configs"
|
||||
if resource_directory is None
|
||||
else Path(resource_directory)
|
||||
)
|
||||
|
||||
self.dramsys = DRAMSys(
|
||||
configuration=configuration,
|
||||
resource_directory=resource_directory,
|
||||
resource_directory=resource_directory_path.as_posix(),
|
||||
recordable=recordable,
|
||||
)
|
||||
|
||||
@@ -97,56 +117,72 @@ class DRAMSysMem(AbstractMemorySystem):
|
||||
|
||||
|
||||
class DRAMSysDDR4_1866(DRAMSysMem):
|
||||
"""
|
||||
An example DDR4 1866 DRAMSys configuration.
|
||||
"""
|
||||
|
||||
def __init__(self, recordable: bool):
|
||||
"""
|
||||
:param recordable: Whether the database recording feature of DRAMSys is enabled.
|
||||
"""
|
||||
super().__init__(
|
||||
configuration="ext/dramsys/DRAMSys/DRAMSys/"
|
||||
"library/resources/simulations/ddr4-example.json",
|
||||
configuration=(
|
||||
DEFAULT_DRAMSYS_DIRECTORY / "configs/ddr4-example.json"
|
||||
).as_posix(),
|
||||
size="4GB",
|
||||
resource_directory="ext/dramsys/DRAMSys/DRAMSys/library/resources",
|
||||
recordable=recordable,
|
||||
)
|
||||
|
||||
|
||||
class DRAMSysDDR3_1600(DRAMSysMem):
|
||||
"""
|
||||
An example DDR3 1600 DRAMSys configuration.
|
||||
"""
|
||||
|
||||
def __init__(self, recordable: bool):
|
||||
"""
|
||||
:param recordable: Whether the database recording feature of DRAMSys is enabled.
|
||||
"""
|
||||
super().__init__(
|
||||
configuration="ext/dramsys/DRAMSys/DRAMSys/"
|
||||
"library/resources/simulations/ddr3-gem5-se.json",
|
||||
size="4GB",
|
||||
resource_directory="ext/dramsys/DRAMSys/DRAMSys/library/resources",
|
||||
configuration=(
|
||||
DEFAULT_DRAMSYS_DIRECTORY / "configs/ddr3-gem5-se.json"
|
||||
).as_posix(),
|
||||
size="1GB",
|
||||
recordable=recordable,
|
||||
)
|
||||
|
||||
|
||||
class DRAMSysLPDDR4_3200(DRAMSysMem):
|
||||
"""
|
||||
An example LPDDR4 3200 DRAMSys configuration.
|
||||
"""
|
||||
|
||||
def __init__(self, recordable: bool):
|
||||
"""
|
||||
:param recordable: Whether the database recording feature of DRAMSys is enabled.
|
||||
"""
|
||||
super().__init__(
|
||||
configuration="ext/dramsys/DRAMSys/DRAMSys/"
|
||||
"library/resources/simulations/lpddr4-example.json",
|
||||
size="4GB",
|
||||
resource_directory="ext/dramsys/DRAMSys/DRAMSys/library/resources",
|
||||
configuration=(
|
||||
DEFAULT_DRAMSYS_DIRECTORY / "configs/lpddr4-example.json"
|
||||
).as_posix(),
|
||||
size="1GB",
|
||||
recordable=recordable,
|
||||
)
|
||||
|
||||
|
||||
class DRAMSysHBM2(DRAMSysMem):
|
||||
"""
|
||||
An example HBM2 DRAMSys configuration.
|
||||
"""
|
||||
|
||||
def __init__(self, recordable: bool):
|
||||
"""
|
||||
:param recordable: Whether the database recording feature of DRAMSys is enabled.
|
||||
"""
|
||||
super().__init__(
|
||||
configuration="ext/dramsys/DRAMSys/DRAMSys/"
|
||||
"library/resources/simulations/hbm2-example.json",
|
||||
size="4GB",
|
||||
resource_directory="ext/dramsys/DRAMSys/DRAMSys/library/resources",
|
||||
configuration=(
|
||||
DEFAULT_DRAMSYS_DIRECTORY / "configs/hbm2-example.json"
|
||||
).as_posix(),
|
||||
size="2GB",
|
||||
recordable=recordable,
|
||||
)
|
||||
|
||||
@@ -67,6 +67,7 @@
|
||||
#include "params/Gem5ToTlmBridge512.hh"
|
||||
#include "sim/eventq.hh"
|
||||
#include "sim/system.hh"
|
||||
#include "systemc/ext/tlm_core/2/generic_payload/gp.hh"
|
||||
#include "systemc/tlm_bridge/sc_ext.hh"
|
||||
#include "systemc/tlm_bridge/sc_mm.hh"
|
||||
|
||||
@@ -152,6 +153,7 @@ packet2payload(PacketPtr packet)
|
||||
trans->acquire();
|
||||
|
||||
trans->set_address(packet->getAddr());
|
||||
trans->set_response_status(tlm::TLM_INCOMPLETE_RESPONSE);
|
||||
|
||||
/* Check if this transaction was allocated by mm */
|
||||
sc_assert(trans->has_mm());
|
||||
@@ -480,7 +482,7 @@ Gem5ToTlmBridge<BITWIDTH>::recvRespRetry()
|
||||
|
||||
tlm::tlm_generic_payload *trans = blockingResponse;
|
||||
blockingResponse = nullptr;
|
||||
PacketPtr packet = packetMap[blockingResponse];
|
||||
PacketPtr packet = packetMap[trans];
|
||||
sc_assert(packet);
|
||||
|
||||
bool need_retry = !bridgeResponsePort.sendTimingResp(packet);
|
||||
|
||||
59
torch_plots.py
Normal file
59
torch_plots.py
Normal file
@@ -0,0 +1,59 @@
|
||||
# Plot the tick counts of the simulated systems next to the runtimes measured
# on the "vega" and "tesla" systems, and export one CSV table per system.
import matplotlib.pyplot as plt
import seaborn as sns
import polars as pl

from pathlib import Path

out_directory = Path("torch_plots_out")
# write_csv() below does not create missing directories, so make sure the
# output directory exists before anything is written into it.
out_directory.mkdir(parents=True, exist_ok=True)

# Short identifiers for the system names used in pim_results.csv.
system_mapping = {
    "HBM": "hbm",
    "PIM-HBM": "pim",
}

# Simulation results: the system name is combined with the frequency so each
# (system, frequency) pair shows up as its own series.
gem_df = pl.read_csv("pim_results.csv")
gem_df = gem_df.with_columns(pl.col("system").replace(system_mapping))
gem_df = gem_df.with_columns(
    pl.concat_str(["system", "frequency"], separator="_").alias("system")
)
gem_df = gem_df.select(["workload", "level", "system", "ticks"])

# The vega/tesla results store their measurement in a "runtime" column; rename
# it so that all three frames share the "ticks" column.
vega_df = pl.read_csv("vega_results.csv")
vega_df = vega_df.rename({"runtime": "ticks"})
vega_df = vega_df.with_columns(pl.lit("vega").alias("system"))

tesla_df = pl.read_csv("tesla_results.csv")
tesla_df = tesla_df.rename({"runtime": "ticks"})
tesla_df = tesla_df.with_columns(pl.lit("tesla").alias("system"))

# "diagonal" concatenation tolerates frames whose column sets differ.
df = pl.concat([gem_df, vega_df, tesla_df], how="diagonal")

workload_sets = [["vadd", "vmul", "haxpy"], ["gemv", "dnn"]]

workload_mapping = {
    "gemv_layers": "dnn",
}

df = df.with_columns(pl.col("workload").replace(workload_mapping))

# Plotting per workload set is currently disabled; all workloads share one
# figure with one column per workload.
# for workload_set in workload_sets:
#     temp_df = df.filter(pl.col("workload").is_in(workload_set))

g = sns.catplot(
    data=df.to_pandas(),
    kind="bar",
    x="level",
    y="ticks",
    hue="system",
    col="workload",
    palette="dark",
    alpha=0.6,
    height=6,
)

# One table per system: rows are levels, columns are workloads.
for name, data in df.group_by("system"):
    data = data.pivot(index=["level"], columns=["workload"], values=["ticks"])
    data.write_csv(out_directory / f"{name}.csv")
    print(data)

plt.show()
|
||||
52
wallclock_time_plots.py
Normal file
52
wallclock_time_plots.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# Collect the host (wallclock) runtime of every simulation run found in the
# stats directory and plot it per system and workload.
import re
import polars as pl
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import timedelta

from pathlib import Path
from typing import Any

stats_dir = Path("pim_out")

# Run directories are named "<workload>_<level>_<system>_<freq>".
run_name_regex = re.compile(r'(\w+)_(\w+)_(\w*-*\w*)_(\w+)')
# Compiled once instead of once per run; the decimal point is escaped so that
# only a real decimal number is accepted.
host_seconds_regex = re.compile(r'hostSeconds\ +(\d+\.\d+)')

runtime_dict: dict[str, list[Any]] = {}

for element in stats_dir.iterdir():
    print(element.name)
    matches = run_name_regex.search(element.name)
    if matches is None:
        # Not a run directory; nothing to extract from it.
        print(f"Skipping {element.name}: unexpected name")
        continue

    workload, level, system, freq = matches.groups()

    # Reset for every run so that a stats file without a hostSeconds entry
    # can never inherit the value of the previously processed run.
    runtime = None

    with open(element / "stats.txt") as f:
        for line in f:
            result = host_seconds_regex.search(line)
            if result is not None:
                # Keep overwriting on purpose: the last match in the file
                # is the one that is used.
                runtime = result.group(1)

    if runtime is None:
        print(f"Skipping {element.name}: no hostSeconds in stats.txt")
        continue

    runtime_dict.setdefault("workload", []).append(workload)
    runtime_dict.setdefault("level", []).append(level)
    runtime_dict.setdefault("system", []).append(system)
    runtime_dict.setdefault("freq", []).append(freq)
    runtime_dict.setdefault("runtime", []).append(float(runtime))

# Only the 100GHz runs at level X3 are plotted.
df = pl.DataFrame(runtime_dict)
df = df.filter((pl.col("freq") == "100GHz") & (pl.col("level") == "X3"))
df = df.drop("freq")
print(df)

plot = sns.catplot(
    data=df.to_pandas(),
    kind="bar",
    x="system",
    y="runtime",
    hue="workload",
    palette="dark",
    alpha=0.6,
    height=6,
)
plot.set_axis_labels("PIM vs. Non-PIM", "Runtime [s]")
plot.set(title="Wallclock Time")

# Leave room above the axes for the title.
plot.fig.subplots_adjust(top=0.95)

plt.show()
|
||||
Reference in New Issue
Block a user