Wallclock time plots

This commit is contained in:
2024-03-22 18:42:18 +01:00
committed by Derek Christ
parent 353488837c
commit 43fbdd853f
6 changed files with 238 additions and 67 deletions

View File

@@ -7,7 +7,7 @@ class Configuration:
workload: str workload: str
executable: Path executable: Path
level: str level: str
pim: bool system: str
frequency: str = "3GHz" frequency: str = "3GHz"
@dataclass(frozen=True) @dataclass(frozen=True)

45
latex_table.py Normal file
View File

@@ -0,0 +1,45 @@
import matplotlib.pyplot as plt
import seaborn as sns
import polars as pl
import numpy as np
from pathlib import Path
# Export the simulation and GPU measurement results as CSV tables.
#
# Reads "pim_results.csv", "vega_results.csv" and "tesla_results.csv" from the
# working directory and writes one table per simulated frequency plus one
# table for the GPU runs into `out_directory`.

# Canonical row order of the workloads in the exported tables.
workload_order = {
    val: idx
    for idx, val in enumerate(["vadd", "vmul", "haxpy", "gemv", "gemv_layers"])
}

# Display names that replace the raw workload identifiers in the tables.
workload_mapping = {
    "vadd": "VADD",
    "vmul": "VMUL",
    "haxpy": "HAXPY",
    "gemv": "GEMV",
    "gemv_layers": "DNN",
}

out_directory = Path("tables_out")
# Make sure the target directory exists before the first write_csv() call,
# so the script also works on a fresh checkout.
out_directory.mkdir(parents=True, exist_ok=True)


def _sort_and_label_workloads(frame):
    """Sort rows by `workload_order`, then swap in the display names.

    The sort has to happen first because it keys on the raw workload
    identifiers, which `workload_mapping` overwrites afterwards.
    """
    frame = frame.sort(pl.col("workload").replace(workload_order))
    return frame.with_columns(pl.col("workload").replace(workload_mapping))


df = pl.read_csv("pim_results.csv")
df = df.select(["workload", "level", "system", "frequency", "ticks"])

# One table per frequency: rows are (workload, level), one tick column per system.
for name, data in df.group_by(["frequency"], maintain_order=True):
    data = data.pivot(index=["workload", "level"], columns=["system"], values=["ticks"])
    data = _sort_and_label_workloads(data)
    data = data.rename({"HBM": "hbm", "PIM-HBM": "pim"})

    print(data)
    # `name` is the tuple of group keys; its only element is the frequency.
    data.write_csv(out_directory / f"simulations_{name[0]}.csv")

# GPU measurements: tag each result set with its system before merging.
vega_df = pl.read_csv("vega_results.csv")
vega_df = vega_df.with_columns(system=pl.lit("vega"))

tesla_df = pl.read_csv("tesla_results.csv")
tesla_df = tesla_df.with_columns(system=pl.lit("tesla"))

torch_df = pl.concat([vega_df, tesla_df])
torch_df = torch_df.pivot(index=["workload", "level"], columns=["system"], values=["runtime"])
torch_df = _sort_and_label_workloads(torch_df)

print(torch_df)
torch_df.write_csv(out_directory / "torch.csv")

View File

@@ -1,48 +1,64 @@
import matplotlib.pyplot as plt import polars as pl
import seaborn as sns import seaborn as sns
import pandas as pd import matplotlib.pyplot as plt
import numpy as np from datetime import datetime
from pathlib import Path from pathlib import Path
df = pd.read_csv("pim_results.csv") out_directory = Path("pim_plots_out")
sns.set_theme() df = pl.read_csv("pim_results.csv")
def calc_speedup(x): workload_sets = {
return x.iat[0] / x.iat[1] "vector": ["vadd", "vmul", "haxpy"],
"matrix": ["gemv", "dnn"],
}
workload_sets = [["vadd", "vmul", "haxpy"], ["gemv", "gemv_layers"]] workload_mapping = {
"gemv_layers": "dnn",
}
for workload_set in workload_sets: system_mapping = {
workload_filter = df["workload"].isin(workload_set) "HBM": "hbm",
"PIM-HBM": "pim"
}
for frequency in df["frequency"].unique(): def calc_speedup(tick_list):
frequency_filter = df["frequency"] == frequency return tick_list[0] / tick_list[1]
filtered_df = df[workload_filter & frequency_filter]
print(filtered_df)
preprocessed_df = filtered_df.groupby(["workload", "level", "frequency"], as_index=False).agg({"ticks": calc_speedup}).rename(columns={"ticks":"speedup"})
print(preprocessed_df) df = df.with_columns(pl.col("workload").replace(workload_mapping))
# preprocessed_df.to_csv("plot.csv", index=False) df = df.with_columns(pl.col("system").replace(system_mapping))
g = sns.catplot( df = df.group_by(
data=preprocessed_df, kind="bar", ["workload", "level", "frequency"], maintain_order=True
x="level", y="speedup", hue="workload", ).agg(pl.col("ticks").map_elements(calc_speedup).alias("speedup"))
palette="dark", alpha=.6, height=6
)
g.despine(left=True) for name, data in df.group_by(
g.set_axis_labels("", "Speedup") "frequency",
g.set(title=frequency) pl.when(pl.col("workload").is_in(workload_sets["vector"]))
g.legend.set_title("") .then(pl.lit("vector"))
.when(pl.col("workload").is_in(workload_sets["matrix"]))
.then(pl.lit("matrix")),
):
plot = sns.catplot(
data=data.to_pandas(),
kind="bar",
x="level",
y="speedup",
hue="workload",
palette="dark",
alpha=0.6,
height=6,
)
plot.set_axis_labels("Level", "Speedup")
plot.set(title=name[0] + name[1])
for workload in workload_set: plot.fig.subplots_adjust(top=0.95)
export_df = preprocessed_df[preprocessed_df["workload"] == workload]
filename = f"{workload}_{frequency}.csv" data = data.pivot(index=["level"], columns=["workload"], values=["speedup"])
directory = Path("plots_out") print(data)
export_df.to_csv(directory / filename, index=False)
data.write_csv(out_directory / f"{name[1]}_{name[0]}.csv")
plt.show() plt.show()

View File

@@ -1,13 +1,13 @@
import subprocess import subprocess
import csv
import copy
import dataclasses import dataclasses
import json import json
import pandas as pd import pandas as pd
from tqdm import tqdm
from dataclasses import dataclass
from threading import Thread from threading import Thread
from multiprocessing.pool import ThreadPool
from pathlib import Path from pathlib import Path
from typing import Dict
from configs.pim_config import Configuration, Statistics from configs.pim_config import Configuration, Statistics
gem5 = Path("build/ARM/gem5.opt") gem5 = Path("build/ARM/gem5.opt")
@@ -15,31 +15,30 @@ out_dir_base = Path("pim_out")
pim_simulation = Path("configs/pim_simulation.py") pim_simulation = Path("configs/pim_simulation.py")
class Gem5Thread(Thread): @dataclass
def __init__(self, configuration: Configuration) -> None: class WorkItem:
super().__init__() configuration: Configuration
self.configuration = configuration statistics: Statistics | None = None
def run(self): def run_gem5_process(work_item: WorkItem):
serialized_configuration = json.dumps( serialized_configuration = json.dumps(
dataclasses.asdict(self.configuration) dataclasses.asdict(work_item.configuration)
) )
out_dir = out_dir_base / configuration.name out_dir = out_dir_base / work_item.configuration.name
out = subprocess.run( out = subprocess.run(
[ [
gem5, gem5,
"-d" + out_dir.as_posix(), "-d" + out_dir.as_posix(),
pim_simulation, pim_simulation,
serialized_configuration, serialized_configuration,
], ],
capture_output=True, capture_output=True,
) )
output = out.stdout.splitlines()[-1]
self.statistics = Statistics(**json.loads(output))
output = out.stdout.splitlines()[-1]
work_item.statistics = Statistics(**json.loads(output))
workload_base_directory = Path("kernels") workload_base_directory = Path("kernels")
workload_sub_directory = Path("aarch64-unknown-none/release") workload_sub_directory = Path("aarch64-unknown-none/release")
@@ -60,7 +59,9 @@ systems = [
configurations: list[Configuration] = [] configurations: list[Configuration] = []
for frequency in ["3GHz", "100GHz"]: for frequency in ["3GHz", "100GHz"]:
# for frequency in ["100GHz"]:
for level in ["X1", "X2", "X3", "X4"]: for level in ["X1", "X2", "X3", "X4"]:
# for level in ["X3"]:
for system in systems: for system in systems:
for workload in workloads: for workload in workloads:
executable = workload executable = workload
@@ -77,28 +78,26 @@ for frequency in ["3GHz", "100GHz"]:
configurations.append( configurations.append(
Configuration( Configuration(
f"{workload}_{level}_{frequency}", f"{workload}_{level}_{system}_{frequency}",
workload, workload,
executable.as_posix(), executable.as_posix(),
level, level,
system == "PIM-HBM", system,
frequency, frequency,
) )
) )
threads: list[Gem5Thread] = [] work_items = [WorkItem(configuration) for configuration in configurations]
with ThreadPool() as pool:
for _ in tqdm(pool.imap_unordered(run_gem5_process, work_items), total=len(work_items)):
pass
for configuration in configurations:
thread = Gem5Thread(configuration)
thread.start()
threads.append(thread)
results: list[dict] = [] results: list[dict] = []
for thread in threads: for work_item in work_items:
thread.join() result = dataclasses.asdict(work_item.configuration) | dataclasses.asdict(work_item.statistics)
result = dataclasses.asdict(thread.configuration) | dataclasses.asdict(thread.statistics)
results.append(result) results.append(result)
dataframe = pd.DataFrame(results) dataframe = pd.DataFrame(results)

59
torch_plots.py Normal file
View File

@@ -0,0 +1,59 @@
import matplotlib.pyplot as plt
import seaborn as sns
import polars as pl
from pathlib import Path
# Compare simulated tick counts with the measured GPU runtimes.
#
# Reads "pim_results.csv", "vega_results.csv" and "tesla_results.csv" from the
# working directory, shows one bar chart per workload and writes one CSV per
# system into `out_directory`.

out_directory = Path("torch_plots_out")
# Make sure the target directory exists before the first write_csv() call.
out_directory.mkdir(parents=True, exist_ok=True)

system_mapping = {
    "HBM": "hbm",
    "PIM-HBM": "pim",
}

gem_df = pl.read_csv("pim_results.csv")
gem_df = gem_df.with_columns(pl.col("system").replace(system_mapping))
# Fold the frequency into the system label so that every
# (system, frequency) combination becomes its own hue and its own CSV file.
gem_df = gem_df.with_columns(
    pl.concat_str(["system", "frequency"], separator="_").alias("system")
)
gem_df = gem_df.select(["workload", "level", "system", "ticks"])

# The GPU result files carry a "runtime" column; rename it so it lines up
# with the "ticks" column of the simulation results.
vega_df = pl.read_csv("vega_results.csv")
vega_df = vega_df.rename({"runtime": "ticks"})
vega_df = vega_df.with_columns(pl.lit("vega").alias("system"))

tesla_df = pl.read_csv("tesla_results.csv")
tesla_df = tesla_df.rename({"runtime": "ticks"})
tesla_df = tesla_df.with_columns(pl.lit("tesla").alias("system"))

df = pl.concat([gem_df, vega_df, tesla_df], how="diagonal")

# NOTE: currently unused; kept for a per-set filter
# (df.filter(pl.col("workload").is_in(workload_set))) that is disabled.
workload_sets = [["vadd", "vmul", "haxpy"], ["gemv", "dnn"]]

workload_mapping = {
    "gemv_layers": "dnn",
}

df = df.with_columns(pl.col("workload").replace(workload_mapping))

g = sns.catplot(
    data=df.to_pandas(),
    kind="bar",
    x="level",
    y="ticks",
    hue="system",
    col="workload",
    palette="dark",
    alpha=0.6,
    height=6,
)

# Pass the key as a list so the group identifier is a tuple regardless of
# the polars version; interpolating the identifier itself would put the
# tuple's repr into the file name.
for name, data in df.group_by(["system"]):
    system = name[0]
    data = data.pivot(index=["level"], columns=["workload"], values=["ticks"])
    data.write_csv(out_directory / f"{system}.csv")
    print(data)

plt.show()

52
wallclock_time_plots.py Normal file
View File

@@ -0,0 +1,52 @@
import re
import polars as pl
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import timedelta
from pathlib import Path
# Plot the host (wallclock) runtime of the runs found below `stats_dir`.

stats_dir = Path("pim_out")

# Run directory names have four underscore-separated parts, read here as
# workload, level, system and frequency; the system part may contain a dash.
RUN_NAME_PATTERN = re.compile(r'(\w+)_(\w+)_(\w*-*\w*)_(\w+)')

# The decimal point is escaped so only a literal "." is accepted between the
# digits; the fractional part is optional so integer values still parse.
HOST_SECONDS_PATTERN = re.compile(r'hostSeconds\ +(\d+(?:\.\d+)?).*')

# Column names of the collected table, in the order the values are appended.
RUNTIME_COLUMNS = ("workload", "level", "system", "freq", "runtime")


def parse_run_name(name: str) -> "tuple[str, str, str, str] | None":
    """Split a run directory name into (workload, level, system, freq).

    Returns None when `name` does not match `RUN_NAME_PATTERN`, so callers
    can skip unrelated entries instead of failing on a missing match.
    """
    matches = RUN_NAME_PATTERN.search(name)
    if matches is None:
        return None
    return matches.group(1, 2, 3, 4)


def parse_host_seconds(lines) -> "float | None":
    """Return the last `hostSeconds` value found in `lines`.

    `lines` is any iterable of strings, e.g. an open text file. Later
    matches deliberately override earlier ones. Returns None when no line
    matches, so a run without a value never inherits one from a previous run.
    """
    runtime = None
    for line in lines:
        result = HOST_SECONDS_PATTERN.search(line)
        if result is not None:
            runtime = float(result.group(1))
    return runtime


def collect_runtimes(directory: Path) -> dict[str, list[object]]:
    """Gather one row per usable run directory below `directory`.

    Entries whose name does not parse, that have no "stats.txt", or whose
    "stats.txt" has no `hostSeconds` line are reported and skipped.
    """
    runtime_dict: dict[str, list[object]] = {}

    for element in directory.iterdir():
        print(element.name)

        run = parse_run_name(element.name)
        if run is None:
            print(f"skipping {element.name}: unexpected name")
            continue

        stats_file = element / "stats.txt"
        if not stats_file.is_file():
            print(f"skipping {element.name}: no stats.txt")
            continue

        with open(stats_file) as f:
            runtime = parse_host_seconds(f)

        if runtime is None:
            print(f"skipping {element.name}: no hostSeconds entry")
            continue

        for column, value in zip(RUNTIME_COLUMNS, (*run, runtime)):
            runtime_dict.setdefault(column, []).append(value)

    return runtime_dict


def main() -> None:
    """Collect the runtimes, keep the 100GHz / X3 runs and show the bar plot."""
    df = pl.DataFrame(collect_runtimes(stats_dir))
    df = df.filter((pl.col("freq") == "100GHz") & (pl.col("level") == "X3"))
    df = df.drop("freq")

    print(df)

    plot = sns.catplot(
        data=df.to_pandas(),
        kind="bar",
        x="system",
        y="runtime",
        hue="workload",
        palette="dark",
        alpha=0.6,
        height=6,
    )
    plot.set_axis_labels("PIM vs. Non-PIM", "Runtime [s]")
    plot.set(title="Wallclock Time")
    # Leave room above the axes so the title is not clipped.
    plot.fig.subplots_adjust(top=0.95)

    plt.show()


if __name__ == "__main__":
    main()