Wallclock time plots

This commit is contained in:
2024-03-22 18:42:18 +01:00
committed by Derek Christ
parent 353488837c
commit 43fbdd853f
6 changed files with 238 additions and 67 deletions

View File

@@ -7,7 +7,7 @@ class Configuration:
workload: str
executable: Path
level: str
pim: bool
system: str
frequency: str = "3GHz"
@dataclass(frozen=True)

45
latex_table.py Normal file
View File

@@ -0,0 +1,45 @@
import matplotlib.pyplot as plt
import seaborn as sns
import polars as pl
import numpy as np
from pathlib import Path
# Export the simulation and PyTorch measurement results as CSV tables.

# Presentation order of the workloads; the mapped index is used as sort key.
workload_order = {
    val: idx
    for idx, val in enumerate(["vadd", "vmul", "haxpy", "gemv", "gemv_layers"])
}

# Display names written into the exported tables.
workload_mapping = {
    "vadd": "VADD",
    "vmul": "VMUL",
    "haxpy": "HAXPY",
    "gemv": "GEMV",
    "gemv_layers": "DNN",
}

out_directory = Path("tables_out")
# Every CSV below is written into this directory; create it up front so a
# fresh checkout does not fail on a missing path.
out_directory.mkdir(parents=True, exist_ok=True)

df = pl.read_csv("pim_results.csv")
df = df.select(["workload", "level", "system", "frequency", "ticks"])

# One table per frequency: rows are (workload, level), one tick column per
# system.
for name, data in df.group_by(["frequency"], maintain_order=True):
    data = data.pivot(
        index=["workload", "level"], columns=["system"], values=["ticks"]
    )
    # Sort while the raw workload keys are still present, then swap in the
    # display names.
    data = data.sort(pl.col("workload").replace(workload_order))
    data = data.with_columns(pl.col("workload").replace(workload_mapping))
    data = data.rename({"HBM": "hbm", "PIM-HBM": "pim"})
    print(data)

    # The group key is indexed because grouping by a list yields a tuple.
    data.write_csv(out_directory / f"simulations_{name[0]}.csv")

# Combine both result files into one table with a runtime column per system.
vega_df = pl.read_csv("vega_results.csv")
vega_df = vega_df.with_columns(system=pl.lit("vega"))

tesla_df = pl.read_csv("tesla_results.csv")
tesla_df = tesla_df.with_columns(system=pl.lit("tesla"))

torch_df = pl.concat([vega_df, tesla_df])
torch_df = torch_df.pivot(
    index=["workload", "level"], columns=["system"], values=["runtime"]
)
torch_df = torch_df.sort(pl.col("workload").replace(workload_order))
torch_df = torch_df.with_columns(pl.col("workload").replace(workload_mapping))
print(torch_df)

torch_df.write_csv(out_directory / "torch.csv")

View File

@@ -1,48 +1,64 @@
import matplotlib.pyplot as plt
import polars as pl
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from pathlib import Path
df = pd.read_csv("pim_results.csv")
out_directory = Path("pim_plots_out")
sns.set_theme()
df = pl.read_csv("pim_results.csv")
def calc_speedup(x):
return x.iat[0] / x.iat[1]
workload_sets = {
"vector": ["vadd", "vmul", "haxpy"],
"matrix": ["gemv", "dnn"],
}
workload_sets = [["vadd", "vmul", "haxpy"], ["gemv", "gemv_layers"]]
workload_mapping = {
"gemv_layers": "dnn",
}
for workload_set in workload_sets:
workload_filter = df["workload"].isin(workload_set)
system_mapping = {
"HBM": "hbm",
"PIM-HBM": "pim"
}
for frequency in df["frequency"].unique():
frequency_filter = df["frequency"] == frequency
def calc_speedup(tick_list):
return tick_list[0] / tick_list[1]
filtered_df = df[workload_filter & frequency_filter]
print(filtered_df)
preprocessed_df = filtered_df.groupby(["workload", "level", "frequency"], as_index=False).agg({"ticks": calc_speedup}).rename(columns={"ticks":"speedup"})
print(preprocessed_df)
# preprocessed_df.to_csv("plot.csv", index=False)
df = df.with_columns(pl.col("workload").replace(workload_mapping))
df = df.with_columns(pl.col("system").replace(system_mapping))
g = sns.catplot(
data=preprocessed_df, kind="bar",
x="level", y="speedup", hue="workload",
palette="dark", alpha=.6, height=6
df = df.group_by(
["workload", "level", "frequency"], maintain_order=True
).agg(pl.col("ticks").map_elements(calc_speedup).alias("speedup"))
for name, data in df.group_by(
"frequency",
pl.when(pl.col("workload").is_in(workload_sets["vector"]))
.then(pl.lit("vector"))
.when(pl.col("workload").is_in(workload_sets["matrix"]))
.then(pl.lit("matrix")),
):
plot = sns.catplot(
data=data.to_pandas(),
kind="bar",
x="level",
y="speedup",
hue="workload",
palette="dark",
alpha=0.6,
height=6,
)
plot.set_axis_labels("Level", "Speedup")
plot.set(title=name[0] + name[1])
g.despine(left=True)
g.set_axis_labels("", "Speedup")
g.set(title=frequency)
g.legend.set_title("")
plot.fig.subplots_adjust(top=0.95)
for workload in workload_set:
export_df = preprocessed_df[preprocessed_df["workload"] == workload]
data = data.pivot(index=["level"], columns=["workload"], values=["speedup"])
print(data)
filename = f"{workload}_{frequency}.csv"
directory = Path("plots_out")
export_df.to_csv(directory / filename, index=False)
data.write_csv(out_directory / f"{name[1]}_{name[0]}.csv")
plt.show()

View File

@@ -1,13 +1,13 @@
import subprocess
import csv
import copy
import dataclasses
import json
import pandas as pd
from tqdm import tqdm
from dataclasses import dataclass
from threading import Thread
from multiprocessing.pool import ThreadPool
from pathlib import Path
from typing import Dict
from configs.pim_config import Configuration, Statistics
gem5 = Path("build/ARM/gem5.opt")
@@ -15,17 +15,17 @@ out_dir_base = Path("pim_out")
pim_simulation = Path("configs/pim_simulation.py")
class Gem5Thread(Thread):
def __init__(self, configuration: Configuration) -> None:
super().__init__()
self.configuration = configuration
@dataclass
class WorkItem:
configuration: Configuration
statistics: Statistics | None = None
def run(self):
def run_gem5_process(work_item: WorkItem):
serialized_configuration = json.dumps(
dataclasses.asdict(self.configuration)
dataclasses.asdict(work_item.configuration)
)
out_dir = out_dir_base / configuration.name
out_dir = out_dir_base / work_item.configuration.name
out = subprocess.run(
[
@@ -38,8 +38,7 @@ class Gem5Thread(Thread):
)
output = out.stdout.splitlines()[-1]
self.statistics = Statistics(**json.loads(output))
work_item.statistics = Statistics(**json.loads(output))
workload_base_directory = Path("kernels")
workload_sub_directory = Path("aarch64-unknown-none/release")
@@ -60,7 +59,9 @@ systems = [
configurations: list[Configuration] = []
for frequency in ["3GHz", "100GHz"]:
# for frequency in ["100GHz"]:
for level in ["X1", "X2", "X3", "X4"]:
# for level in ["X3"]:
for system in systems:
for workload in workloads:
executable = workload
@@ -77,28 +78,26 @@ for frequency in ["3GHz", "100GHz"]:
configurations.append(
Configuration(
f"{workload}_{level}_{frequency}",
f"{workload}_{level}_{system}_{frequency}",
workload,
executable.as_posix(),
level,
system == "PIM-HBM",
system,
frequency,
)
)
threads: list[Gem5Thread] = []
work_items = [WorkItem(configuration) for configuration in configurations]
with ThreadPool() as pool:
for _ in tqdm(pool.imap_unordered(run_gem5_process, work_items), total=len(work_items)):
pass
for configuration in configurations:
thread = Gem5Thread(configuration)
thread.start()
threads.append(thread)
results: list[dict] = []
for thread in threads:
thread.join()
result = dataclasses.asdict(thread.configuration) | dataclasses.asdict(thread.statistics)
for work_item in work_items:
result = dataclasses.asdict(work_item.configuration) | dataclasses.asdict(work_item.statistics)
results.append(result)
dataframe = pd.DataFrame(results)

59
torch_plots.py Normal file
View File

@@ -0,0 +1,59 @@
import matplotlib.pyplot as plt
import seaborn as sns
import polars as pl
from pathlib import Path
# Plot simulated tick counts next to the measured vega/tesla runtimes and
# export one CSV table per system.

out_directory = Path("torch_plots_out")
# The per-system CSV files are written here; create the directory up front so
# the export does not fail on a missing path.
out_directory.mkdir(parents=True, exist_ok=True)

system_mapping = {
    "HBM": "hbm",
    "PIM-HBM": "pim"
}

gem_df = pl.read_csv("pim_results.csv")
gem_df = gem_df.with_columns(pl.col("system").replace(system_mapping))

# Fold the frequency into the system name so every (system, frequency) pair
# becomes its own series.
gem_df = gem_df.with_columns(
    pl.concat_str(["system", "frequency"], separator="_").alias("system")
)
gem_df = gem_df.select(["workload", "level", "system", "ticks"])

# Rename "runtime" to "ticks" so these rows share the value column of the
# simulation results.
vega_df = pl.read_csv("vega_results.csv")
vega_df = vega_df.rename({"runtime": "ticks"})
vega_df = vega_df.with_columns(pl.lit("vega").alias("system"))

tesla_df = pl.read_csv("tesla_results.csv")
tesla_df = tesla_df.rename({"runtime": "ticks"})
tesla_df = tesla_df.with_columns(pl.lit("tesla").alias("system"))

df = pl.concat([gem_df, vega_df, tesla_df], how="diagonal")

workload_sets = [["vadd", "vmul", "haxpy"], ["gemv", "dnn"]]

workload_mapping = {
    "gemv_layers": "dnn",
}

df = df.with_columns(pl.col("workload").replace(workload_mapping))

# NOTE: per-set filtering via workload_sets is currently disabled; all
# workloads are drawn as columns of a single figure.
g = sns.catplot(
    data=df.to_pandas(),
    kind="bar",
    x="level",
    y="ticks",
    hue="system",
    col="workload",
    palette="dark",
    alpha=0.6,
    height=6,
)

# Pass the key as a list so the group identifier is a tuple and name[0] is the
# plain system name; interpolating the bare key can put a tuple repr into the
# file name.
for name, data in df.group_by(["system"]):
    data = data.pivot(index=["level"], columns=["workload"], values=["ticks"])
    data.write_csv(out_directory / f"{name[0]}.csv")
    print(data)

plt.show()

52
wallclock_time_plots.py Normal file
View File

@@ -0,0 +1,52 @@
import re
import polars as pl
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import timedelta
from pathlib import Path
# Collect the host (wallclock) runtime from the stats.txt of every run
# directory and plot it per system and workload.

stats_dir = Path("pim_out")

# Splits a run directory name into four underscore-separated fields, read
# below as workload, level, system and frequency.
name_pattern = re.compile(r'(\w+)_(\w+)_(\w*-*\w*)_(\w+)')

# Compiled once instead of per directory. The decimal point is matched
# literally so only a well-formed number is handed to float().
host_seconds_pattern = re.compile(r'hostSeconds\ +(\d+(?:\.\d+)?).*')

runtime_dict: dict[str, list[object]] = {
    "workload": [],
    "level": [],
    "system": [],
    "freq": [],
    "runtime": [],
}

for element in stats_dir.iterdir():
    print(element.name)

    matches = name_pattern.search(element.name)
    stats_file = element / "stats.txt"

    # Skip entries that are not complete run directories instead of crashing
    # on them.
    if matches is None or not stats_file.is_file():
        print(f"skipping {element.name}: not a run directory with stats.txt")
        continue

    workload, level, system, freq = matches.groups()

    # Reset per directory so a stats file without a hostSeconds entry cannot
    # silently reuse the value of the previous run.
    runtime = None
    with open(stats_file) as f:
        for line in f:
            result = host_seconds_pattern.search(line)
            if result is not None:
                # Keep overwriting: the last hostSeconds entry in the file wins.
                runtime = result.group(1)

    if runtime is None:
        print(f"skipping {element.name}: no hostSeconds entry found")
        continue

    runtime_dict["workload"].append(workload)
    runtime_dict["level"].append(level)
    runtime_dict["system"].append(system)
    runtime_dict["freq"].append(freq)
    runtime_dict["runtime"].append(float(runtime))

df = pl.DataFrame(runtime_dict)

# Only the 100GHz / X3 runs are plotted.
df = df.filter((pl.col("freq") == "100GHz") & (pl.col("level") == "X3"))
df = df.drop("freq")

print(df)

plot = sns.catplot(
    data=df.to_pandas(),
    kind="bar",
    x="system",
    y="runtime",
    hue="workload",
    palette="dark",
    alpha=0.6,
    height=6,
)
plot.set_axis_labels("PIM vs. Non-PIM", "Runtime [s]")
plot.set(title="Wallclock Time")

plot.fig.subplots_adjust(top=0.95)

plt.show()