Use the torch benchmark suite for timing
This commit is contained in:
74
benches.py
74
benches.py
@@ -12,7 +12,7 @@ from config import Statistics, Configuration
|
|||||||
|
|
||||||
device = torch.device("cuda:0")
|
device = torch.device("cuda:0")
|
||||||
|
|
||||||
ITERATIONS = 100_000
|
ITERATIONS = 10_000
|
||||||
|
|
||||||
|
|
||||||
def run_gemv_bench(workload, level):
|
def run_gemv_bench(workload, level):
|
||||||
@@ -42,18 +42,20 @@ def run_gemv_bench(workload, level):
|
|||||||
device=device,
|
device=device,
|
||||||
)
|
)
|
||||||
input_vector = torch.rand(COLUMNS, dtype=torch.float16, device=device)
|
input_vector = torch.rand(COLUMNS, dtype=torch.float16, device=device)
|
||||||
|
|
||||||
start = torch.cuda.Event(enable_timing=True)
|
|
||||||
end = torch.cuda.Event(enable_timing=True)
|
|
||||||
|
|
||||||
start.record()
|
def bench_callback(matrix, input_vector):
|
||||||
for _ in range(ITERATIONS):
|
|
||||||
torch.matmul(matrix, input_vector)
|
torch.matmul(matrix, input_vector)
|
||||||
end.record()
|
|
||||||
|
|
||||||
torch.cuda.synchronize()
|
timer = benchmark.Timer(
|
||||||
|
"bench_callback(matrix, input_vector)",
|
||||||
|
globals={
|
||||||
|
"bench_callback": bench_callback,
|
||||||
|
"matrix": matrix,
|
||||||
|
"input_vector": input_vector,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
runtime = int(timer.timeit(ITERATIONS).mean * 1e12)
|
||||||
|
|
||||||
runtime = int(start.elapsed_time(end) * 1e9 / ITERATIONS)
|
|
||||||
return runtime
|
return runtime
|
||||||
|
|
||||||
|
|
||||||
@@ -74,22 +76,22 @@ def run_gemv_layers_bench(workload, level):
|
|||||||
device=device,
|
device=device,
|
||||||
)
|
)
|
||||||
input_vector = torch.rand(DIMENSIONS, dtype=torch.float16, device=device)
|
input_vector = torch.rand(DIMENSIONS, dtype=torch.float16, device=device)
|
||||||
|
|
||||||
start = torch.cuda.Event(enable_timing=True)
|
|
||||||
end = torch.cuda.Event(enable_timing=True)
|
|
||||||
|
|
||||||
start.record()
|
def bench_callback(matrix, input_vector):
|
||||||
|
|
||||||
for _ in range(ITERATIONS):
|
|
||||||
for _ in range(5):
|
for _ in range(5):
|
||||||
input_vector = torch.matmul(matrix, input_vector)
|
input_vector = torch.matmul(matrix, input_vector)
|
||||||
input_vector.relu()
|
input_vector.relu()
|
||||||
|
|
||||||
end.record()
|
timer = benchmark.Timer(
|
||||||
|
"bench_callback(matrix, input_vector)",
|
||||||
|
globals={
|
||||||
|
"bench_callback": bench_callback,
|
||||||
|
"matrix": matrix,
|
||||||
|
"input_vector": input_vector,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
runtime = int(timer.timeit(ITERATIONS).mean * 1e12)
|
||||||
|
|
||||||
torch.cuda.synchronize()
|
|
||||||
|
|
||||||
runtime = int(start.elapsed_time(end) * 1e9 / ITERATIONS)
|
|
||||||
return runtime
|
return runtime
|
||||||
|
|
||||||
|
|
||||||
@@ -108,26 +110,30 @@ def run_vector_bench(workload, level):
|
|||||||
vector_b = torch.rand(ROWS, dtype=torch.float16, device=device)
|
vector_b = torch.rand(ROWS, dtype=torch.float16, device=device)
|
||||||
|
|
||||||
func = getattr(wl, workload)
|
func = getattr(wl, workload)
|
||||||
|
|
||||||
start = torch.cuda.Event(enable_timing=True)
|
|
||||||
end = torch.cuda.Event(enable_timing=True)
|
|
||||||
|
|
||||||
start.record()
|
match workload:
|
||||||
for _ in range(ITERATIONS):
|
case "vadd":
|
||||||
match workload:
|
bench_callback = lambda vector_a, vector_b: torch.add(vector_a, vector_b)
|
||||||
case "vadd":
|
case "vmul":
|
||||||
torch.add(vector_a, vector_b)
|
bench_callback = lambda vector_a, vector_b: torch.mul(vector_a, vector_b)
|
||||||
case "vmul":
|
case "haxpy":
|
||||||
torch.mul(vector_a, vector_b)
|
bench_callback = lambda vector_a, vector_b: torch.add(
|
||||||
case "haxpy":
|
vector_a, vector_b, alpha=2
|
||||||
torch.add(vector_a, vector_b, alpha=2)
|
)
|
||||||
end.record()
|
|
||||||
|
|
||||||
torch.cuda.synchronize()
|
timer = benchmark.Timer(
|
||||||
|
"bench_callback(vector_a, vector_b)",
|
||||||
|
globals={
|
||||||
|
"bench_callback": bench_callback,
|
||||||
|
"vector_a": vector_a,
|
||||||
|
"vector_b": vector_b,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
runtime = int(timer.timeit(ITERATIONS).mean * 1e12)
|
||||||
|
|
||||||
runtime = int(start.elapsed_time(end) * 1e9 / ITERATIONS)
|
|
||||||
return runtime
|
return runtime
|
||||||
|
|
||||||
|
|
||||||
workloads = [
|
workloads = [
|
||||||
("vadd", run_vector_bench),
|
("vadd", run_vector_bench),
|
||||||
("vmul", run_vector_bench),
|
("vmul", run_vector_bench),
|
||||||
|
|||||||
Reference in New Issue
Block a user