Usage of torch benchmark suite

This commit is contained in:
2024-03-06 12:35:29 +01:00
parent 3808bcd478
commit 734b77d1fe

View File

@@ -12,7 +12,7 @@ from config import Statistics, Configuration
device = torch.device("cuda:0") device = torch.device("cuda:0")
ITERATIONS = 100_000 ITERATIONS = 10_000
def run_gemv_bench(workload, level): def run_gemv_bench(workload, level):
@@ -42,18 +42,20 @@ def run_gemv_bench(workload, level):
device=device, device=device,
) )
input_vector = torch.rand(COLUMNS, dtype=torch.float16, device=device) input_vector = torch.rand(COLUMNS, dtype=torch.float16, device=device)
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)
start.record() def bench_callback(matrix, input_vector):
for _ in range(ITERATIONS):
torch.matmul(matrix, input_vector) torch.matmul(matrix, input_vector)
end.record()
torch.cuda.synchronize() timer = benchmark.Timer(
"bench_callback(matrix, input_vector)",
globals={
"bench_callback": bench_callback,
"matrix": matrix,
"input_vector": input_vector,
},
)
runtime = int(timer.timeit(ITERATIONS).mean * 1e12)
runtime = int(start.elapsed_time(end) * 1e9 / ITERATIONS)
return runtime return runtime
@@ -74,22 +76,22 @@ def run_gemv_layers_bench(workload, level):
device=device, device=device,
) )
input_vector = torch.rand(DIMENSIONS, dtype=torch.float16, device=device) input_vector = torch.rand(DIMENSIONS, dtype=torch.float16, device=device)
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)
start.record() def bench_callback(matrix, input_vector):
for _ in range(ITERATIONS):
for _ in range(5): for _ in range(5):
input_vector = torch.matmul(matrix, input_vector) input_vector = torch.matmul(matrix, input_vector)
input_vector.relu() input_vector.relu()
end.record() timer = benchmark.Timer(
"bench_callback(matrix, input_vector)",
globals={
"bench_callback": bench_callback,
"matrix": matrix,
"input_vector": input_vector,
},
)
runtime = int(timer.timeit(ITERATIONS).mean * 1e12)
torch.cuda.synchronize()
runtime = int(start.elapsed_time(end) * 1e9 / ITERATIONS)
return runtime return runtime
@@ -108,26 +110,30 @@ def run_vector_bench(workload, level):
vector_b = torch.rand(ROWS, dtype=torch.float16, device=device) vector_b = torch.rand(ROWS, dtype=torch.float16, device=device)
func = getattr(wl, workload) func = getattr(wl, workload)
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)
start.record() match workload:
for _ in range(ITERATIONS): case "vadd":
match workload: bench_callback = lambda vector_a, vector_b: torch.add(vector_a, vector_b)
case "vadd": case "vmul":
torch.add(vector_a, vector_b) bench_callback = lambda vector_a, vector_b: torch.mul(vector_a, vector_b)
case "vmul": case "haxpy":
torch.mul(vector_a, vector_b) bench_callback = lambda vector_a, vector_b: torch.add(
case "haxpy": vector_a, vector_b, alpha=2
torch.add(vector_a, vector_b, alpha=2) )
end.record()
torch.cuda.synchronize() timer = benchmark.Timer(
"bench_callback(vector_a, vector_b)",
globals={
"bench_callback": bench_callback,
"vector_a": vector_a,
"vector_b": vector_b,
},
)
runtime = int(timer.timeit(ITERATIONS).mean * 1e12)
runtime = int(start.elapsed_time(end) * 1e9 / ITERATIONS)
return runtime return runtime
workloads = [ workloads = [
("vadd", run_vector_bench), ("vadd", run_vector_bench),
("vmul", run_vector_bench), ("vmul", run_vector_bench),