From 9acfc5a751c4df408b2fa9f73aa4512392c48c02 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Fri, 21 Jul 2023 16:25:03 -0500 Subject: [PATCH] configs: Enable AVX2 for GPUFS+KVM AVX is a requirement for some ROCm libraries, such as rocBLAS, which are themselves requirements for libraries higher up the stack like PyTorch. This patch sets the necessary CPUID bits in the GPUFS config to enable AVX, AVX2, and various SSE features so that applications using these libraries do not cause an illegal instruction trap. Change-Id: Id22f543fb2a06b268271725a54075ee6a9a1f041 --- configs/example/gpufs/system/system.py | 39 ++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/configs/example/gpufs/system/system.py b/configs/example/gpufs/system/system.py index 19df310295..7ddc4f0752 100644 --- a/configs/example/gpufs/system/system.py +++ b/configs/example/gpufs/system/system.py @@ -231,6 +231,42 @@ def makeGpuFSSystem(args): clock=args.ruby_clock, voltage_domain=system.voltage_domain ) + # If we are using KVM cpu, enable AVX. AVX is used in some ROCm libraries + # such as rocBLAS which is used in higher level libraries like PyTorch. + use_avx = False + if ObjectList.is_kvm_cpu(TestCPUClass): + # AVX also requires CR4.osxsave to be 1. These must be set together + # of KVM will error out. + system.workload.enable_osxsave = 1 + use_avx = True + + # These values are taken from a real CPU and are further explained here: + # https://sandpile.org/x86/cpuid.htm#level_0000_000Dh + avx_extended_state = [ + 0x00000007, + 0x00000340, + 0x00000000, + 0x00000340, + 0x0000000F, + 0x00000340, + 0x00000000, + 0x00000000, + 0x00000100, + 0x00000240, + 0x00000000, + 0x00000040, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + ] + + # This modifies the default value for ECX only (4th in this array). + # See: https://sandpile.org/x86/cpuid.htm#level_0000_0001h + # Enables AVX, OSXSAVE, XSAVE, POPCNT, SSE4.2, SSE4.1, CMPXCHG16B, + # and FMA. + avx_cpu_features = [0x00020F51, 0x00000805, 0xEFDBFBFF, 0x1C983209] + for (i, cpu) in enumerate(system.cpu): # Break once we reach the shader "CPU" if i == args.num_cpus: @@ -247,6 +283,9 @@ def makeGpuFSSystem(args): for j in range(len(system.cpu[i].isa)): system.cpu[i].isa[j].vendor_string = "AuthenticAMD" + if use_avx: + system.cpu[i].isa[j].ExtendedState = avx_extended_state + system.cpu[i].isa[j].FamilyModelStepping = avx_cpu_features if args.host_parallel: # To get the KVM CPUs to run on different host CPUs, specify a