diff --git a/configs/example/gpufs/system/system.py b/configs/example/gpufs/system/system.py index 19df310295..7ddc4f0752 100644 --- a/configs/example/gpufs/system/system.py +++ b/configs/example/gpufs/system/system.py @@ -231,6 +231,42 @@ def makeGpuFSSystem(args): clock=args.ruby_clock, voltage_domain=system.voltage_domain ) + # If we are using KVM cpu, enable AVX. AVX is used in some ROCm libraries + # such as rocBLAS which is used in higher level libraries like PyTorch. + use_avx = False + if ObjectList.is_kvm_cpu(TestCPUClass): + # AVX also requires CR4.osxsave to be 1. These must be set together + # or KVM will error out. + system.workload.enable_osxsave = 1 + use_avx = True + + # These values are taken from a real CPU and are further explained here: + # https://sandpile.org/x86/cpuid.htm#level_0000_000Dh + avx_extended_state = [ + 0x00000007, + 0x00000340, + 0x00000000, + 0x00000340, + 0x0000000F, + 0x00000340, + 0x00000000, + 0x00000000, + 0x00000100, + 0x00000240, + 0x00000000, + 0x00000040, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + ] + + # This modifies the default value for ECX only (4th in this array). + # See: https://sandpile.org/x86/cpuid.htm#level_0000_0001h + # Enables AVX, OSXSAVE, XSAVE, POPCNT, SSE4.2, SSE4.1, CMPXCHG16B, + # and FMA. 
+ avx_cpu_features = [0x00020F51, 0x00000805, 0xEFDBFBFF, 0x1C983209] + for (i, cpu) in enumerate(system.cpu): # Break once we reach the shader "CPU" if i == args.num_cpus: @@ -247,6 +283,9 @@ def makeGpuFSSystem(args): for j in range(len(system.cpu[i].isa)): system.cpu[i].isa[j].vendor_string = "AuthenticAMD" + if use_avx: + system.cpu[i].isa[j].ExtendedState = avx_extended_state + system.cpu[i].isa[j].FamilyModelStepping = avx_cpu_features if args.host_parallel: # To get the KVM CPUs to run on different host CPUs, specify a diff --git a/src/arch/x86/X86FsWorkload.py b/src/arch/x86/X86FsWorkload.py index 294241b51c..277a37988e 100644 --- a/src/arch/x86/X86FsWorkload.py +++ b/src/arch/x86/X86FsWorkload.py @@ -65,6 +65,7 @@ class X86FsWorkload(KernelWorkload): acpi_description_table_pointer = Param.X86ACPIRSDP( X86ACPIRSDP(), "ACPI root description pointer structure" ) + enable_osxsave = Param.Bool(False, "Enable OSXSAVE in CR4 register") class X86FsLinux(X86FsWorkload): diff --git a/src/arch/x86/X86ISA.py b/src/arch/x86/X86ISA.py index bb72c415e9..aa48d1aa6e 100644 --- a/src/arch/x86/X86ISA.py +++ b/src/arch/x86/X86ISA.py @@ -54,3 +54,73 @@ class X86ISA(BaseISA): vendor_string = Param.String( "HygonGenuine", "Vendor string for CPUID instruction" ) + name_string = Param.String( + "Fake gem5 x86_64 CPU", "Processor name for CPUID instruction" + ) + + # For the functions that return numerical values we use a vector of ints. + # The order of the values is: EAX, EBX, EDX, ECX. + # + # If the CPU function can take an index, the index value is used as an + # offset into the vector and four numerical values are added for each + # possible index value. For example, if the function accepts 3 index + # values, there are 12 total ints in the vector param. In addition, the + # last values for functions which take an index must be all zeros. All + # zeros indicates to the KVM cpu / OS that there are no more index values + # to iterate over. 
+ # + # A good resource for these values can be found here: + # https://sandpile.org/x86/cpuid.htm + # 0000_0001h + FamilyModelStepping = VectorParam.UInt32( + [0x00020F51, 0x00000805, 0xEFDBFBFF, 0x00000209], + "type/family/model/stepping and feature flags", + ) + # 0000_0004h + CacheParams = VectorParam.UInt32( + [0x00000000, 0x00000000, 0x00000000, 0x00000000], + "cache configuration descriptors", + ) + # 0000_0007h + ExtendedFeatures = VectorParam.UInt32( + [0x00000000, 0x01800000, 0x00000000, 0x00000000], "feature flags" + ) + # 0000_000Dh - This uses ECX index, so the last entry must be all zeros + ExtendedState = VectorParam.UInt32( + [ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + ], + "extended state enumeration", + ) + # 8000_0001h + FamilyModelSteppingBrandFeatures = VectorParam.UInt32( + [0x00020F51, 0x00000405, 0xEBD3FBFF, 0x00020001], + "family/model/stepping and features flags", + ) + # 8000_0005h + L1CacheAndTLB = VectorParam.UInt32( + [0xFF08FF08, 0xFF20FF20, 0x40020140, 0x40020140], + "L1 cache and L1 TLB configuration descriptors", + ) + # 8000_0006h + L2L3CacheAndL2TLB = VectorParam.UInt32( + [0x00000000, 0x42004200, 0x00000000, 0x04008140], + "L2/L3 cache and L2 TLB configuration descriptors", + ) + # 8000_0007h + APMInfo = VectorParam.UInt32( + [0x80000018, 0x68747541, 0x69746E65, 0x444D4163], + "processor feedback capabilities", + ) + # 8000_0008h + LongModeAddressSize = VectorParam.UInt32( + [0x00003030, 0x00000000, 0x00000000, 0x00000000], + "miscellaneous information", + ) diff --git a/src/arch/x86/cpuid.cc b/src/arch/x86/cpuid.cc index ac4709ce0e..2ce9ec9289 100644 --- a/src/arch/x86/cpuid.cc +++ b/src/arch/x86/cpuid.cc @@ -31,162 +31,135 @@ #include "arch/x86/isa.hh" #include "base/bitfield.hh" #include "cpu/thread_context.hh" +#include "debug/X86.hh" namespace gem5 { -namespace X86ISA { - enum StandardCpuidFunction - { - VendorAndLargestStdFunc, - FamilyModelStepping, - 
CacheAndTLB, - SerialNumber, - CacheParams, - MonitorMwait, - ThermalPowerMgmt, - ExtendedFeatures, - NumStandardCpuidFuncs - }; +namespace X86ISA +{ - enum ExtendedCpuidFunctions - { - VendorAndLargestExtFunc, - FamilyModelSteppingBrandFeatures, - NameString1, - NameString2, - NameString3, - L1CacheAndTLB, - L2L3CacheAndL2TLB, - APMInfo, - LongModeAddressSize, +X86CPUID::X86CPUID(const std::string& vendor, const std::string& name) + : vendorString(vendor), nameString(name) +{ + fatal_if(vendorString.size() != 12, + "CPUID vendor string must be 12 characters\n"); +} - /* - * The following are defined by the spec but not yet implemented - */ -/* // Function 9 is reserved - SVMInfo = 10, - // Functions 11-24 are reserved - TLB1GBPageInfo = 25, - PerformanceInfo,*/ +void +X86CPUID::addStandardFunc(uint32_t func, std::vector values) +{ + capabilities[func] = values; +} - NumExtendedCpuidFuncs - }; +void +X86CPUID::addExtendedFunc(uint32_t func, std::vector values) +{ + // Extended functions begin with 8000_0000h, but the enum is based from + // zero, so we need to add that to the function value. 
+ capabilities[func | 0x80000000] = values; +} - static const int nameStringSize = 48; - static const char nameString[nameStringSize] = "Fake M5 x86_64 CPU"; +bool +X86CPUID::doCpuid(ThreadContext * tc, uint32_t function, uint32_t index, + CpuidResult &result) +{ + constexpr uint32_t ext = 0x80000000; - uint64_t - stringToRegister(const char *str) - { - uint64_t reg = 0; - for (int pos = 3; pos >=0; pos--) { - reg <<= 8; - reg |= str[pos]; - } - return reg; - } + DPRINTF(X86, "Calling CPUID function %x with index %d\n", function, index); - bool - doCpuid(ThreadContext * tc, uint32_t function, - uint32_t index, CpuidResult &result) - { - uint16_t family = bits(function, 31, 16); - uint16_t funcNum = bits(function, 15, 0); - if (family == 0x8000) { - // The extended functions - switch (funcNum) { - case VendorAndLargestExtFunc: - { - ISA *isa = dynamic_cast(tc->getIsaPtr()); - auto vendor_string = isa->getVendorString(); - result = CpuidResult( - 0x80000000 + NumExtendedCpuidFuncs - 1, - stringToRegister(vendor_string.c_str()), - stringToRegister(vendor_string.c_str() + 4), - stringToRegister(vendor_string.c_str() + 8)); - } - break; - case FamilyModelSteppingBrandFeatures: - result = CpuidResult(0x00020f51, 0x00000405, - 0xebd3fbff, 0x00020001); - break; - case NameString1: - case NameString2: - case NameString3: - { - // Zero fill anything beyond the end of the string. This - // should go away once the string is a vetted parameter. 
- char cleanName[nameStringSize]; - memset(cleanName, '\0', nameStringSize); - strncpy(cleanName, nameString, nameStringSize); + // Handle the string-related CPUID functions specially + if (function == VendorAndLargestStdFunc) { + result = CpuidResult(NumStandardCpuidFuncs - 1, + stringToRegister(vendorString.c_str()), + stringToRegister(vendorString.c_str() + 4), + stringToRegister(vendorString.c_str() + 8)); - int offset = (funcNum - NameString1) * 16; - assert(nameStringSize >= offset + 16); - result = CpuidResult( - stringToRegister(cleanName + offset + 0), - stringToRegister(cleanName + offset + 4), - stringToRegister(cleanName + offset + 12), - stringToRegister(cleanName + offset + 8)); - } - break; - case L1CacheAndTLB: - result = CpuidResult(0xff08ff08, 0xff20ff20, - 0x40020140, 0x40020140); - break; - case L2L3CacheAndL2TLB: - result = CpuidResult(0x00000000, 0x42004200, - 0x00000000, 0x04008140); - break; - case APMInfo: - result = CpuidResult(0x80000018, 0x68747541, - 0x69746e65, 0x444d4163); - break; - case LongModeAddressSize: - result = CpuidResult(0x00003030, 0x00000000, - 0x00000000, 0x00000000); - break; -/* case SVMInfo: - case TLB1GBPageInfo: - case PerformanceInfo:*/ - default: - warn("x86 cpuid family 0x8000: unimplemented function %u", - funcNum); - return false; - } - } else if (family == 0x0000) { - // The standard functions - switch (funcNum) { - case VendorAndLargestStdFunc: - { - ISA *isa = dynamic_cast(tc->getIsaPtr()); - auto vendor_string = isa->getVendorString(); - result = CpuidResult( - NumStandardCpuidFuncs - 1, - stringToRegister(vendor_string.c_str()), - stringToRegister(vendor_string.c_str() + 4), - stringToRegister(vendor_string.c_str() + 8)); - } - break; - case FamilyModelStepping: - result = CpuidResult(0x00020f51, 0x00000805, - 0xefdbfbff, 0x00000209); - break; - case ExtendedFeatures: - result = CpuidResult(0x00000000, 0x01800000, - 0x00000000, 0x00000000); - break; - default: - warn("x86 cpuid family 0x0000: unimplemented 
function %u", - funcNum); - return false; - } - } else { - warn("x86 cpuid: unknown family %#x", family); - return false; - } + return true; + } else if (function == (ext | VendorAndLargestExtFunc)) { + result = CpuidResult(0x80000000 + NumExtendedCpuidFuncs - 1, + stringToRegister(vendorString.c_str()), + stringToRegister(vendorString.c_str() + 4), + stringToRegister(vendorString.c_str() + 8)); + + return true; + } else if ((function == (ext | NameString1)) || + (function == (ext | NameString2)) || + (function == (ext | NameString3))) { + // Zero fill anything beyond the end of the string. This + // should go away once the string is a vetted parameter. + char cleanName[nameStringSize]; + memset(cleanName, '\0', nameStringSize); + strncpy(cleanName, nameString.c_str(), nameStringSize-1); + + int funcNum = bits(function, 15, 0); + int offset = (funcNum - NameString1) * 16; + assert(nameStringSize >= offset + 16); + result = CpuidResult( + stringToRegister(cleanName + offset + 0), + stringToRegister(cleanName + offset + 4), + stringToRegister(cleanName + offset + 12), + stringToRegister(cleanName + offset + 8)); return true; } + + // Ignore anything not in the map of supported CPUID functions. + // This is checked after the string-related functions as those are not + // in the capabilities map. + if (!capabilities.count(function)) { + return false; + } + + int cap_offset = 0; + + // Ignore index values for functions that do not take index values. + if (hasSignificantIndex(function)) { + cap_offset = index * 4; + } + + // Ensure we have the offset and 4 dwords after it. 
+ assert(capabilities[function].size() >= (cap_offset + 4)); + + auto &cap_vec = capabilities[function]; + result = CpuidResult(cap_vec[cap_offset + 0], cap_vec[cap_offset + 1], + cap_vec[cap_offset + 2], cap_vec[cap_offset + 3]); + DPRINTF(X86, "CPUID function %x returning (%x, %x, %x, %x)\n", + function, result.rax, result.rbx, result.rdx, result.rcx); + + return true; +} + +uint64_t +X86CPUID::stringToRegister(const char *str) +{ + uint64_t reg = 0; + for (int pos = 3; pos >=0; pos--) { + reg <<= 8; + reg |= str[pos]; + } + return reg; +} + +// Return true if the CPUID function takes ECX index as an input AND +// those multiple index values are supported in gem5. +bool +X86CPUID::hasSignificantIndex(uint32_t function) +{ + uint16_t family = bits(function, 31, 16); + uint16_t funcNum = bits(function, 15, 0); + + if (family == 0x0000) { + switch (funcNum) { + case ExtendedState: + return true; + default: + return false; + } + } + + return false; +} + } // namespace X86ISA } // namespace gem5 diff --git a/src/arch/x86/cpuid.hh b/src/arch/x86/cpuid.hh index 5c1a8ccb16..1c932980d2 100644 --- a/src/arch/x86/cpuid.hh +++ b/src/arch/x86/cpuid.hh @@ -29,7 +29,10 @@ #ifndef __ARCH_X86_CPUID_HH__ #define __ARCH_X86_CPUID_HH__ +#include + #include "base/types.hh" +#include "params/X86ISA.hh" namespace gem5 { @@ -38,28 +41,74 @@ class ThreadContext; namespace X86ISA { - struct CpuidResult - { - uint64_t rax; - uint64_t rbx; - uint64_t rcx; - uint64_t rdx; - // These are not in alphebetical order on purpose. The order reflects - // how the CPUID orders the registers when it returns results. 
- CpuidResult(uint64_t _rax, uint64_t _rbx, - uint64_t _rdx, uint64_t _rcx) : - rax(_rax), rbx(_rbx), rcx(_rcx), rdx(_rdx) - {} +enum StandardCpuidFunction +{ + VendorAndLargestStdFunc, + FamilyModelStepping, + CacheAndTLB, + SerialNumber, + CacheParams, + MonitorMwait, + ThermalPowerMgmt, + ExtendedFeatures, + ExtendedState = 0xD, + NumStandardCpuidFuncs +}; - CpuidResult() - {} - }; +enum ExtendedCpuidFunctions +{ + VendorAndLargestExtFunc, + FamilyModelSteppingBrandFeatures, + NameString1, + NameString2, + NameString3, + L1CacheAndTLB, + L2L3CacheAndL2TLB, + APMInfo, + LongModeAddressSize, + NumExtendedCpuidFuncs +}; - uint64_t stringToRegister(const char *str); +constexpr int nameStringSize = 48; + +struct CpuidResult +{ + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + + // These are not in alphabetical order on purpose. The order reflects + // how the CPUID orders the registers when it returns results. + CpuidResult(uint64_t _rax, uint64_t _rbx, + uint64_t _rdx, uint64_t _rcx) : + rax(_rax), rbx(_rbx), rcx(_rcx), rdx(_rdx) + {} + + CpuidResult() + {} +}; + +class X86CPUID +{ + public: + X86CPUID(const std::string& vendor, const std::string& name); + + void addStandardFunc(uint32_t func, std::vector values); + void addExtendedFunc(uint32_t func, std::vector values); bool doCpuid(ThreadContext * tc, uint32_t function, - uint32_t index, CpuidResult &result); + uint32_t index, CpuidResult &result); + bool hasSignificantIndex(uint32_t function); + + private: + const std::string vendorString; + const std::string nameString; + std::unordered_map> capabilities; + + uint64_t stringToRegister(const char *str); +}; } // namespace X86ISA } // namespace gem5 diff --git a/src/arch/x86/fs_workload.cc b/src/arch/x86/fs_workload.cc index 1a412380a6..88d7deed68 100644 --- a/src/arch/x86/fs_workload.cc +++ b/src/arch/x86/fs_workload.cc @@ -58,7 +58,8 @@ FsWorkload::FsWorkload(const Params &p) : KernelWorkload(p), smbiosTable(p.smbios_table), 
mpFloatingPointer(p.intel_mp_pointer), mpConfigTable(p.intel_mp_table), - rsdp(p.acpi_description_table_pointer) + rsdp(p.acpi_description_table_pointer), + enable_osxsave(p.enable_osxsave) {} void @@ -295,6 +296,7 @@ FsWorkload::initState() CR4 cr4 = tc->readMiscRegNoEffect(misc_reg::Cr4); // Turn on pae. cr4.pae = 1; + cr4.osxsave = enable_osxsave; tc->setMiscReg(misc_reg::Cr4, cr4); // Point to the page tables. diff --git a/src/arch/x86/fs_workload.hh b/src/arch/x86/fs_workload.hh index 9d14f91bb5..81db414fb2 100644 --- a/src/arch/x86/fs_workload.hh +++ b/src/arch/x86/fs_workload.hh @@ -106,6 +106,9 @@ class FsWorkload : public KernelWorkload Addr &fpSize, Addr &tableSize, Addr table=0); void writeOutACPITables(Addr begin, Addr &size); + + private: + bool enable_osxsave; }; } // namespace X86ISA diff --git a/src/arch/x86/isa.cc b/src/arch/x86/isa.cc index 31efae3a43..9e6082a268 100644 --- a/src/arch/x86/isa.cc +++ b/src/arch/x86/isa.cc @@ -151,10 +151,20 @@ RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs); } // anonymous namespace -ISA::ISA(const X86ISAParams &p) : BaseISA(p), vendorString(p.vendor_string) +ISA::ISA(const X86ISAParams &p) + : BaseISA(p), cpuid(new X86CPUID(p.vendor_string, p.name_string)) { - fatal_if(vendorString.size() != 12, - "CPUID vendor string must be 12 characters\n"); + cpuid->addStandardFunc(FamilyModelStepping, p.FamilyModelStepping); + cpuid->addStandardFunc(CacheParams, p.CacheParams); + cpuid->addStandardFunc(ExtendedFeatures, p.ExtendedFeatures); + cpuid->addStandardFunc(ExtendedState, p.ExtendedState); + + cpuid->addExtendedFunc(FamilyModelSteppingBrandFeatures, + p.FamilyModelSteppingBrandFeatures); + cpuid->addExtendedFunc(L1CacheAndTLB, p.L1CacheAndTLB); + cpuid->addExtendedFunc(L2L3CacheAndL2TLB, p.L2L3CacheAndL2TLB); + cpuid->addExtendedFunc(APMInfo, p.APMInfo); + cpuid->addExtendedFunc(LongModeAddressSize, p.LongModeAddressSize); _regClasses.push_back(&flatIntRegClass); 
_regClasses.push_back(&flatFloatRegClass); diff --git a/src/arch/x86/isa.hh b/src/arch/x86/isa.hh index f7ae210f96..9c6dcf0921 100644 --- a/src/arch/x86/isa.hh +++ b/src/arch/x86/isa.hh @@ -33,6 +33,7 @@ #include #include "arch/generic/isa.hh" +#include "arch/x86/cpuid.hh" #include "arch/x86/pcstate.hh" #include "arch/x86/regs/ccr.hh" #include "arch/x86/regs/float.hh" @@ -93,6 +94,8 @@ class ISA : public BaseISA void setThreadContext(ThreadContext *_tc) override; std::string getVendorString() const; + + std::unique_ptr cpuid; }; } // namespace X86ISA diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa index 38937cb3e2..dac5706a06 100644 --- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa +++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa @@ -690,8 +690,9 @@ } 0x2: CPUIDInst::CPUID({{ CpuidResult result; - bool success = doCpuid(xc->tcBase(), bits(Rax, 31, 0), - bits(Rcx, 31, 0), result); + ISA *isa = dynamic_cast(xc->tcBase()->getIsaPtr()); + bool success = isa->cpuid->doCpuid(xc->tcBase(), + bits(Rax, 31, 0), bits(Rcx, 31, 0), result); if (success) { Rax = result.rax; Rbx = result.rbx; diff --git a/src/arch/x86/isa/includes.isa b/src/arch/x86/isa/includes.isa index 6fc5f448a0..9445f2032b 100644 --- a/src/arch/x86/isa/includes.isa +++ b/src/arch/x86/isa/includes.isa @@ -63,6 +63,7 @@ output header {{ #include "arch/x86/insts/microregop.hh" #include "arch/x86/insts/microspecop.hh" #include "arch/x86/insts/static_inst.hh" +#include "arch/x86/isa.hh" #include "arch/x86/regs/ccr.hh" #include "arch/x86/regs/int.hh" #include "arch/x86/regs/misc.hh" diff --git a/src/arch/x86/kvm/x86_cpu.cc b/src/arch/x86/kvm/x86_cpu.cc index 7faa9159ab..e1c1b0dfc0 100644 --- a/src/arch/x86/kvm/x86_cpu.cc +++ b/src/arch/x86/kvm/x86_cpu.cc @@ -37,6 +37,7 @@ #include "arch/x86/cpuid.hh" #include "arch/x86/faults.hh" #include "arch/x86/interrupts.hh" +#include "arch/x86/isa.hh" #include "arch/x86/regs/float.hh" #include 
"arch/x86/regs/int.hh" #include "arch/x86/regs/msr.hh" @@ -73,6 +74,13 @@ using namespace X86ISA; // data) is used to indicate that a segment has been accessed. #define SEG_TYPE_BIT_ACCESSED 1 +// Some Linux distros (e.g., RHEL7) define the KVM macros using "BIT" but do +// not include where BIT is defined, so define it here in that case. +#ifndef BIT +#define BIT(nr) (1UL << (nr)) +#endif + + struct GEM5_PACKED FXSave { uint16_t fcw; @@ -1419,12 +1427,12 @@ X86KvmCPU::ioctlRun() static struct kvm_cpuid_entry2 makeKvmCpuid(uint32_t function, uint32_t index, - CpuidResult &result) + CpuidResult &result, uint32_t flags = 0) { struct kvm_cpuid_entry2 e; e.function = function; e.index = index; - e.flags = 0; + e.flags = flags; e.eax = (uint32_t)result.rax; e.ebx = (uint32_t)result.rbx; e.ecx = (uint32_t)result.rcx; @@ -1437,33 +1445,74 @@ void X86KvmCPU::updateCPUID() { Kvm::CPUIDVector m5_supported; - - /* TODO: We currently don't support any of the functions that - * iterate through data structures in the CPU using an index. It's - * currently not a problem since M5 doesn't expose any of them at - * the moment. 
- */ + X86ISA::ISA *isa = dynamic_cast(tc->getIsaPtr()); /* Basic features */ CpuidResult func0; - X86ISA::doCpuid(tc, 0x0, 0, func0); + isa->cpuid->doCpuid(tc, 0x0, 0, func0); for (uint32_t function = 0; function <= func0.rax; ++function) { CpuidResult cpuid; uint32_t idx(0); - X86ISA::doCpuid(tc, function, idx, cpuid); - m5_supported.push_back(makeKvmCpuid(function, idx, cpuid)); + if (!isa->cpuid->hasSignificantIndex(function)) { + isa->cpuid->doCpuid(tc, function, idx, cpuid); + m5_supported.push_back(makeKvmCpuid(function, idx, cpuid)); + } else { + while (true) { + bool rv = isa->cpuid->doCpuid(tc, function, idx, cpuid); + assert(rv); + + if (idx && + !cpuid.rax && !cpuid.rbx && !cpuid.rdx && !cpuid.rcx) { + break; + } + + /* + * For functions in family 0, this flag tells Linux to compare + * the index as well as the function number rather than only + * the function number. Important: Do NOT set this flag if the + * function does not take an index. Doing so will break SMP. + */ + uint32_t flag = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + m5_supported.push_back( + makeKvmCpuid(function, idx, cpuid, flag)); + idx++; + } + } } /* Extended features */ CpuidResult efunc0; - X86ISA::doCpuid(tc, 0x80000000, 0, efunc0); + isa->cpuid->doCpuid(tc, 0x80000000, 0, efunc0); for (uint32_t function = 0x80000000; function <= efunc0.rax; ++function) { CpuidResult cpuid; uint32_t idx(0); - X86ISA::doCpuid(tc, function, idx, cpuid); - m5_supported.push_back(makeKvmCpuid(function, idx, cpuid)); + if (!isa->cpuid->hasSignificantIndex(function)) { + isa->cpuid->doCpuid(tc, function, idx, cpuid); + m5_supported.push_back(makeKvmCpuid(function, idx, cpuid)); + } else { + while (true) { + bool rv = isa->cpuid->doCpuid(tc, function, idx, cpuid); + assert(rv); + + if (idx && + !cpuid.rax && !cpuid.rbx && !cpuid.rdx && !cpuid.rcx) { + break; + } + + /* + * For functions in family 0, this flag tells Linux to compare + * the index as well as the function number rather than only + * the function 
number. Important: Do NOT set this flag if the + * function does not take an index. Doing so will break SMP. + */ + uint32_t flag = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + m5_supported.push_back( + makeKvmCpuid(function, idx, cpuid, flag)); + idx++; + } + } } setCPUID(m5_supported);