diff --git a/src/arch/x86/fs_workload.cc b/src/arch/x86/fs_workload.cc
index 88d7deed68..1ac5d439d1 100644
--- a/src/arch/x86/fs_workload.cc
+++ b/src/arch/x86/fs_workload.cc
@@ -302,6 +302,11 @@ FsWorkload::initState()
     // Point to the page tables.
     tc->setMiscReg(misc_reg::Cr3, PageMapLevel4);
 
+    // Only used if cr4.osxsave is set
+    XCR0 xcr0 = tc->readMiscRegNoEffect(misc_reg::Xcr0);
+    xcr0.x87 = 1; // Must be 1 according to x86 specification
+    tc->setMiscReg(misc_reg::Xcr0, xcr0);
+
     Efer efer = tc->readMiscRegNoEffect(misc_reg::Efer);
     // Enable long mode.
     efer.lme = 1;
diff --git a/src/arch/x86/isa.cc b/src/arch/x86/isa.cc
index 4efedbc894..fd3e21273f 100644
--- a/src/arch/x86/isa.cc
+++ b/src/arch/x86/isa.cc
@@ -338,6 +338,8 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
         break;
       case misc_reg::Cr8:
         break;
+      case misc_reg::Xcr0:
+        break;
       case misc_reg::Rflags:
         {
             RFLAGS rflags = val;
diff --git a/src/arch/x86/kvm/x86_cpu.cc b/src/arch/x86/kvm/x86_cpu.cc
index da6e1bb9e1..67df843000 100644
--- a/src/arch/x86/kvm/x86_cpu.cc
+++ b/src/arch/x86/kvm/x86_cpu.cc
@@ -732,6 +732,7 @@ X86KvmCPU::updateKvmState()
     updateKvmStateSRegs();
     updateKvmStateFPU();
     updateKvmStateMSRs();
+    updateKvmStateXCRs();
 
     DPRINTF(KvmContext, "X86KvmCPU::updateKvmState():\n");
     if (debug::KvmContext)
@@ -996,6 +997,22 @@ X86KvmCPU::updateKvmStateMSRs()
     setMSRs(msrs);
 }
 
+void
+X86KvmCPU::updateKvmStateXCRs()
+{
+    struct kvm_xcrs xcrs;
+
+    xcrs.nr_xcrs = NumXCRegs;
+    xcrs.flags = 0;
+
+    for (int i = 0; i < xcrs.nr_xcrs; ++i) {
+        xcrs.xcrs[i].xcr = i;
+        xcrs.xcrs[i].value = tc->readMiscReg(misc_reg::xcr(i));
+    }
+
+    setXCRs(xcrs);
+}
+
 void
 X86KvmCPU::updateThreadContext()
 {
@@ -1023,6 +1040,7 @@ X86KvmCPU::updateThreadContext()
         updateThreadContextFPU(fpu);
     }
     updateThreadContextMSRs();
+    updateThreadContextXCRs();
 
     // The M5 misc reg caches some values from other
     // registers. Writing to it with side effects causes it to be
@@ -1189,6 +1207,18 @@ X86KvmCPU::updateThreadContextMSRs()
     }
 }
 
+void
+X86KvmCPU::updateThreadContextXCRs()
+{
+    struct kvm_xcrs xcrs;
+
+    getXCRs(xcrs);
+
+    for (int i = 0; i < xcrs.nr_xcrs; ++i) {
+        tc->setMiscReg(misc_reg::xcr(xcrs.xcrs[i].xcr), xcrs.xcrs[i].value);
+    }
+}
+
 void
 X86KvmCPU::deliverInterrupts()
 {
diff --git a/src/arch/x86/kvm/x86_cpu.hh b/src/arch/x86/kvm/x86_cpu.hh
index 33f58d7997..f08d858db8 100644
--- a/src/arch/x86/kvm/x86_cpu.hh
+++ b/src/arch/x86/kvm/x86_cpu.hh
@@ -217,6 +217,8 @@ class X86KvmCPU : public BaseKvmCPU
     void updateKvmStateFPUXSave();
     /** Update MSR registers */
     void updateKvmStateMSRs();
+    /** Update XCR registers */
+    void updateKvmStateXCRs();
     /** @} */
 
     /**
@@ -236,6 +238,8 @@ class X86KvmCPU : public BaseKvmCPU
     void updateThreadContextXSave(const struct kvm_xsave &kxsave);
     /** Update MSR registers */
     void updateThreadContextMSRs();
+    /** Update XCR registers */
+    void updateThreadContextXCRs();
     /** @} */
 
     /** Transfer gem5's CPUID values into the virtual CPU. */
diff --git a/src/arch/x86/regs/misc.hh b/src/arch/x86/regs/misc.hh
index 1784142dbe..535d251948 100644
--- a/src/arch/x86/regs/misc.hh
+++ b/src/arch/x86/regs/misc.hh
@@ -405,6 +405,9 @@ enum : RegIndex
     // "Fake" MSRs for internally implemented devices
     PciConfigAddress,
 
+    XcrBase,
+    Xcr0 = XcrBase,
+
     NumRegs
 };
 
@@ -424,6 +427,13 @@ cr(int index)
     return CrBase + index;
 }
 
+static inline RegIndex
+xcr(int index)
+{
+    assert(index >= 0 && index < NumXCRegs);
+    return XcrBase + index;
+}
+
 static inline RegIndex
 dr(int index)
 {
@@ -649,6 +659,24 @@ BitUnion64(CR8)
     Bitfield<3, 0> tpr; // Task Priority Register
 EndBitUnion(CR8)
 
+BitUnion64(XCR0)
+    Bitfield<0> x87; // x87 FPU/MMX support (must be 1)
+    Bitfield<1> sse; // XSAVE support for MXCSR and XMM registers
+    Bitfield<2> avx; // AVX enabled and XSAVE support for upper halves of YMM
+                     // registers
+    Bitfield<3> bndreg; // MPX enabled and XSAVE support for BND0-BND3
+                        // registers
+    Bitfield<4> bndsrc; // MPX enabled and XSAVE support for BNDCFGU and
+                        // BNDSTATUS registers
+    Bitfield<5> opmask; // AVX-512 enabled and XSAVE support for opmask
+                        // registers k0-k7
+    Bitfield<6> zmm_hi256; // AVX-512 enabled and XSAVE support for upper
+                           // halves of lower ZMM registers
+    Bitfield<7> hi16_zmm; // AVX-512 enabled and XSAVE support for upper ZMM
+                          // registers
+    Bitfield<9> pkru; // XSAVE support for PKRU register
+EndBitUnion(XCR0)
+
 BitUnion64(DR6)
     Bitfield<0> b0;
     Bitfield<1> b1;
diff --git a/src/arch/x86/x86_traits.hh b/src/arch/x86/x86_traits.hh
index e4a639277c..8006524bb7 100644
--- a/src/arch/x86/x86_traits.hh
+++ b/src/arch/x86/x86_traits.hh
@@ -55,6 +55,7 @@ namespace X86ISA
 
     const int NumCRegs = 16;
     const int NumDRegs = 8;
+    const int NumXCRegs = 1;
     const int NumSegments = 6;
     const int NumSysSegments = 4;
 
diff --git a/util/cpt_upgraders/x86-add-xcr0.py b/util/cpt_upgraders/x86-add-xcr0.py
new file mode 100644
index 0000000000..fe81fcde99
--- /dev/null
+++ b/util/cpt_upgraders/x86-add-xcr0.py
@@ -0,0 +1,46 @@
+# Copyright (c) 2024 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+#    contributors may be used to endorse or promote products derived from this
+#    software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+
+def upgrader(cpt):
+    """
+    Update the checkpoint to include the XCR0 register if X86 checkpoint.
+    The value is set to the default of 1.
+    """
+
+    import re
+
+    for sec in cpt.sections():
+        if re.search(r".*sys.*\.cpu.*\.isa$", sec):
+            if cpt.get(sec, "isaName") == "x86":
+                regVals = cpt.get(sec, "regVal")
+
+                # Add the default value of XCR0 (1) if missing
+                regVals = f"{regVals} 1"
+                cpt.set(sec, "regVal", regVals)