From a6f2c8afdbde21b8f776007c20021d6999986cde Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Thu, 4 Apr 2024 08:32:08 -0700 Subject: [PATCH] arch-x86: Add XCR0 register and add to X86KvmCPU The extended control registers were not being updated in the KVM thread context nor updated in the KVM state. This was causing issues when checkpointing since the XCR0 value was reverting to the default value rather than what it was previously before the checkpoint. THis was causing multiple applications to crash due to executing instructions which are now illegal instructions due to XCR0 being incorrect. This commit adds the XCR0 as a misc register similar to the exiting x86 control registers and adds all of the helper functions to access and set the register value. It also adds support for updating the KVM CPU's state with the register value and updating the thread context's misc reg value so that it is checkpointed along with the other misc regs. Note that this does *not* add support for XSAVE of the AVX state (i.e., the upper 128 bits of YMM registers). It does however fix the immediate problem in issue #958 . A checkpoint upgrader is also provided to add the default value of XCR0 if the checkpoint tag is missing. Change-Id: I97456c8b57cbc7b381bd4be94944ce6567a43c76 --- src/arch/x86/fs_workload.cc | 5 ++++ src/arch/x86/isa.cc | 2 ++ src/arch/x86/kvm/x86_cpu.cc | 30 +++++++++++++++++++ src/arch/x86/kvm/x86_cpu.hh | 4 +++ src/arch/x86/regs/misc.hh | 28 ++++++++++++++++++ src/arch/x86/x86_traits.hh | 1 + util/cpt_upgraders/x86-add-xcr0.py | 46 ++++++++++++++++++++++++++++++ 7 files changed, 116 insertions(+) create mode 100644 util/cpt_upgraders/x86-add-xcr0.py diff --git a/src/arch/x86/fs_workload.cc b/src/arch/x86/fs_workload.cc index 88d7deed68..1ac5d439d1 100644 --- a/src/arch/x86/fs_workload.cc +++ b/src/arch/x86/fs_workload.cc @@ -302,6 +302,11 @@ FsWorkload::initState() // Point to the page tables. tc->setMiscReg(misc_reg::Cr3, PageMapLevel4); + // Only used if cr4.osxsave is set + XCR0 xcr0 = tc->readMiscRegNoEffect(misc_reg::Xcr0); + xcr0.x87 = 1; // Must be 1 according to x86 specification + tc->setMiscReg(misc_reg::Xcr0, xcr0); + Efer efer = tc->readMiscRegNoEffect(misc_reg::Efer); // Enable long mode. efer.lme = 1; diff --git a/src/arch/x86/isa.cc b/src/arch/x86/isa.cc index 4efedbc894..fd3e21273f 100644 --- a/src/arch/x86/isa.cc +++ b/src/arch/x86/isa.cc @@ -338,6 +338,8 @@ ISA::setMiscReg(RegIndex idx, RegVal val) break; case misc_reg::Cr8: break; + case misc_reg::Xcr0: + break; case misc_reg::Rflags: { RFLAGS rflags = val; diff --git a/src/arch/x86/kvm/x86_cpu.cc b/src/arch/x86/kvm/x86_cpu.cc index da6e1bb9e1..67df843000 100644 --- a/src/arch/x86/kvm/x86_cpu.cc +++ b/src/arch/x86/kvm/x86_cpu.cc @@ -732,6 +732,7 @@ X86KvmCPU::updateKvmState() updateKvmStateSRegs(); updateKvmStateFPU(); updateKvmStateMSRs(); + updateKvmStateXCRs(); DPRINTF(KvmContext, "X86KvmCPU::updateKvmState():\n"); if (debug::KvmContext) @@ -996,6 +997,22 @@ X86KvmCPU::updateKvmStateMSRs() setMSRs(msrs); } +void +X86KvmCPU::updateKvmStateXCRs() +{ + struct kvm_xcrs xcrs; + + xcrs.nr_xcrs = NumXCRegs; + xcrs.flags = 0; + + for (int i = 0; i < xcrs.nr_xcrs; ++i) { + xcrs.xcrs[i].xcr = i; + xcrs.xcrs[i].value = tc->readMiscReg(misc_reg::xcr(i)); + } + + setXCRs(xcrs); +} + void X86KvmCPU::updateThreadContext() { @@ -1023,6 +1040,7 @@ X86KvmCPU::updateThreadContext() updateThreadContextFPU(fpu); } updateThreadContextMSRs(); + updateThreadContextXCRs(); // The M5 misc reg caches some values from other // registers. Writing to it with side effects causes it to be @@ -1189,6 +1207,18 @@ X86KvmCPU::updateThreadContextMSRs() } } +void +X86KvmCPU::updateThreadContextXCRs() +{ + struct kvm_xcrs xcrs; + + getXCRs(xcrs); + + for (int i = 0; i < xcrs.nr_xcrs; ++i) { + tc->setMiscReg(misc_reg::xcr(xcrs.xcrs[i].xcr), xcrs.xcrs[i].value); + } +} + void X86KvmCPU::deliverInterrupts() { diff --git a/src/arch/x86/kvm/x86_cpu.hh b/src/arch/x86/kvm/x86_cpu.hh index 33f58d7997..f08d858db8 100644 --- a/src/arch/x86/kvm/x86_cpu.hh +++ b/src/arch/x86/kvm/x86_cpu.hh @@ -217,6 +217,8 @@ class X86KvmCPU : public BaseKvmCPU void updateKvmStateFPUXSave(); /** Update MSR registers */ void updateKvmStateMSRs(); + /** Update XCR registers */ + void updateKvmStateXCRs(); /** @} */ /** @@ -236,6 +238,8 @@ class X86KvmCPU : public BaseKvmCPU void updateThreadContextXSave(const struct kvm_xsave &kxsave); /** Update MSR registers */ void updateThreadContextMSRs(); + /** Update XCR registers */ + void updateThreadContextXCRs(); /** @} */ /** Transfer gem5's CPUID values into the virtual CPU. */ diff --git a/src/arch/x86/regs/misc.hh b/src/arch/x86/regs/misc.hh index 1784142dbe..535d251948 100644 --- a/src/arch/x86/regs/misc.hh +++ b/src/arch/x86/regs/misc.hh @@ -405,6 +405,9 @@ enum : RegIndex // "Fake" MSRs for internally implemented devices PciConfigAddress, + XcrBase, + Xcr0 = XcrBase, + NumRegs }; @@ -424,6 +427,13 @@ cr(int index) return CrBase + index; } +static inline RegIndex +xcr(int index) +{ + assert(index >= 0 && index < NumXCRegs); + return XcrBase + index; +} + static inline RegIndex dr(int index) { @@ -649,6 +659,24 @@ BitUnion64(CR8) Bitfield<3, 0> tpr; // Task Priority Register EndBitUnion(CR8) +BitUnion64(XCR0) + Bitfield<0> x87; // x87 FPU/MMX support (must be 1) + Bitfield<1> sse; // XSAVE support for MXCSR and XMM registers + Bitfield<2> avx; // AVX enabled and XSAVE support for upper halves of YMM + // registers + Bitfield<3> bndreg; // MPX enabled and XSAVE support for BND0-BND3 + // registers + Bitfield<4> bndsrc; // MPX enabled and XSAVE support for BNDCFGU and + // BNDSTATUS registers + Bitfield<5> opmask; // AVX-512 enabled and XSAVE support for opmask + // registers k0-k7 + Bitfield<6> zmm_hi256; // AVX-512 enabled and XSAVE support for upper + // halves of lower ZMM registers + Bitfield<7> hi16_zmm; // AVX-512 enabled and XSAVE support for upper ZMM + // registers + Bitfield<9> pkru; // XSAVE support for PKRU register +EndBitUnion(XCR0) + BitUnion64(DR6) Bitfield<0> b0; Bitfield<1> b1; diff --git a/src/arch/x86/x86_traits.hh b/src/arch/x86/x86_traits.hh index e4a639277c..8006524bb7 100644 --- a/src/arch/x86/x86_traits.hh +++ b/src/arch/x86/x86_traits.hh @@ -55,6 +55,7 @@ namespace X86ISA const int NumCRegs = 16; const int NumDRegs = 8; + const int NumXCRegs = 1; const int NumSegments = 6; const int NumSysSegments = 4; diff --git a/util/cpt_upgraders/x86-add-xcr0.py b/util/cpt_upgraders/x86-add-xcr0.py new file mode 100644 index 0000000000..fe81fcde99 --- /dev/null +++ b/util/cpt_upgraders/x86-add-xcr0.py @@ -0,0 +1,46 @@ +# Copyright (c) 2024 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + + +def upgrader(cpt): + """ + Update the checkpoint to include the XCR0 register if X86 checkpoint. + The value is set to the default of 1. + """ + + import re + + for sec in cpt.sections(): + if re.search(r".*sys.*\.cpu.*\.isa$", sec): + if cpt.get(sec, "isaName") == "x86": + regVals = cpt.get(sec, "regVal") + + # Add the default value of XCR0 (1) if missing + regVals = f"{regVals} 1" + cpt.set(sec, "regVal", regVals)