From 5df52e0dcaf7452a19caa270c9485d027176b0f2 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 10 Jan 2022 21:02:27 -0800 Subject: [PATCH] arch-x86: Overhaul how address size is handled, particularly for stack. The stack size is something that applies to addresses when performing accesses as part of some instructions. This was handled inconsistently or incompletely or simply incorrectly in a few ways. First, when pushing or popping from the stack, the *address size* should be set to the stack size. The data size is generally the operand size. When the stack pointer is incremented/decremented, it should be changed by the data size. When a stack pointer is manipulated, the data size for those calculations should be the stack size. Importantly that does not change the value of the increment/decrement, which is the operand size still. This usage has been fixed throughout. The TLB generally needs to know what the address size was in order to figure out what segment offset was used so that it can do limit checks. There is some inherent inaccuracy in doing things in reverse like this, but that's how it works currently. To find that size, the TLB tried to start from first principles to figure out what the default address size was, and then whether there was an override was passed in through the request flags. This is *very* inaccurate for a few reasons. First, the override doesn't always apply. Second, the address size used by a particular instruction doesn't have to be based on any particular size, whether that is the default or alternate address size, the stack size, etc. Instead, the instructions now pass the actual size being used in as a 2 bit value (0 -> 1 byte, 1 -> 2 bytes, 2 -> 4 bytes, 3 -> 8 bytes), avoiding most of the inaccuracy and approximation. Because the CPU won't embed any size information into fetches, we'll just assume those have no wrap around within the address size. Finally, there were microops that had been added which overrode the address size to be the stack size internally, and try to help the TLB figure out what to do to figure out the address size. Because both of those things are now handled in a different way, those microops are no longer needed or used and have been deleted. Change-Id: I2b1bdf1acf1540bf643fac6d49fe1a5a576ba5c1 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/55443 Tested-by: kokoro Maintainer: Gabe Black Reviewed-by: Gabe Black --- src/arch/amdgpu/common/tlb.cc | 20 +--- .../general_purpose/control_transfer/call.py | 16 +-- .../interrupts_and_exceptions.py | 26 ++--- .../control_transfer/xreturn.py | 22 ++-- .../data_transfer/stack_operations.py | 101 +++++++++--------- .../general_purpose/flags/push_and_pop.py | 8 +- src/arch/x86/isa/microops/ldstop.isa | 92 ++++++---------- src/arch/x86/ldstflags.hh | 14 +-- src/arch/x86/tlb.cc | 20 ++-- 9 files changed, 143 insertions(+), 176 deletions(-) diff --git a/src/arch/amdgpu/common/tlb.cc b/src/arch/amdgpu/common/tlb.cc index 7bee4c30e2..698570b459 100644 --- a/src/arch/amdgpu/common/tlb.cc +++ b/src/arch/amdgpu/common/tlb.cc @@ -467,18 +467,10 @@ namespace X86ISA Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg)); Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg)); - // This assumes we're not in 64 bit mode. If we were, the - // default address size is 64 bits, overridable to 32. - int size = 32; - bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift)); - SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR); + Addr logSize = (flags >> AddrSizeFlagShift) & AddrSizeFlagMask; + int size = 8 << logSize; - if ((csAttr.defaultSize && sizeOverride) || - (!csAttr.defaultSize && !sizeOverride)) { - size = 16; - } - - Addr offset = bits(vaddr - base, size - 1, 0); + Addr offset = (vaddr - base) & mask(size); Addr endOffset = offset + req->getSize() - 1; if (expandDown) { @@ -552,8 +544,7 @@ namespace X86ISA } // Do paging protection checks. - bool inUser = (m5Reg.cpl == 3 && - !(flags & (CPL0FlagBit << FlagShift))); + bool inUser = m5Reg.cpl == 3 && !(flags & CPL0FlagBit); CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0); bool badWrite = (!entry->writable && (inUser || cr0.wp)); @@ -765,8 +756,7 @@ namespace X86ISA bool storeCheck = flags & Request::READ_MODIFY_WRITE; // Do paging protection checks. - bool inUser - = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift))); + bool inUser = m5Reg.cpl == 3 && !(flags & CPL0FlagBit); CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0); bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp)); diff --git a/src/arch/x86/isa/insts/general_purpose/control_transfer/call.py b/src/arch/x86/isa/insts/general_purpose/control_transfer/call.py index 31b81e9e08..8000bc3e2a 100644 --- a/src/arch/x86/isa/insts/general_purpose/control_transfer/call.py +++ b/src/arch/x86/isa/insts/general_purpose/control_transfer/call.py @@ -44,8 +44,8 @@ def macroop CALL_NEAR_I limm t1, imm rdip t7 # Check target of call - stis t7, ss, [0, t0, rsp], "-env.dataSize" - subi rsp, rsp, dsz + st t7, ss, [0, t0, rsp], "-env.dataSize", addressSize=ssz + subi rsp, rsp, dsz, dataSize=ssz wrip t7, t1 }; @@ -58,8 +58,8 @@ def macroop CALL_NEAR_R rdip t1 # Check target of call - stis t1, ss, [0, t0, rsp], "-env.dataSize" - subi rsp, rsp, dsz + st t1, ss, [0, t0, rsp], "-env.dataSize", addressSize=ssz + subi rsp, rsp, dsz, dataSize=ssz wripi reg, 0 }; @@ -73,8 +73,8 @@ def macroop CALL_NEAR_M rdip t7 ld t1, seg, sib, disp # Check target of call - st t7, ss, [0, t0, rsp], "-env.dataSize" - subi rsp, rsp, dsz + st t7, ss, [0, t0, rsp], "-env.dataSize", addressSize=ssz + subi rsp, rsp, dsz, dataSize=ssz wripi t1, 0 }; @@ -88,8 +88,8 @@ def macroop CALL_NEAR_P rdip t7 ld t1, seg, riprel, disp # Check target of call - st t7, ss, [0, t0, rsp], "-env.dataSize" - subi rsp, rsp, dsz + st t7, ss, [0, t0, rsp], "-env.dataSize", addressSize=ssz + subi rsp, rsp, dsz, dataSize=ssz wripi t1, 0 }; ''' diff --git a/src/arch/x86/isa/insts/general_purpose/control_transfer/interrupts_and_exceptions.py b/src/arch/x86/isa/insts/general_purpose/control_transfer/interrupts_and_exceptions.py index 983d95ee7c..2e7a2d1998 100644 --- a/src/arch/x86/isa/insts/general_purpose/control_transfer/interrupts_and_exceptions.py +++ b/src/arch/x86/isa/insts/general_purpose/control_transfer/interrupts_and_exceptions.py @@ -83,9 +83,9 @@ def macroop IRET_PROT { #t4 = handy m5 register # Pop temp_RIP, temp_CS, and temp_RFLAGS - ld t1, ss, [1, t0, rsp], "0 * env.stackSize", dataSize=ssz - ld t2, ss, [1, t0, rsp], "1 * env.stackSize", dataSize=ssz - ld t3, ss, [1, t0, rsp], "2 * env.stackSize", dataSize=ssz + ld t1, ss, [1, t0, rsp], "0 * env.dataSize", addressSize=ssz + ld t2, ss, [1, t0, rsp], "1 * env.dataSize", addressSize=ssz + ld t3, ss, [1, t0, rsp], "2 * env.dataSize", addressSize=ssz # Read the handy m5 register for use later rdm5reg t4 @@ -130,10 +130,10 @@ protToVirtFallThrough: andi t6, t2, 0xF8, dataSize=8 andi t0, t2, 0x4, flags=(EZF,), dataSize=2 br label("globalCSDescriptor"), flags=(CEZF,) - ld t8, tsl, [1, t0, t6], dataSize=8, atCPL0=True + ld t8, tsl, [1, t0, t6], dataSize=8, addressSize=8, atCPL0=True br label("processCSDescriptor") globalCSDescriptor: - ld t8, tsg, [1, t0, t6], dataSize=8, atCPL0=True + ld t8, tsg, [1, t0, t6], dataSize=8, addressSize=8, atCPL0=True processCSDescriptor: chks t2, t6, dataSize=8 @@ -165,32 +165,32 @@ processCSDescriptor: br label("doPopStackStuff"), flags=(nCEZF,) # We can modify user visible state here because we're know # we're done with things that can fault. - addi rsp, rsp, "3 * env.stackSize" + addi rsp, rsp, "3 * env.dataSize", dataSize=ssz br label("fallThroughPopStackStuff") doPopStackStuffAndCheckRIP: # Check if the RIP is canonical. - srai t7, t1, 47, flags=(EZF,), dataSize=ssz + srai t7, t1, 47, flags=(EZF,), dataSize=8 # if t7 isn't 0 or -1, it wasn't canonical. br label("doPopStackStuff"), flags=(CEZF,) - addi t0, t7, 1, flags=(EZF,), dataSize=ssz + addi t0, t7, 1, flags=(EZF,), dataSize=8 fault "std::make_shared(0)", flags=(nCEZF,) doPopStackStuff: # POP.v temp_RSP - ld t6, ss, [1, t0, rsp], "3 * env.dataSize", dataSize=ssz + ld t6, ss, [1, t0, rsp], "3 * env.dataSize", addressSize=ssz # POP.v temp_SS - ld t9, ss, [1, t0, rsp], "4 * env.dataSize", dataSize=ssz + ld t9, ss, [1, t0, rsp], "4 * env.dataSize", addressSize=ssz # SS = READ_DESCRIPTOR (temp_SS, ss_chk) andi t0, t9, 0xFC, flags=(EZF,), dataSize=2 br label("processSSDescriptor"), flags=(CEZF,) andi t7, t9, 0xF8, dataSize=8 andi t0, t9, 0x4, flags=(EZF,), dataSize=2 br label("globalSSDescriptor"), flags=(CEZF,) - ld t7, tsl, [1, t0, t7], dataSize=8, atCPL0=True + ld t7, tsl, [1, t0, t7], dataSize=8, addressSize=8, atCPL0=True br label("processSSDescriptor") globalSSDescriptor: - ld t7, tsg, [1, t0, t7], dataSize=8, atCPL0=True + ld t7, tsg, [1, t0, t7], dataSize=8, addressSize=8, atCPL0=True processSSDescriptor: chks t9, t7, dataSize=8 @@ -243,7 +243,7 @@ skipSegmentSquashing: # RF cleared #RIP = temp_RIP - wrip t0, t1, dataSize=ssz + wrip t0, t1 }; def macroop IRET_VIRT { diff --git a/src/arch/x86/isa/insts/general_purpose/control_transfer/xreturn.py b/src/arch/x86/isa/insts/general_purpose/control_transfer/xreturn.py index b3f09affc9..fea944826e 100644 --- a/src/arch/x86/isa/insts/general_purpose/control_transfer/xreturn.py +++ b/src/arch/x86/isa/insts/general_purpose/control_transfer/xreturn.py @@ -41,9 +41,9 @@ def macroop RET_NEAR .function_return .control_indirect - ld t1, ss, [1, t0, rsp] + ld t1, ss, [1, t0, rsp], addressSize=ssz # Check address of return - addi rsp, rsp, dsz + addi rsp, rsp, dsz, dataSize=ssz wripi t1, 0 }; @@ -55,10 +55,10 @@ def macroop RET_NEAR_I .control_indirect limm t2, imm - ld t1, ss, [1, t0, rsp] + ld t1, ss, [1, t0, rsp], addressSize=ssz # Check address of return - addi rsp, rsp, dsz - add rsp, rsp, t2 + addi rsp, rsp, dsz, dataSize=ssz + add rsp, rsp, t2, dataSize=ssz wripi t1, 0 }; @@ -91,15 +91,15 @@ def macroop RET_FAR { .control_indirect # Get the return RIP - ld t1, ss, [1, t0, rsp] + ld t1, ss, [1, t0, rsp], addressSize=ssz # Get the return CS - ld t2, ss, [1, t0, rsp], ssz + ld t2, ss, [1, t0, rsp], dsz, addressSize=ssz # increment the stack pointer to pop the instruction pointer # and the code segment from the stack. - addi rsp, rsp, dsz - addi rsp, rsp, dsz + addi rsp, rsp, dsz, dataSize=ssz + addi rsp, rsp, dsz, dataSize=ssz # Get the rpl andi t3, t2, 0x3 @@ -115,10 +115,10 @@ def macroop RET_FAR { andi t3, t2, 0xF8, dataSize=8 andi t0, t2, 0x4, flags=(EZF,), dataSize=2 br label("globalDescriptor"), flags=(CEZF,) - ld t3, tsl, [1, t0, t3], dataSize=8 + ld t3, tsl, [1, t0, t3], dataSize=8, addressSize=8 br label("processDescriptor") globalDescriptor: - ld t3, tsg, [1, t0, t3], dataSize=8 + ld t3, tsg, [1, t0, t3], dataSize=8, addressSize=8 processDescriptor: chks t2, t3, IretCheck, dataSize=8 # There should be validity checks on the RIP checks here, but I'll do diff --git a/src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py b/src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py index acd2bb1d36..350b57e7f7 100644 --- a/src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py +++ b/src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py @@ -38,8 +38,8 @@ def macroop POP_R { # Make the default data size of pops 64 bits in 64 bit mode .adjust_env oszIn64Override - ldis t1, ss, [1, t0, rsp], dataSize=ssz - addi rsp, rsp, ssz, dataSize=asz + ld t1, ss, [1, t0, rsp], addressSize=ssz + addi rsp, rsp, dsz, dataSize=ssz mov reg, reg, t1 }; @@ -47,10 +47,10 @@ def macroop POP_M { # Make the default data size of pops 64 bits in 64 bit mode .adjust_env oszIn64Override - ldis t1, ss, [1, t0, rsp], dataSize=ssz - cda seg, sib, disp, dataSize=ssz - addi rsp, rsp, ssz, dataSize=asz - st t1, seg, sib, disp, dataSize=ssz + ld t1, ss, [1, t0, rsp], addressSize=ssz + cda seg, sib, disp + addi rsp, rsp, dsz, dataSize=ssz + st t1, seg, sib, disp }; def macroop POP_P { @@ -58,18 +58,18 @@ def macroop POP_P { .adjust_env oszIn64Override rdip t7 - ld t1, ss, [1, t0, rsp], dataSize=ssz - cda seg, sib, disp, dataSize=ssz - addi rsp, rsp, ssz, dataSize=asz - st t1, seg, riprel, disp, dataSize=ssz + ld t1, ss, [1, t0, rsp], addressSize=ssz + cda seg, sib, disp + addi rsp, rsp, dsz, dataSize=ssz + st t1, seg, riprel, disp }; def macroop PUSH_R { # Make the default data size of pops 64 bits in 64 bit mode .adjust_env oszIn64Override - stis reg, ss, [1, t0, rsp], "-env.stackSize", dataSize=ssz - subi rsp, rsp, ssz + st reg, ss, [1, t0, rsp], "-env.dataSize", addressSize=ssz + subi rsp, rsp, dsz, dataSize=ssz }; def macroop PUSH_I { @@ -77,17 +77,17 @@ def macroop PUSH_I { .adjust_env oszIn64Override limm t1, imm - stis t1, ss, [1, t0, rsp], "-env.stackSize", dataSize=ssz - subi rsp, rsp, ssz + st t1, ss, [1, t0, rsp], "-env.dataSize", addressSize=ssz + subi rsp, rsp, dsz, dataSize=ssz }; def macroop PUSH_M { # Make the default data size of pops 64 bits in 64 bit mode .adjust_env oszIn64Override - ld t1, seg, sib, disp, dataSize=ssz - st t1, ss, [1, t0, rsp], "-env.stackSize", dataSize=ssz - subi rsp, rsp, ssz + ld t1, seg, sib, disp + st t1, ss, [1, t0, rsp], "-env.dataSize", addressSize=ssz + subi rsp, rsp, dsz, dataSize=ssz }; def macroop PUSH_P { @@ -95,9 +95,9 @@ def macroop PUSH_P { .adjust_env oszIn64Override rdip t7 - ld t1, seg, riprel, disp, dataSize=ssz - st t1, ss, [1, t0, rsp], "-env.stackSize", dataSize=ssz - subi rsp, rsp, ssz + ld t1, seg, riprel, disp + st t1, ss, [1, t0, rsp], "-env.dataSize", addressSize=ssz + subi rsp, rsp, dsz, dataSize=ssz }; def macroop PUSH_S { @@ -109,42 +109,41 @@ def macroop PUSH_S { def macroop PUSHA { # Check all the stack addresses. We'll assume that if the beginning and # end are ok, then the stuff in the middle should be as well. - cda ss, [1, t0, rsp], "-env.stackSize", dataSize=ssz - cda ss, [1, t0, rsp], "-8 * env.stackSize", dataSize=ssz - st rax, ss, [1, t0, rsp], "1 * -env.stackSize", dataSize=ssz - st rcx, ss, [1, t0, rsp], "2 * -env.stackSize", dataSize=ssz - st rdx, ss, [1, t0, rsp], "3 * -env.stackSize", dataSize=ssz - st rbx, ss, [1, t0, rsp], "4 * -env.stackSize", dataSize=ssz - st rsp, ss, [1, t0, rsp], "5 * -env.stackSize", dataSize=ssz - st rbp, ss, [1, t0, rsp], "6 * -env.stackSize", dataSize=ssz - st rsi, ss, [1, t0, rsp], "7 * -env.stackSize", dataSize=ssz - st rdi, ss, [1, t0, rsp], "8 * -env.stackSize", dataSize=ssz - subi rsp, rsp, "8 * env.stackSize" + cda ss, [1, t0, rsp], "-env.dataSize", addressSize=ssz + cda ss, [1, t0, rsp], "-8 * env.dataSize", addressSize=ssz + st rax, ss, [1, t0, rsp], "1 * -env.dataSize", addressSize=ssz + st rcx, ss, [1, t0, rsp], "2 * -env.dataSize", addressSize=ssz + st rdx, ss, [1, t0, rsp], "3 * -env.dataSize", addressSize=ssz + st rbx, ss, [1, t0, rsp], "4 * -env.dataSize", addressSize=ssz + st rsp, ss, [1, t0, rsp], "5 * -env.dataSize", addressSize=ssz + st rbp, ss, [1, t0, rsp], "6 * -env.dataSize", addressSize=ssz + st rsi, ss, [1, t0, rsp], "7 * -env.dataSize", addressSize=ssz + st rdi, ss, [1, t0, rsp], "8 * -env.dataSize", addressSize=ssz + subi rsp, rsp, "8 * env.dataSize", dataSize=ssz }; def macroop POPA { # Check all the stack addresses. We'll assume that if the beginning and # end are ok, then the stuff in the middle should be as well. - ld t1, ss, [1, t0, rsp], "0 * env.stackSize", dataSize=ssz - ld t2, ss, [1, t0, rsp], "7 * env.stackSize", dataSize=ssz - mov rdi, rdi, t1, dataSize=ssz - ld rsi, ss, [1, t0, rsp], "1 * env.stackSize", dataSize=ssz - ld rbp, ss, [1, t0, rsp], "2 * env.stackSize", dataSize=ssz - ld rbx, ss, [1, t0, rsp], "4 * env.stackSize", dataSize=ssz - ld rdx, ss, [1, t0, rsp], "5 * env.stackSize", dataSize=ssz - ld rcx, ss, [1, t0, rsp], "6 * env.stackSize", dataSize=ssz - mov rax, rax, t2, dataSize=ssz - addi rsp, rsp, "8 * env.stackSize", dataSize=asz + ld t1, ss, [1, t0, rsp], "0 * env.dataSize", addressSize=ssz + ld rax, ss, [1, t0, rsp], "7 * env.dataSize", addressSize=ssz + mov rdi, rdi, t1 + ld rsi, ss, [1, t0, rsp], "1 * env.dataSize", addressSize=ssz + ld rbp, ss, [1, t0, rsp], "2 * env.dataSize", addressSize=ssz + ld rbx, ss, [1, t0, rsp], "4 * env.dataSize", addressSize=ssz + ld rdx, ss, [1, t0, rsp], "5 * env.dataSize", addressSize=ssz + ld rcx, ss, [1, t0, rsp], "6 * env.dataSize", addressSize=ssz + addi rsp, rsp, "8 * env.dataSize", dataSize=ssz }; def macroop LEAVE { # Make the default data size of pops 64 bits in 64 bit mode .adjust_env oszIn64Override - mov t1, t1, rbp, dataSize=ssz - ldis rbp, ss, [1, t0, t1], dataSize=ssz + mov t1, t1, rbp + ld rbp, ss, [1, t0, t1], addressSize=ssz mov rsp, rsp, t1, dataSize=ssz - addi rsp, rsp, ssz, dataSize=ssz + addi rsp, rsp, dsz, dataSize=ssz }; def macroop ENTER_I_I { @@ -160,8 +159,8 @@ def macroop ENTER_I_I { # t1 is now the masked nesting level, and t2 is the amount of storage. # Push rbp. - stis rbp, ss, [1, t0, rsp], "-env.dataSize" - subi rsp, rsp, ssz + st rbp, ss, [1, t0, rsp], "-env.dataSize", addressSize=ssz + subi rsp, rsp, dsz, dataSize=ssz # Save the stack pointer for later mov t6, t6, rsp @@ -176,9 +175,9 @@ def macroop ENTER_I_I { limm t4, "-1ULL", dataSize=8 topOfLoop: - ldis t5, ss, [dsz, t4, rbp] - stis t5, ss, [1, t0, rsp], "-env.dataSize" - subi rsp, rsp, ssz + ld t5, ss, [dsz, t4, rbp], addressSize=ssz + st t5, ss, [1, t0, rsp], "-env.dataSize", addressSize=ssz + subi rsp, rsp, dsz, dataSize=ssz # If we're not done yet, loop subi t4, t4, 1, dataSize=8 @@ -187,8 +186,8 @@ topOfLoop: bottomOfLoop: # Push the old rbp onto the stack - stis t6, ss, [1, t0, rsp], "-env.dataSize" - subi rsp, rsp, ssz + st t6, ss, [1, t0, rsp], "-env.dataSize", addressSize=ssz + subi rsp, rsp, dsz, dataSize=ssz skipLoop: sub rsp, rsp, t2, dataSize=ssz diff --git a/src/arch/x86/isa/insts/general_purpose/flags/push_and_pop.py b/src/arch/x86/isa/insts/general_purpose/flags/push_and_pop.py index d2e0dc745f..4a0ca5630a 100644 --- a/src/arch/x86/isa/insts/general_purpose/flags/push_and_pop.py +++ b/src/arch/x86/isa/insts/general_purpose/flags/push_and_pop.py @@ -38,15 +38,15 @@ def macroop PUSHF { .adjust_env oszIn64Override rflags t1 - st t1, ss, [1, t0, rsp], "-env.stackSize", dataSize=ssz - subi rsp, rsp, ssz + st t1, ss, [1, t0, rsp], "-env.dataSize", addressSize=ssz + subi rsp, rsp, dsz, dataSize=ssz }; def macroop POPF { .adjust_env oszIn64Override - ld t1, ss, [1, t0, rsp], dataSize=ssz - addi rsp, rsp, ssz + ld t1, ss, [1, t0, rsp], addressSize=ssz + addi rsp, rsp, dsz, dataSize=ssz wrflags t1, t0 }; ''' diff --git a/src/arch/x86/isa/microops/ldstop.isa b/src/arch/x86/isa/microops/ldstop.isa index 72cb7bf530..99a381a9c2 100644 --- a/src/arch/x86/isa/microops/ldstop.isa +++ b/src/arch/x86/isa/microops/ldstop.isa @@ -296,7 +296,7 @@ let {{ class LdStOp(X86Microop): def __init__(self, data, segment, addr, disp, dataSize, addressSize, baseFlags, atCPL0, prefetch, nonSpec, - implicitStack, uncacheable): + uncacheable): self.data = data [self.scale, self.index, self.base] = addr self.disp = disp @@ -305,7 +305,7 @@ let {{ self.addressSize = addressSize self.memFlags = baseFlags if atCPL0: - self.memFlags += " | (CPL0FlagBit << FlagShift)" + self.memFlags += " | CPL0FlagBit" self.instFlags = "" if prefetch: self.memFlags += " | Request::PREFETCH" @@ -313,12 +313,10 @@ let {{ if nonSpec: self.instFlags += " | (1ULL << StaticInst::IsNonSpeculative)" if uncacheable: - self.instFlags += " | (Request::UNCACHEABLE)" - # For implicit stack operations, we should use *not* use the - # alternative addressing mode for loads/stores if the prefix is set - if not implicitStack: - self.memFlags += " | (machInst.legacy.addr ? " + \ - "(AddrSizeFlagBit << FlagShift) : 0)" + self.instFlags += " | Request::UNCACHEABLE" + self.memFlags += \ + " | ((log2i(%s) & AddrSizeFlagMask) << AddrSizeFlagShift)" % \ + addressSize def getAllocator(self, microFlags): allocator = '''new %(class_name)s(machInst, macrocodeBlock, @@ -338,7 +336,7 @@ let {{ class BigLdStOp(X86Microop): def __init__(self, data, segment, addr, disp, dataSize, addressSize, baseFlags, atCPL0, prefetch, nonSpec, - implicitStack, uncacheable): + uncacheable): self.data = data [self.scale, self.index, self.base] = addr self.disp = disp @@ -347,7 +345,7 @@ let {{ self.addressSize = addressSize self.memFlags = baseFlags if atCPL0: - self.memFlags += " | (CPL0FlagBit << FlagShift)" + self.memFlags += " | CPL0FlagBit" self.instFlags = "" if prefetch: self.memFlags += " | Request::PREFETCH" @@ -355,12 +353,10 @@ let {{ if nonSpec: self.instFlags += " | (1ULL << StaticInst::IsNonSpeculative)" if uncacheable: - self.instFlags += " | (Request::UNCACHEABLE)" - # For implicit stack operations, we should use *not* use the - # alternative addressing mode for loads/stores if the prefix is set - if not implicitStack: - self.memFlags += " | (machInst.legacy.addr ? " + \ - "(AddrSizeFlagBit << FlagShift) : 0)" + self.instFlags += " | Request::UNCACHEABLE" + self.memFlags += \ + " | ((log2i(%s) & AddrSizeFlagMask) << AddrSizeFlagShift)" % \ + addressSize def getAllocator(self, microFlags): allocString = ''' @@ -388,10 +384,10 @@ let {{ class LdStSplitOp(LdStOp): def __init__(self, data, segment, addr, disp, dataSize, addressSize, baseFlags, atCPL0, prefetch, nonSpec, - implicitStack, uncacheable): + uncacheable): super().__init__(0, segment, addr, disp, dataSize, addressSize, baseFlags, atCPL0, prefetch, nonSpec, - implicitStack, uncacheable) + uncacheable) (self.dataLow, self.dataHi) = data def getAllocator(self, microFlags): @@ -422,8 +418,9 @@ let {{ self.dataSize = dataSize self.addressSize = addressSize self.instFlags = "" - self.memFlags = baseFlags + "| " \ - "(machInst.legacy.addr ? (AddrSizeFlagBit << FlagShift) : 0)" + self.memFlags = baseFlags + \ + " | ((log2i(%s) & AddrSizeFlagMask) << AddrSizeFlagShift)" % \ + addressSize def getAllocator(self, microFlags): allocator = '''new %(class_name)s(machInst, macrocodeBlock, @@ -457,7 +454,7 @@ let {{ def defineMicroLoadOp(mnemonic, code, bigCode='', mem_flags="0", big=True, nonSpec=False, - implicitStack=False, is_float=False): + is_float=False): global header_output global decoder_output global exec_output @@ -486,26 +483,18 @@ let {{ exec_output += MicroLoadInitiateAcc.subst(iop) exec_output += MicroLoadCompleteAcc.subst(iop) - if implicitStack: - # For instructions that implicitly access the stack, the address - # size is the same as the stack segment pointer size, not the - # address size if specified by the instruction prefix - addressSize = "env.stackSize" - else: - addressSize = "env.addressSize" - base = LdStOp if big: base = BigLdStOp class LoadOp(base): def __init__(self, data, segment, addr, disp = 0, dataSize="env.dataSize", - addressSize=addressSize, + addressSize="env.addressSize", atCPL0=False, prefetch=False, nonSpec=nonSpec, - implicitStack=implicitStack, uncacheable=False): + uncacheable=False): super().__init__(data, segment, addr, disp, dataSize, addressSize, mem_flags, - atCPL0, prefetch, nonSpec, implicitStack, uncacheable) + atCPL0, prefetch, nonSpec, uncacheable) self.className = Name self.mnemonic = name @@ -513,9 +502,6 @@ let {{ defineMicroLoadOp('Ld', 'Data = merge(Data, data, Mem, dataSize);', 'Data = Mem & mask(dataSize * 8);') - defineMicroLoadOp('Ldis', 'Data = merge(Data, data, Mem, dataSize);', - 'Data = Mem & mask(dataSize * 8);', - implicitStack=True) defineMicroLoadOp('Ldst', 'Data = merge(Data, data, Mem, dataSize);', 'Data = Mem & mask(dataSize * 8);', 'Request::READ_MODIFY_WRITE') @@ -584,10 +570,10 @@ let {{ dataSize="env.dataSize", addressSize="env.addressSize", atCPL0=False, prefetch=False, nonSpec=nonSpec, - implicitStack=False, uncacheable=False): + uncacheable=False): super().__init__(data, segment, addr, disp, dataSize, addressSize, mem_flags, - atCPL0, prefetch, nonSpec, implicitStack, uncacheable) + atCPL0, prefetch, nonSpec, uncacheable) self.className = Name self.mnemonic = name @@ -606,7 +592,7 @@ let {{ nonSpec=True) def defineMicroStoreOp(mnemonic, code, completeCode="", mem_flags="0", - implicitStack=False, is_float=False, has_data=True): + is_float=False, has_data=True): global header_output global decoder_output global exec_output @@ -632,29 +618,22 @@ let {{ exec_output += MicroStoreInitiateAcc.subst(iop) exec_output += MicroStoreCompleteAcc.subst(iop) - if implicitStack: - # For instructions that implicitly access the stack, the address - # size is the same as the stack segment pointer size, not the - # address size if specified by the instruction prefix - addressSize = "env.stackSize" - else: - addressSize = "env.addressSize" if has_data: class StoreOp(LdStOp): def __init__(self, data, segment, addr, disp=0, - dataSize="env.dataSize", addressSize=addressSize, - atCPL0=False, nonSpec=False, - implicitStack=implicitStack, uncacheable=False): + dataSize="env.dataSize", addressSize="env.addressSize", + atCPL0=False, nonSpec=False, uncacheable=False): super().__init__(data, segment, addr, disp, dataSize, addressSize, mem_flags, atCPL0, False, - nonSpec, implicitStack, uncacheable) + nonSpec, uncacheable) self.className = Name self.mnemonic = name else: class StoreOp(MemNoDataOp): def __init__(self, segment, addr, disp=0, - dataSize="env.dataSize", addressSize=addressSize): + dataSize="env.dataSize", + addressSize="env.addressSize"): super().__init__(segment, addr, disp, dataSize, addressSize, mem_flags) self.className = Name @@ -663,10 +642,8 @@ let {{ microopClasses[name] = StoreOp defineMicroStoreOp('St', 'Mem = PData;') - defineMicroStoreOp('Stis', 'Mem = PData;', - implicitStack=True) - defineMicroStoreOp('Stul', 'Mem = PData;', - mem_flags="Request::LOCKED_RMW") + defineMicroStoreOp('Stis', 'Mem = PData;') + defineMicroStoreOp('Stul', 'Mem = PData;', mem_flags="Request::LOCKED_RMW") defineMicroStoreOp('Stfp', code='Mem = FpData_uqw;', is_float=True) @@ -718,11 +695,10 @@ let {{ def __init__(self, data, segment, addr, disp = 0, dataSize="env.dataSize", addressSize="env.addressSize", - atCPL0=False, nonSpec=False, implicitStack=False, - uncacheable=False): + atCPL0=False, nonSpec=False, uncacheable=False): super().__init__(data, segment, addr, disp, dataSize, addressSize, mem_flags, atCPL0, False, - nonSpec, implicitStack, uncacheable) + nonSpec, uncacheable) self.className = Name self.mnemonic = name @@ -750,7 +726,7 @@ let {{ dataSize="env.dataSize", addressSize="env.addressSize"): super().__init__(data, segment, addr, disp, dataSize, addressSize, "0", - False, False, False, False, False) + False, False, False, False) self.className = "Lea" self.mnemonic = "lea" diff --git a/src/arch/x86/ldstflags.hh b/src/arch/x86/ldstflags.hh index 7465d570ea..c18033e736 100644 --- a/src/arch/x86/ldstflags.hh +++ b/src/arch/x86/ldstflags.hh @@ -50,13 +50,13 @@ namespace gem5 */ namespace X86ISA { - [[maybe_unused]] const Request::FlagsType SegmentFlagMask = mask(4); - const int FlagShift = 4; - enum FlagBit - { - CPL0FlagBit = 1, - AddrSizeFlagBit = 2, - }; + +constexpr Request::FlagsType SegmentFlagMask = mask(4); +constexpr auto CPL0FlagShift = 4; +constexpr auto CPL0FlagBit = 1 << CPL0FlagShift; +constexpr auto AddrSizeFlagShift = CPL0FlagShift + 1; +constexpr auto AddrSizeFlagMask = mask(2); + } // namespace X86ISA } // namespace gem5 diff --git a/src/arch/x86/tlb.cc b/src/arch/x86/tlb.cc index 3992d732de..ad2609bca7 100644 --- a/src/arch/x86/tlb.cc +++ b/src/arch/x86/tlb.cc @@ -328,6 +328,10 @@ TLB::translate(const RequestPtr &req, HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG); + const Addr logAddrSize = (flags >> AddrSizeFlagShift) & AddrSizeFlagMask; + const int addrSize = 8 << logAddrSize; + const Addr addrMask = mask(addrSize); + // If protected mode has been enabled... if (m5Reg.prot) { DPRINTF(TLB, "In protected mode.\n"); @@ -361,11 +365,11 @@ TLB::translate(const RequestPtr &req, } Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg)); Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg)); - bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift)); - unsigned logSize = sizeOverride ? (unsigned)m5Reg.altAddr - : (unsigned)m5Reg.defAddr; - int size = (1 << logSize) * 8; - Addr offset = bits(vaddr - base, size - 1, 0); + Addr offset; + if (mode == BaseMMU::Execute) + offset = vaddr - base; + else + offset = (vaddr - base) & addrMask; Addr endOffset = offset + req->getSize() - 1; if (expandDown) { DPRINTF(TLB, "Checking an expand down segment.\n"); @@ -380,8 +384,7 @@ TLB::translate(const RequestPtr &req, } } } - if (m5Reg.submode != SixtyFourBitMode || - (flags & (AddrSizeFlagBit << FlagShift))) + if (m5Reg.submode != SixtyFourBitMode && addrSize != 64) vaddr &= mask(32); // If paging is enabled, do the translation. if (m5Reg.paging) { @@ -434,8 +437,7 @@ TLB::translate(const RequestPtr &req, DPRINTF(TLB, "Entry found with paddr %#x, " "doing protection checks.\n", entry->paddr); // Do paging protection checks. - bool inUser = (m5Reg.cpl == 3 && - !(flags & (CPL0FlagBit << FlagShift))); + bool inUser = m5Reg.cpl == 3 && !(flags & CPL0FlagBit); CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0); bool badWrite = (!entry->writable && (inUser || cr0.wp)); if ((inUser && !entry->user) ||