From a54d3198a820e1f6ae74d8b0eda47f112e4afe6f Mon Sep 17 00:00:00 2001 From: Nicholas Mosier Date: Wed, 29 May 2024 07:54:03 -0700 Subject: [PATCH] arch-x86: break 32/64-bit mov's input dependency on prior dest value (#1172) Fix #1169. Break the input dependency of 32-bit and 64-bit 'mov' micro-ops on the prior value in the destination register. Such a dependency is required for 8-bit and 16-bit moves, as they do not completely overwrite the value in the destination register. However, it is unnecessary for 32-bit moves (which implicitly zero the upper 32 bits) and 64-bit moves. This patch implements the fix by adding a new code template field inside the generated constructors of X86StaticInst's, called `invalidate_srcs`, which instruction implementations like `mov` can use to conditionally invalidate particular source registers as needed. In `mov`'s case, this is when the data size is 32 or 64 bits. Change-Id: Ib2aef6be6da08752640ea3414b90efb7965be924 --- src/arch/x86/isa/microops/regop.isa | 44 ++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa index c7e9f46f10..6c95d2f158 100644 --- a/src/arch/x86/isa/microops/regop.isa +++ b/src/arch/x86/isa/microops/regop.isa @@ -84,6 +84,14 @@ def template MicroRegOpDeclare {{ %(set_reg_idx_arr)s; %(constructor)s; %(cond_control_flag_init)s; + + // In some cases, we need to invalidate some source registers + // to eliminate unnecessary input dependencies (e.g., for a + // 32-bit or 64-bit 'mov' micro-op) on the prior value of the + // destination register. (Such dependencies are required for + // 8-bit or 16-bit moves, however, since they don't completely + // overwrite the destination register.) 
+ %(invalidate_srcs)s; } Fault execute(ExecContext *, trace::InstRecord *) const override; @@ -182,7 +190,7 @@ let {{ class RegOpMeta(type): def buildCppClasses(self, name, Name, suffix, code, big_code, \ flag_code, cond_check, else_code, cond_control_flag_init, - op_class, operand_types): + invalidate_srcs, op_class, operand_types): # Globals to stick the output in global header_output @@ -191,9 +199,9 @@ let {{ # Stick all the code together so it can be searched at once allCode = "|".join((code, flag_code, cond_check, else_code, - cond_control_flag_init)) + cond_control_flag_init, invalidate_srcs)) allBigCode = "|".join((big_code, flag_code, cond_check, else_code, - cond_control_flag_init)) + cond_control_flag_init, invalidate_srcs)) # If op2 is used anywhere, make register and immediate versions # of this code. @@ -219,6 +227,7 @@ let {{ matcher.sub(src2_name, cond_check), matcher.sub(src2_name, else_code), matcher.sub(src2_name, cond_control_flag_init), + matcher.sub(src2_name, invalidate_srcs), op_class, operand_types) imm_name = '(int8_t)imm8' if match.group("prefix") else 'imm8' self.buildCppClasses(name + "i", Name, suffix + "Imm", @@ -228,6 +237,7 @@ let {{ matcher.sub(imm_name, cond_check), matcher.sub(imm_name, else_code), matcher.sub(imm_name, cond_control_flag_init), + matcher.sub(imm_name, invalidate_srcs), op_class, imm_operand_types) return @@ -236,8 +246,8 @@ let {{ if flag_code != "" or cond_check != "true": self.buildCppClasses(name, Name, suffix, code, big_code, "", "true", else_code, - "flags[IsUncondControl] = flags[IsControl];", op_class, - operand_types) + "flags[IsUncondControl] = flags[IsControl];", + invalidate_srcs, op_class, operand_types) suffix = "Flags" + suffix cxx_classes = list([op.cxx_class() for op in operand_types]) @@ -255,6 +265,7 @@ let {{ "cond_check" : cond_check, "else_code" : else_code, "cond_control_flag_init" : cond_control_flag_init, + "invalidate_srcs": invalidate_srcs, "op_class" : op_class})] if big_code != "": 
iops += [InstObjParams(name, Name + suffix + "Big", base, @@ -263,6 +274,7 @@ let {{ "cond_check" : cond_check, "else_code" : else_code, "cond_control_flag_init" : cond_control_flag_init, + "invalidate_srcs": invalidate_srcs, "op_class" : op_class})] # Generate the actual code (finally!) @@ -292,20 +304,22 @@ let {{ cond_check = cls.cond_check else_code = cls.else_code cond_control_flag_init = cls.cond_control_flag_init + invalidate_srcs = cls.invalidate_srcs op_class = cls.op_class operand_types = cls.operand_types # Set up the C++ classes mcls.buildCppClasses(cls, name, Name, "", code, big_code, flag_code, cond_check, else_code, - cond_control_flag_init, op_class, operand_types) + cond_control_flag_init, invalidate_srcs, op_class, + operand_types) # Hook into the microassembler dict global microopClasses microopClasses[name] = cls allCode = "|".join((code, flag_code, cond_check, else_code, - cond_control_flag_init)) + cond_control_flag_init, invalidate_srcs)) # If op2 is used anywhere, make register and immediate versions # of this code. @@ -326,6 +340,7 @@ let {{ cond_check = "true" else_code = ";" cond_control_flag_init = "" + invalidate_srcs = "" op_class = "IntAluOp" def __init__(self, *ops, flags=None, dataSize="env.dataSize"): @@ -702,6 +717,21 @@ let {{ class Mov(BasicRegOp, CondRegOp): code = 'DestReg = merge(SrcReg1, dest, op2, dataSize)' else_code = 'DestReg = DestReg;' + invalidate_srcs = ''' + // 4-byte and 8-byte register moves completely overwrite + // the destination register. Therefore, we can safely + // eliminate the destination register's original contents + // as a source dependency. + // Note: Source index 1 is the actual architectural source register. + // Note: Because there are other variants of the 'mov' micro-op, + // we need to check the number of source registers to ensure + // we're operating on the register-register version, not the + // register-immediate version, 'movi', say. 
+ if (numSrcRegs() == 3 && (dataSize == 4 || dataSize == 8)) { + setSrcRegIdx(0, RegId()); + setSrcRegIdx(2, RegId()); + } + ''' # Shift instructions