diff --git a/src/arch/x86/insts/micromediaop.hh b/src/arch/x86/insts/micromediaop.hh index bd897f90e2..be82429f15 100644 --- a/src/arch/x86/insts/micromediaop.hh +++ b/src/arch/x86/insts/micromediaop.hh @@ -40,6 +40,7 @@ namespace X86ISA enum MediaFlag { MediaMultHiOp = 1, + MediaPartHiOp = 32, MediaSignedOp = 64, MediaScalarOp = 128 }; @@ -77,6 +78,12 @@ class MediaOpBase : public X86MicroopBase return ext & MediaMultHiOp; } + bool + partHi() const + { + return ext & MediaPartHiOp; + } + bool signedOp() const { diff --git a/src/arch/x86/insts/microop_args.hh b/src/arch/x86/insts/microop_args.hh index 9dd121b3b1..c9850ca171 100644 --- a/src/arch/x86/insts/microop_args.hh +++ b/src/arch/x86/insts/microop_args.hh @@ -91,6 +91,19 @@ struct Src2Op {} }; +struct Src3Op +{ + const RegIndex src3; + const size_t size; + RegIndex opIndex() const { return src3; } + + Src3Op(RegIndex _src3, size_t _size) : src3(_src3), size(_size) {} + template <class InstType> + Src3Op(RegIndex _src3, InstType *inst) : src3(_src3), + size(inst->getSrcSize()) + {} +}; + struct DataOp { const RegIndex data; @@ -271,6 +284,8 @@ using FoldedSrc2Op = FoldedOp<Src2Op>; using FloatSrc2Op = FloatOp<Src2Op>; using IntSrc2Op = IntOp<Src2Op>; +using FloatSrc3Op = FloatOp<Src3Op>; + using FoldedDataOp = FoldedOp<DataOp>; using FloatDataOp = FloatOp<DataOp>; using FoldedDataHiOp = FoldedOp<DataHiOp>; diff --git a/src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa b/src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa index 0f4330bf7c..ea54e1578d 100644 --- a/src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa +++ b/src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa @@ -43,40 +43,102 @@ 0x09: psignw_Vdq_Wdq(); 0x0A: psignd_Vdq_Wdq(); 0x0B: pmulhrsw_Vdq_Wdq(); - 0x10: pblendvb_Vdq_Wdq(); - 0x14: blendvps_Vdq_Wdq(); - 0x15: blendvpd_Vdq_Wdq(); + 0x10: decode MODRM_MOD { + default: Inst::PBLENDVB(Vdq, Wdq); + } + 0x14: decode MODRM_MOD { + default: Inst::BLENDVPS(Vdq, Wdq); + } + 0x15: decode MODRM_MOD { + default: Inst::BLENDVPD(Vdq, Wdq); + } 0x17: ptest_Vdq_Wdq(); - 0x1C: 
pabsb_Vdq_Wdq(); - 0x1D: pabsw_Vdq_Wdq(); - 0x1E: pabsd_Vdq_Wdq(); - 0x20: pmovsxbw_Vdq_Udq_or_Mq(); - 0x21: pmovsxbd_Vdq_Udq_or_Md(); - 0x22: pmovsxbq_Vdq_Udq_or_Mw(); - 0x23: pmovsxwd_Vdq_Udq_or_Mq(); - 0x24: pmovsxwq_Vdq_Udq_or_Md(); - 0x25: pmovsxdq_Vdq_Udq_or_Mq(); - 0x28: pmuldq_Vdq_Wdq(); + 0x1C: decode MODRM_MOD { + default: Inst::PABSB(Vdq, Wdq); + } + 0x1D: decode MODRM_MOD { + default: Inst::PABSW(Vdq, Wdq); + } + 0x1E: decode MODRM_MOD { + default: Inst::PABSD(Vdq, Wdq); + } + 0x20: decode MODRM_MOD { + default: Inst::PMOVSXBW(Vdq, Wdq); + } + 0x21: decode MODRM_MOD { + default: Inst::PMOVSXBD(Vdq, Wdq); + } + 0x22: decode MODRM_MOD { + default: Inst::PMOVSXBQ(Vdq, Wdq); + } + 0x23: decode MODRM_MOD { + default: Inst::PMOVSXWD(Vdq, Wdq); + } + 0x24: decode MODRM_MOD { + default: Inst::PMOVSXWQ(Vdq, Wdq); + } + 0x25: decode MODRM_MOD { + default: Inst::PMOVSXDQ(Vdq, Wdq); + } + 0x28: decode MODRM_MOD { + default: Inst::PMULDQ(Vdq, Wdq); + } 0x29: pcmpeqq_Vdq_Wdq(); 0x2A: movntdqa_Vdq_Mdq(); - 0x2B: packusdw_Vdq_Wdq(); - 0x30: pmovzxbw_Vdq_Udq_or_Mq(); - 0x31: pmovzxbd_Vdq_Udq_or_Md(); - 0x32: pmovzxbq_Vdq_Udq_or_Mw(); - 0x33: pmovzxwd_Vdq_Udq_or_Mq(); - 0x34: pmovzxwq_Vdq_Udq_or_Md(); - 0x35: pmovzxdq_Vdq_Udq_or_Mq(); - 0x37: pcmpgtq_Vdq_Wdq(); - 0x38: pminsb_Vdq_Wdq(); - 0x39: pminsd_Vdq_Wdq(); - 0x3A: pminuw_Vdq_Wdq(); - 0x3B: pminud_Vdq_Wdq(); - 0x3C: pmaxsb_Vdq_Wdq(); - 0x3D: pmaxsd_Vdq_Wdq(); - 0x3E: pmaxuw_Vdq_Wdq(); - 0x3F: pmaxud_Vdq_Wdq(); - 0x40: pmulld_Vdq_Wdq(); - 0x41: phminposuw_Vdq_Wdq(); + 0x2B: decode MODRM_MOD { + default: Inst::PACKUSDW(Vdq, Wdq); + } + 0x30: decode MODRM_MOD { + default: Inst::PMOVZXBW(Vdq, Wdq); + } + 0x31: decode MODRM_MOD { + default: Inst::PMOVZXBD(Vdq, Wdq); + } + 0x32: decode MODRM_MOD { + default: Inst::PMOVZXBQ(Vdq, Wdq); + } + 0x33: decode MODRM_MOD { + default: Inst::PMOVZXWD(Vdq, Wdq); + } + 0x34: decode MODRM_MOD { + default: Inst::PMOVZXWQ(Vdq, Wdq); + } + 0x35: decode MODRM_MOD { + default: 
Inst::PMOVZXDQ(Vdq, Wdq); + } + 0x37: decode MODRM_MOD { + default: Inst::PCMPGTQ(Vdq, Wdq); + } + 0x38: decode MODRM_MOD { + default: Inst::PMINSB(Vdq, Wdq); + } + 0x39: decode MODRM_MOD { + default: Inst::PMINSD(Vdq, Wdq); + } + 0x3A: decode MODRM_MOD { + default: Inst::PMINUW(Vdq, Wdq); + } + 0x3B: decode MODRM_MOD { + default: Inst::PMINUD(Vdq, Wdq); + } + 0x3C: decode MODRM_MOD { + default: Inst::PMAXSB(Vdq, Wdq); + } + 0x3D: decode MODRM_MOD { + default: Inst::PMAXSD(Vdq, Wdq); + } + 0x3E: decode MODRM_MOD { + default: Inst::PMAXUW(Vdq, Wdq); + } + 0x3F: decode MODRM_MOD { + default: Inst::PMAXUD(Vdq, Wdq); + } + 0x40: decode MODRM_MOD { + default: Inst::PMULLD(Vdq, Wdq); + } + 0x41: decode MODRM_MOD { + default: Inst::PHMINPOSUW(Vdq, Wdq); + } default: Inst::UD2(); } default: decode LEGACY_REPNE { diff --git a/src/arch/x86/isa/decoder/three_byte_0f3a_opcodes.isa b/src/arch/x86/isa/decoder/three_byte_0f3a_opcodes.isa index 0c66fa5cff..b1e84733fa 100644 --- a/src/arch/x86/isa/decoder/three_byte_0f3a_opcodes.isa +++ b/src/arch/x86/isa/decoder/three_byte_0f3a_opcodes.isa @@ -33,22 +33,57 @@ format WarnUnimpl { 1: decode OPCODE_OP { 0x08: roundps_Vdq_Wdq_Ib(); 0x09: roundpd_Vdq_Wdq_Ib(); - 0x0A: roundss_Vss_Wss_Ib(); - 0x0B: roundsd_Vsd_Wsd_Ib(); - 0x0C: blendps_Vdq_Wdq_Ib(); - 0x0D: blendpd_Vdq_Wdq_Ib(); - 0x0E: pblendw_Vdq_Wdq_Ib(); - 0x0F: palignr_Vdq_Wdq_Ib(); - 0x14: pextrb_Rd_or_Mb_Vdq_Ib(); + 0x0A: decode MODRM_MOD { + 0x3: Inst::ROUNDSS(Vdq, Wdq, Ib); + default: Inst::ROUNDSS(Vss, Md, Ib); + } + 0x0B: decode MODRM_MOD { + 0x3: Inst::ROUNDSD(Vss, Wdq, Ib); + default: Inst::ROUNDSD(Vss, Mq, Ib); + } + 0x0C: decode MODRM_MOD { + default: Inst::BLENDPS(Vdq, Wdq, Ib); + } + 0x0D: decode MODRM_MOD { + default: Inst::BLENDPD(Vdq, Wdq, Ib); + } + 0x0E: decode MODRM_MOD { + default: Inst::PBLENDW(Vdq, Wdq, Ib); + } + 0x0F: decode MODRM_MOD { + default: Inst::PALIGNR(Vdq, Wdq, Ib); + } + 0x14: decode MODRM_MOD { + 0x3: Inst::PEXTRB(Rd, Vdq, Ib); + default: 
Inst::PEXTRB(Mb, Vdq, Ib); + } 0x15: decode MODRM_MOD { 0x3: Inst::PEXTRW(Rd,Vdq,Ib); - default: pextrw_Mw_Vdq_Ib(); + default: Inst::PEXTRW(Mw,Vdq,Ib); + } + 0x16: decode MODRM_MOD { + default: decode REX_W { + 0x0: Inst::PEXTRD(Ed, Vdq, Ib); + 0x1: Inst::PEXTRQ(Eq, Vdq, Ib); + } + } + 0x17: decode MODRM_MOD { + default: Inst::EXTRACTPS(Ed, Vdq, Ib); + } + 0x20: decode MODRM_MOD { + 0x3: Inst::PINSRB(Vdq, Rq, Ib); + default: Inst::PINSRB(Vdq, Mb, Ib); + } + 0x21: decode MODRM_MOD { + 0x3: Inst::INSERTPS(Vdq, Wdq, Ib); + default: Inst::INSERTPS(Vdq, Md, Ib); + } + 0x22: decode MODRM_MOD { + default: decode REX_W { + 0x0: Inst::PINSRD(Vdq, Ed, Ib); + 0x1: Inst::PINSRQ(Vdq, Eq, Ib); + } + } } - 0x16: pextrd_pextrq_Ed_or_Eq_Vdq_Ib(); - 0x17: extractps_Ed_Vdq_Ib(); - 0x20: pinsrb_Vdq_Rd_or_Rq_or_Mb_Ib(); - 0x21: insertps_Vdq_Udq_or_Md_Ib(); - 0x22: pinsrd_pinsrq_Vdq_Ed_or_Eq_Ib(); 0x40: dpps_Vdq_Wdq_Ib(); 0x41: dppd_Vdq_Wdq_Ib(); 0x42: pcmpistrm_Vdq_Wdq_Ib(); diff --git a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py index ec9bf0e06c..e5aaf694b9 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py @@ -173,4 +173,24 @@ def macroop MAXSD_XMM_P { ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 mmaxf xmml, xmml, ufp1, ext=Scalar, size=8 }; + +def macroop PHMINPOSUW_XMM_XMM { + phminposuw xmml, xmmlm, xmmhm, size=2 + xorfp xmmh, xmmh, xmmh +}; + +def macroop PHMINPOSUW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + phminposuw xmml, ufp1, ufp2, size=2 + xorfp xmmh, xmmh, xmmh +}; + +def macroop PHMINPOSUW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, 
"DISPLACEMENT + 8", dataSize=8 + phminposuw xmml, ufp1, ufp2, size=2 + xorfp xmmh, xmmh, xmmh +}; """ diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/__init__.py b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/__init__.py index c8a2d2f2b3..6661dc8120 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/__init__.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/__init__.py @@ -38,6 +38,7 @@ categories = [ "convert_floating_point_to_xmm_integer", "convert_floating_point_to_mmx_integer", "convert_floating_point_to_gpr_integer", + "round", ] microcode = """ diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/round.py b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/round.py new file mode 100644 index 0000000000..ea2a7341d6 --- /dev/null +++ b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/round.py @@ -0,0 +1,72 @@ +# Copyright (c) 2007 The Hewlett-Packard Development Company +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +microcode = """ +def macroop ROUNDSS_XMM_XMM_I { + rdval t1, ctrlRegIdx("misc_reg::Mxcsr") + rounds xmml, xmmlm, t1, "IMMEDIATE", size=4 +}; + +def macroop ROUNDSS_XMM_M_I { + rdval t1, ctrlRegIdx("misc_reg::Mxcsr") + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=4 + rounds xmml, ufp1, t1, "IMMEDIATE", size=4 +}; + +def macroop ROUNDSS_XMM_P_I { + rdval t1, ctrlRegIdx("misc_reg::Mxcsr") + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=4 + rounds xmml, ufp1, t1, "IMMEDIATE", size=4 +}; + +def macroop ROUNDSD_XMM_XMM_I { + rdval t1, ctrlRegIdx("misc_reg::Mxcsr") + rounds xmml, xmmlm, t1, "IMMEDIATE", size=8 +}; + +def macroop ROUNDSD_XMM_M_I { + rdval t1, ctrlRegIdx("misc_reg::Mxcsr") + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + rounds xmml, ufp1, t1, "IMMEDIATE", size=8 +}; + +def macroop ROUNDSD_XMM_P_I { + rdval t1, ctrlRegIdx("misc_reg::Mxcsr") + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + rounds xmml, ufp1, t1, "IMMEDIATE", size=8 +}; +""" diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py index 2a4a152c9f..607a53d828 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py @@ -276,4 +276,43 @@ def macroop MOVSD_P_XMM { def macroop MOVSD_XMM_XMM { movfp xmml, xmmlm, dataSize=8 }; + +def macroop EXTRACTPS_R_XMM_I { + extractps reg, xmmlm, "IMMEDIATE & mask(2)", size=8 + extractps reg, xmmhm, "IMMEDIATE & mask(2)", size=8, ext=PartHi +}; + +def macroop EXTRACTPS_M_XMM_I { + extractps t1, xmmlm, "IMMEDIATE & mask(2)", size=8 + extractps t1, xmmhm, "IMMEDIATE & mask(2)", size=8, ext=PartHi + st t1, seg, sib, disp +}; + +def macroop EXTRACTPS_P_XMM_I { + rdip t7 + extractps t1, xmmlm, "IMMEDIATE & mask(2)", size=8 + extractps t1, xmmhm, "IMMEDIATE & mask(2)", size=8, ext=PartHi + st t1, seg, riprel, disp +}; + +def macroop 
INSERTPS_XMM_XMM_I { + movfp ufp1, xmml, dataSize=8 + insertps xmml, xmmh, xmmlm, xmmhm, "IMMEDIATE", size=8 + insertps xmmh, ufp1, xmmlm, xmmhm, "IMMEDIATE", size=8, ext=PartHi +}; + +def macroop INSERTPS_XMM_M_I { + movfp ufp1, xmml, dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT", dataSize=4 + insertps xmml, xmmh, ufp2, ufp3, "IMMEDIATE & mask(6)", size=8 + insertps xmmh, ufp1, ufp2, ufp3, "IMMEDIATE & mask(6)", size=8, ext=PartHi +}; + +def macroop INSERTPS_XMM_P_I { + rdip t7 + movfp ufp1, xmml, dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT", dataSize=4 + insertps xmml, xmmh, ufp2, ufp3, "IMMEDIATE & mask(6)", size=8 + insertps xmmh, ufp1, ufp2, ufp3, "IMMEDIATE & mask(6)", size=8, ext=PartHi +}; """ diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/__init__.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/__init__.py index 01ae49f88e..c2de13b845 100644 --- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/__init__.py +++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/__init__.py @@ -35,6 +35,7 @@ categories = [ "addition", + "absolute", "subtraction", "multiplication", "multiply_add", diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/absolute.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/absolute.py new file mode 100644 index 0000000000..daea1b7902 --- /dev/null +++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/absolute.py @@ -0,0 +1,96 @@ +# Copyright (c) 2007 The Hewlett-Packard Development Company +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. 
You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +microcode = """ +def macroop PABSB_XMM_XMM { + pabs xmml, xmmlm, size=1 + pabs xmmh, xmmhm, size=1 +}; + +def macroop PABSB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + pabs xmml, ufp1, size=1 + pabs xmmh, ufp2, size=1 +}; + +def macroop PABSB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + pabs xmml, ufp1, size=1 + pabs xmmh, ufp2, size=1 +}; + +def macroop PABSW_XMM_XMM { + pabs xmml, xmmlm, size=2 + pabs xmmh, xmmhm, size=2 +}; + +def macroop PABSW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + pabs xmml, ufp1, size=2 + pabs xmmh, ufp2, size=2 +}; + +def macroop PABSW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + pabs xmml, ufp1, size=2 + pabs xmmh, ufp2, size=2 +}; + +def macroop PABSD_XMM_XMM { + pabs xmml, xmmlm, size=4 + pabs xmmh, xmmhm, size=4 +}; + +def macroop PABSD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + pabs xmml, ufp1, size=4 + pabs xmmh, ufp2, size=4 +}; + +def macroop PABSD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + pabs xmml, ufp1, size=4 + pabs xmmh, ufp2, size=4 +}; +""" diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py index 3246686d2c..6cdde2af57 100644 --- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py +++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py @@ -74,6 +74,26 @@ def macroop PMULLW_XMM_P { mmuli xmmh, xmmh, ufp2, size=2, ext=Signed }; +def macroop PMULLD_XMM_XMM { + mmuli xmml, xmml, xmmlm, size=4, ext=Signed + mmuli xmmh, xmmh, 
xmmhm, size=4, ext=Signed +}; + +def macroop PMULLD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmuli xmml, xmml, ufp1, size=4, ext=Signed + mmuli xmmh, xmmh, ufp2, size=4, ext=Signed +}; + +def macroop PMULLD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmuli xmml, xmml, ufp1, size=4, ext=Signed + mmuli xmmh, xmmh, ufp2, size=4, ext=Signed +}; + def macroop PMULHUW_XMM_XMM { mmuli xmml, xmml, xmmlm, size=2, ext = MultHi mmuli xmmh, xmmh, xmmhm, size=2, ext = MultHi @@ -113,4 +133,24 @@ def macroop PMULUDQ_XMM_P { mmuli xmml, xmml, ufp1, srcSize=4, destSize=8, ext=Scalar mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=Scalar }; + +def macroop PMULDQ_XMM_XMM { + mmuli xmml, xmml, xmmlm, srcSize=4, destSize=8, ext=Scalar + "|" + Signed + mmuli xmmh, xmmh, xmmhm, srcSize=4, destSize=8, ext=Scalar + "|" + Signed +}; + +def macroop PMULDQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmuli xmml, xmml, ufp1, srcSize=4, destSize=8, ext=Scalar + "|" + Signed + mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=Scalar + "|" + Signed +}; + +def macroop PMULDQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmuli xmml, xmml, ufp1, srcSize=4, destSize=8, ext=Scalar + "|" + Signed + mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=Scalar + "|" + Signed +}; """ diff --git a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py index 7fb4fe621f..548a00e93f 100644 --- a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py +++ b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py @@ -153,4 +153,24 @@ def macroop PCMPGTD_XMM_P { mcmpi2r xmml, 
xmml, ufp1, size=4, ext=2 mcmpi2r xmmh, xmmh, ufp2, size=4, ext=2 }; + +def macroop PCMPGTQ_XMM_XMM { + mcmpi2r xmml, xmml, xmmlm, size=8, ext=2 + mcmpi2r xmmh, xmmh, xmmhm, size=8, ext=2 +}; + +def macroop PCMPGTQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mcmpi2r xmml, xmml, ufp1, size=8, ext=2 + mcmpi2r xmmh, xmmh, ufp2, size=8, ext=2 +}; + +def macroop PCMPGTQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mcmpi2r xmml, xmml, ufp1, size=8, ext=2 + mcmpi2r xmmh, xmmh, ufp2, size=8, ext=2 +}; """ diff --git a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py index 7e863091a0..5793118e01 100644 --- a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py +++ b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py @@ -54,6 +54,66 @@ def macroop PMINUB_XMM_P { mmini xmmh, xmmh, ufp2, size=1, ext=0 }; +def macroop PMINUW_XMM_XMM { + mmini xmml, xmml, xmmlm, size=2, ext=0 + mmini xmmh, xmmh, xmmhm, size=2, ext=0 +}; + +def macroop PMINUW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmini xmml, xmml, ufp1, size=2, ext=0 + mmini xmmh, xmmh, ufp2, size=2, ext=0 +}; + +def macroop PMINUW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmini xmml, xmml, ufp1, size=2, ext=0 + mmini xmmh, xmmh, ufp2, size=2, ext=0 +}; + +def macroop PMINUD_XMM_XMM { + mmini xmml, xmml, xmmlm, size=4, ext=0 + mmini xmmh, xmmh, xmmhm, size=4, ext=0 +}; + +def macroop PMINUD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmini xmml, xmml, 
ufp1, size=4, ext=0 + mmini xmmh, xmmh, ufp2, size=4, ext=0 +}; + +def macroop PMINUD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmini xmml, xmml, ufp1, size=4, ext=0 + mmini xmmh, xmmh, ufp2, size=4, ext=0 +}; + +def macroop PMINSB_XMM_XMM { + mmini xmml, xmml, xmmlm, size=1, ext=Signed + mmini xmmh, xmmh, xmmhm, size=1, ext=Signed +}; + +def macroop PMINSB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmini xmml, xmml, ufp1, size=1, ext=Signed + mmini xmmh, xmmh, ufp2, size=1, ext=Signed +}; + +def macroop PMINSB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmini xmml, xmml, ufp1, size=1, ext=Signed + mmini xmmh, xmmh, ufp2, size=1, ext=Signed +}; + def macroop PMINSW_XMM_XMM { mmini xmml, xmml, xmmlm, size=2, ext=Signed mmini xmmh, xmmh, xmmhm, size=2, ext=Signed @@ -74,6 +134,26 @@ def macroop PMINSW_XMM_P { mmini xmmh, xmmh, ufp2, size=2, ext=Signed }; +def macroop PMINSD_XMM_XMM { + mmini xmml, xmml, xmmlm, size=4, ext=Signed + mmini xmmh, xmmh, xmmhm, size=4, ext=Signed +}; + +def macroop PMINSD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmini xmml, xmml, ufp1, size=4, ext=Signed + mmini xmmh, xmmh, ufp2, size=4, ext=Signed +}; + +def macroop PMINSD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmini xmml, xmml, ufp1, size=4, ext=Signed + mmini xmmh, xmmh, ufp2, size=4, ext=Signed +}; + def macroop PMAXUB_XMM_XMM { mmaxi xmml, xmml, xmmlm, size=1, ext=0 mmaxi xmmh, xmmh, xmmhm, size=1, ext=0 @@ -94,6 +174,66 @@ def macroop PMAXUB_XMM_P { mmaxi xmmh, xmmh, ufp2, size=1, ext=0 }; +def macroop PMAXUW_XMM_XMM { + mmaxi xmml, xmml, xmmlm, size=2, ext=0 + mmaxi xmmh, xmmh, 
xmmhm, size=2, ext=0 +}; + +def macroop PMAXUW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmaxi xmml, xmml, ufp1, size=2, ext=0 + mmaxi xmmh, xmmh, ufp2, size=2, ext=0 +}; + +def macroop PMAXUW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmaxi xmml, xmml, ufp1, size=2, ext=0 + mmaxi xmmh, xmmh, ufp2, size=2, ext=0 +}; + +def macroop PMAXUD_XMM_XMM { + mmaxi xmml, xmml, xmmlm, size=4, ext=0 + mmaxi xmmh, xmmh, xmmhm, size=4, ext=0 +}; + +def macroop PMAXUD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmaxi xmml, xmml, ufp1, size=4, ext=0 + mmaxi xmmh, xmmh, ufp2, size=4, ext=0 +}; + +def macroop PMAXUD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmaxi xmml, xmml, ufp1, size=4, ext=0 + mmaxi xmmh, xmmh, ufp2, size=4, ext=0 +}; + +def macroop PMAXSB_XMM_XMM { + mmaxi xmml, xmml, xmmlm, size=1, ext=Signed + mmaxi xmmh, xmmh, xmmhm, size=1, ext=Signed +}; + +def macroop PMAXSB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmaxi xmml, xmml, ufp1, size=1, ext=Signed + mmaxi xmmh, xmmh, ufp2, size=1, ext=Signed +}; + +def macroop PMAXSB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmaxi xmml, xmml, ufp1, size=1, ext=Signed + mmaxi xmmh, xmmh, ufp2, size=1, ext=Signed +}; + def macroop PMAXSW_XMM_XMM { mmaxi xmml, xmml, xmmlm, size=2, ext=Signed mmaxi xmmh, xmmh, xmmhm, size=2, ext=Signed @@ -113,4 +253,24 @@ def macroop PMAXSW_XMM_P { mmaxi xmml, xmml, ufp1, size=2, ext=Signed mmaxi xmmh, xmmh, ufp2, size=2, ext=Signed }; + +def macroop PMAXSD_XMM_XMM { + mmaxi xmml, xmml, xmmlm, size=4, ext=Signed + mmaxi xmmh, 
xmmh, xmmhm, size=4, ext=Signed +}; + +def macroop PMAXSD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmaxi xmml, xmml, ufp1, size=4, ext=Signed + mmaxi xmmh, xmmh, ufp2, size=4, ext=Signed +}; + +def macroop PMAXSD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmaxi xmml, xmml, ufp1, size=4, ext=Signed + mmaxi xmmh, xmmh, ufp2, size=4, ext=Signed +}; """ diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py index f955cbaa16..8d14aa296e 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py @@ -34,11 +34,96 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. microcode = """ +def macroop PEXTRB_R_XMM_I { + mov2int reg, xmmlm, "IMMEDIATE & mask(4)", size=1, ext=1 + mov2int reg, xmmhm, "IMMEDIATE & mask(4)", size=1, ext=1 +}; + +def macroop PEXTRB_M_XMM_I { + mov2int t1, xmmlm, "IMMEDIATE & mask(4)", size=1, ext=1 + mov2int t1, xmmhm, "IMMEDIATE & mask(4)", size=1, ext=1 + st t1, seg, sib, disp, dataSize=1 +}; + +def macroop PEXTRB_P_XMM_I { + rdip t7 + mov2int t1, xmmlm, "IMMEDIATE & mask(4)", size=1, ext=1 + mov2int t1, xmmhm, "IMMEDIATE & mask(4)", size=1, ext=1 + st t1, seg, riprel, disp, dataSize=1 +}; + def macroop PEXTRW_R_XMM_I { mov2int reg, xmmlm, "IMMEDIATE & mask(3)", size=2, ext=1 mov2int reg, xmmhm, "IMMEDIATE & mask(3)", size=2, ext=1 }; +def macroop PEXTRW_M_XMM_I { + mov2int t1, xmmlm, "IMMEDIATE & mask(3)", size=2, ext=1 + mov2int t1, xmmhm, "IMMEDIATE & mask(3)", size=2, ext=1 + st t1, seg, sib, disp, dataSize=2 +}; + +def macroop PEXTRW_P_XMM_I { + rdip t7 + mov2int t1, xmmlm, "IMMEDIATE & mask(3)", size=2, ext=1 + mov2int t1, xmmhm, "IMMEDIATE & 
mask(3)", size=2, ext=1 + st t1, seg, riprel, disp, dataSize=2 +}; + +def macroop PEXTRD_R_XMM_I { + mov2int reg, xmmlm, "IMMEDIATE & mask(2)", size=4, ext=1 + mov2int reg, xmmhm, "IMMEDIATE & mask(2)", size=4, ext=1 +}; + +def macroop PEXTRD_M_XMM_I { + mov2int t1, xmmlm, "IMMEDIATE & mask(2)", size=4, ext=1 + mov2int t1, xmmhm, "IMMEDIATE & mask(2)", size=4, ext=1 + st t1, seg, sib, disp, dataSize=4 +}; + +def macroop PEXTRD_P_XMM_I { + rdip t7 + mov2int t1, xmmlm, "IMMEDIATE & mask(2)", size=4, ext=1 + mov2int t1, xmmhm, "IMMEDIATE & mask(2)", size=4, ext=1 + st t1, seg, riprel, disp, dataSize=4 +}; + +def macroop PEXTRQ_R_XMM_I { + mov2int reg, xmmlm, "IMMEDIATE & mask(1)", size=8, ext=1 + mov2int reg, xmmhm, "IMMEDIATE & mask(1)", size=8, ext=1 +}; + +def macroop PEXTRQ_M_XMM_I { + mov2int t1, xmmlm, "IMMEDIATE & mask(1)", size=8, ext=1 + mov2int t1, xmmhm, "IMMEDIATE & mask(1)", size=8, ext=1 + st t1, seg, sib, disp, dataSize=8 +}; + +def macroop PEXTRQ_P_XMM_I { + rdip t7 + mov2int t1, xmmlm, "IMMEDIATE & mask(1)", size=8, ext=1 + mov2int t1, xmmhm, "IMMEDIATE & mask(1)", size=8, ext=1 + st t1, seg, riprel, disp, dataSize=8 +}; + +def macroop PINSRB_XMM_R_I { + mov2fp xmml, regm, "IMMEDIATE & mask(4)", size=1, ext=1 + mov2fp xmmh, regm, "IMMEDIATE & mask(4)", size=1, ext=1 +}; + +def macroop PINSRB_XMM_M_I { + ld t1, seg, sib, disp, dataSize=1 + mov2fp xmml, t1, "IMMEDIATE & mask(4)", size=1, ext=1 + mov2fp xmmh, t1, "IMMEDIATE & mask(4)", size=1, ext=1 +}; + +def macroop PINSRB_XMM_P_I { + rdip t7 + ld t1, seg, riprel, disp, dataSize=1 + mov2fp xmml, t1, "IMMEDIATE & mask(4)", size=1, ext=1 + mov2fp xmmh, t1, "IMMEDIATE & mask(4)", size=1, ext=1 +}; + def macroop PINSRW_XMM_R_I { mov2fp xmml, regm, "IMMEDIATE & mask(3)", size=2, ext=1 mov2fp xmmh, regm, "IMMEDIATE & mask(3)", size=2, ext=1 @@ -56,4 +141,40 @@ def macroop PINSRW_XMM_P_I { mov2fp xmml, t1, "IMMEDIATE & mask(3)", size=2, ext=1 mov2fp xmmh, t1, "IMMEDIATE & mask(3)", size=2, ext=1 }; + +def 
macroop PINSRD_XMM_R_I { + mov2fp xmml, regm, "IMMEDIATE & mask(2)", size=4, ext=1 + mov2fp xmmh, regm, "IMMEDIATE & mask(2)", size=4, ext=1 +}; + +def macroop PINSRD_XMM_M_I { + ld t1, seg, sib, disp, dataSize=4 + mov2fp xmml, t1, "IMMEDIATE & mask(2)", size=4, ext=1 + mov2fp xmmh, t1, "IMMEDIATE & mask(2)", size=4, ext=1 +}; + +def macroop PINSRD_XMM_P_I { + rdip t7 + ld t1, seg, riprel, disp, dataSize=4 + mov2fp xmml, t1, "IMMEDIATE & mask(2)", size=4, ext=1 + mov2fp xmmh, t1, "IMMEDIATE & mask(2)", size=4, ext=1 +}; + +def macroop PINSRQ_XMM_R_I { + mov2fp xmml, regm, "IMMEDIATE & mask(1)", size=8, ext=1 + mov2fp xmmh, regm, "IMMEDIATE & mask(1)", size=8, ext=1 +}; + +def macroop PINSRQ_XMM_M_I { + ld t1, seg, sib, disp, dataSize=8 + mov2fp xmml, t1, "IMMEDIATE & mask(1)", size=8, ext=1 + mov2fp xmmh, t1, "IMMEDIATE & mask(1)", size=8, ext=1 +}; + +def macroop PINSRQ_XMM_P_I { + rdip t7 + ld t1, seg, riprel, disp, dataSize=8 + mov2fp xmml, t1, "IMMEDIATE & mask(1)", size=8, ext=1 + mov2fp xmmh, t1, "IMMEDIATE & mask(1)", size=8, ext=1 +}; """ diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py index 2307ecfcef..7457e1fa85 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py @@ -76,6 +76,27 @@ def macroop PACKSSWB_XMM_P { pack xmmh, ufp1, ufp2, ext=Signed, srcSize=2, destSize=1 }; +def macroop PACKUSDW_XMM_XMM { + pack ufp1, xmml, xmmh, ext=0, srcSize=4, destSize=2 + pack xmmh, xmmlm, xmmhm, ext=0, srcSize=4, destSize=2 + movfp xmml, ufp1, dataSize=8 +}; + +def macroop PACKUSDW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + pack xmml, xmml, xmmh, ext=0, srcSize=4, destSize=2 + pack xmmh, ufp1, ufp2, ext=0, srcSize=4, destSize=2 +}; + +def macroop 
PACKUSDW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + pack xmml, xmml, xmmh, ext=0, srcSize=4, destSize=2 + pack xmmh, ufp1, ufp2, ext=0, srcSize=4, destSize=2 +}; + def macroop PACKUSWB_XMM_XMM { pack ufp1, xmml, xmmh, ext=0, srcSize=2, destSize=1 pack xmmh, xmmlm, xmmhm, ext=0, srcSize=2, destSize=1 @@ -85,8 +106,8 @@ def macroop PACKUSWB_XMM_XMM { def macroop PACKUSWB_XMM_M { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 - pack xmml, xmml, xmmh, ext=Signed, srcSize=2, destSize=1 - pack xmmh, ufp1, ufp2, ext=Signed, srcSize=2, destSize=1 + pack xmml, xmml, xmmh, ext=0, srcSize=2, destSize=1 + pack xmmh, ufp1, ufp2, ext=0, srcSize=2, destSize=1 }; def macroop PACKUSWB_XMM_P { diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py index 0fe09b6d37..946d59f6b5 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py @@ -112,4 +112,124 @@ def macroop PSHUFB_XMM_P { movfp xmml, ufp1, dataSize=8 movfp xmmh, ufp2, dataSize=8 }; + +def macroop PBLENDW_XMM_XMM_I { + blend xmml, xmmlm, "IMMEDIATE & mask(8)", size=2, ext=0 + blend xmmh, xmmhm, "IMMEDIATE & mask(8)", size=2, ext=1 +}; + +def macroop PBLENDW_XMM_M_I { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + blend xmml, ufp1, "IMMEDIATE & mask(8)", size=2, ext=0 + blend xmmh, ufp2, "IMMEDIATE & mask(8)", size=2, ext=1 +}; + +def macroop PBLENDW_XMM_P_I { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + blend xmml, ufp1, "IMMEDIATE & mask(8)", size=2, ext=0 + blend xmmh, ufp2, "IMMEDIATE & mask(8)", size=2, ext=1 +}; + +def macroop BLENDPS_XMM_XMM_I { + blend xmml, xmmlm, 
"IMMEDIATE & mask(4)", size=4, ext=0 + blend xmmh, xmmhm, "IMMEDIATE & mask(4)", size=4, ext=1 +}; + +def macroop BLENDPS_XMM_M_I { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + blend xmml, ufp1, "IMMEDIATE & mask(4)", size=4, ext=0 + blend xmmh, ufp2, "IMMEDIATE & mask(4)", size=4, ext=1 +}; + +def macroop BLENDPS_XMM_P_I { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + blend xmml, ufp1, "IMMEDIATE & mask(4)", size=4, ext=0 + blend xmmh, ufp2, "IMMEDIATE & mask(4)", size=4, ext=1 +}; + +def macroop BLENDPD_XMM_XMM_I { + blend xmml, xmmlm, "IMMEDIATE & mask(2)", size=8, ext=0 + blend xmmh, xmmhm, "IMMEDIATE & mask(2)", size=8, ext=1 +}; + +def macroop BLENDPD_XMM_M_I { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + blend xmml, ufp1, "IMMEDIATE & mask(2)", size=8, ext=0 + blend xmmh, ufp2, "IMMEDIATE & mask(2)", size=8, ext=1 +}; + +def macroop BLENDPD_XMM_P_I { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + blend xmml, ufp1, "IMMEDIATE & mask(2)", size=8, ext=0 + blend xmmh, ufp2, "IMMEDIATE & mask(2)", size=8, ext=1 +}; + +def macroop BLENDVPD_XMM_XMM { + blendxmm xmml, xmmlm, fpRegIdx("float_reg::xmmLow(0)"), size=8 + blendxmm xmmh, xmmhm, fpRegIdx("float_reg::xmmHigh(0)"), size=8 +}; + +def macroop BLENDVPD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + blendxmm xmml, ufp1, fpRegIdx("float_reg::xmmLow(0)"), size=8 + blendxmm xmmh, ufp2, fpRegIdx("float_reg::xmmHigh(0)"), size=8 +}; + +def macroop BLENDVPD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + blendxmm xmml, ufp1, fpRegIdx("float_reg::xmmLow(0)"), size=8 + blendxmm xmmh, ufp2, 
fpRegIdx("float_reg::xmmHigh(0)"), size=8 +}; + +def macroop BLENDVPS_XMM_XMM { + blendxmm xmml, xmmlm, fpRegIdx("float_reg::xmmLow(0)"), size=4 + blendxmm xmmh, xmmhm, fpRegIdx("float_reg::xmmHigh(0)"), size=4 +}; + +def macroop BLENDVPS_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + blendxmm xmml, ufp1, fpRegIdx("float_reg::xmmLow(0)"), size=4 + blendxmm xmmh, ufp2, fpRegIdx("float_reg::xmmHigh(0)"), size=4 +}; + +def macroop BLENDVPS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + blendxmm xmml, ufp1, fpRegIdx("float_reg::xmmLow(0)"), size=4 + blendxmm xmmh, ufp2, fpRegIdx("float_reg::xmmHigh(0)"), size=4 +}; + +def macroop PBLENDVB_XMM_XMM { + blendxmm xmml, xmmlm, fpRegIdx("float_reg::xmmLow(0)"), size=1 + blendxmm xmmh, xmmhm, fpRegIdx("float_reg::xmmHigh(0)"), size=1 +}; + +def macroop PBLENDVB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + blendxmm xmml, ufp1, fpRegIdx("float_reg::xmmLow(0)"), size=1 + blendxmm xmmh, ufp2, fpRegIdx("float_reg::xmmHigh(0)"), size=1 +}; + +def macroop PBLENDVB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + blendxmm xmml, ufp1, fpRegIdx("float_reg::xmmLow(0)"), size=1 + blendxmm xmmh, ufp2, fpRegIdx("float_reg::xmmHigh(0)"), size=1 +}; """ diff --git a/src/arch/x86/isa/insts/simd128/integer/data_transfer/__init__.py b/src/arch/x86/isa/insts/simd128/integer/data_transfer/__init__.py index a539b156fc..4117b59325 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_transfer/__init__.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_transfer/__init__.py @@ -33,7 +33,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-categories = ["move", "move_non_temporal", "move_mask"] +categories = ["move", "move_non_temporal", "move_mask", "move_with_shift"] microcode = """ # 128 bit multimedia and scientific data transfer instructions diff --git a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py index 576b5dc81c..fb2055ec97 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py @@ -132,4 +132,233 @@ def macroop LDDQU_XMM_P { ldfp xmml, seg, sib, "DISPLACEMENT", dataSize=8 ldfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8 }; + +def macroop PMOVSXDQ_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=8, srcSize=4, ext = Signed + extmove xmmh, ufp1, destSize=8, srcSize=4, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXDQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=8, srcSize=4, ext = Signed + extmove xmmh, ufp1, destSize=8, srcSize=4, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXDQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=8, srcSize=4, ext = Signed + extmove xmmh, ufp1, destSize=8, srcSize=4, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXWQ_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=8, srcSize=2, ext = Signed + extmove xmmh, ufp1, destSize=8, srcSize=2, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXWQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=4 + extmove xmml, ufp1, destSize=8, srcSize=2, ext = Signed + extmove xmmh, ufp1, destSize=8, srcSize=2, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXWQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=4 + extmove xmml, ufp1, destSize=8, srcSize=2, ext = Signed + extmove xmmh, ufp1, destSize=8, srcSize=2, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXWD_XMM_XMM { 
+ movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=4, srcSize=2, ext = Signed + extmove xmmh, ufp1, destSize=4, srcSize=2, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXWD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=4, srcSize=2, ext = Signed + extmove xmmh, ufp1, destSize=4, srcSize=2, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXWD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=4, srcSize=2, ext = Signed + extmove xmmh, ufp1, destSize=4, srcSize=2, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXBQ_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=8, srcSize=1, ext = Signed + extmove xmmh, ufp1, destSize=8, srcSize=1, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXBQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=2 + extmove xmml, ufp1, destSize=8, srcSize=1, ext = Signed + extmove xmmh, ufp1, destSize=8, srcSize=1, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXBQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=2 + extmove xmml, ufp1, destSize=8, srcSize=1, ext = Signed + extmove xmmh, ufp1, destSize=8, srcSize=1, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXBD_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=4, srcSize=1, ext = Signed + extmove xmmh, ufp1, destSize=4, srcSize=1, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXBD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=4 + extmove xmml, ufp1, destSize=4, srcSize=1, ext = Signed + extmove xmmh, ufp1, destSize=4, srcSize=1, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXBD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=4 + extmove xmml, ufp1, destSize=4, srcSize=1, ext = Signed + extmove xmmh, ufp1, destSize=4, srcSize=1, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXBW_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, 
destSize=2, srcSize=1, ext = Signed + extmove xmmh, ufp1, destSize=2, srcSize=1, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXBW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=2, srcSize=1, ext = Signed + extmove xmmh, ufp1, destSize=2, srcSize=1, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXBW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=2, srcSize=1, ext = Signed + extmove xmmh, ufp1, destSize=2, srcSize=1, ext = Signed + "|" + PartHi +}; + + +def macroop PMOVZXDQ_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=8, srcSize=4 + extmove xmmh, ufp1, destSize=8, srcSize=4, ext=PartHi +}; + +def macroop PMOVZXDQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=8, srcSize=4 + extmove xmmh, ufp1, destSize=8, srcSize=4, ext=PartHi +}; + +def macroop PMOVZXDQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=8, srcSize=4 + extmove xmmh, ufp1, destSize=8, srcSize=4, ext=PartHi +}; + +def macroop PMOVZXWQ_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=8, srcSize=2 + extmove xmmh, ufp1, destSize=8, srcSize=2, ext=PartHi +}; + +def macroop PMOVZXWQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=4 + extmove xmml, ufp1, destSize=8, srcSize=2 + extmove xmmh, ufp1, destSize=8, srcSize=2, ext=PartHi +}; + +def macroop PMOVZXWQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=4 + extmove xmml, ufp1, destSize=8, srcSize=2 + extmove xmmh, ufp1, destSize=8, srcSize=2, ext=PartHi +}; + +def macroop PMOVZXWD_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=4, srcSize=2 + extmove xmmh, ufp1, destSize=4, srcSize=2, ext=PartHi +}; + +def macroop PMOVZXWD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=4, srcSize=2 + extmove xmmh, ufp1, 
destSize=4, srcSize=2, ext=PartHi +}; + +def macroop PMOVZXWD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=4, srcSize=2 + extmove xmmh, ufp1, destSize=4, srcSize=2, ext=PartHi +}; + +def macroop PMOVZXBQ_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=8, srcSize=1 + extmove xmmh, ufp1, destSize=8, srcSize=1, ext=PartHi +}; + +def macroop PMOVZXBQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=2 + extmove xmml, ufp1, destSize=8, srcSize=1 + extmove xmmh, ufp1, destSize=8, srcSize=1, ext=PartHi +}; + +def macroop PMOVZXBQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=2 + extmove xmml, ufp1, destSize=8, srcSize=1 + extmove xmmh, ufp1, destSize=8, srcSize=1, ext=PartHi +}; + +def macroop PMOVZXBD_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=4, srcSize=1 + extmove xmmh, ufp1, destSize=4, srcSize=1, ext=PartHi +}; + +def macroop PMOVZXBD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=4 + extmove xmml, ufp1, destSize=4, srcSize=1 + extmove xmmh, ufp1, destSize=4, srcSize=1, ext=PartHi +}; + +def macroop PMOVZXBD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=4 + extmove xmml, ufp1, destSize=4, srcSize=1 + extmove xmmh, ufp1, destSize=4, srcSize=1, ext=PartHi +}; + +def macroop PMOVZXBW_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=2, srcSize=1 + extmove xmmh, ufp1, destSize=2, srcSize=1, ext=PartHi +}; + +def macroop PMOVZXBW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=2, srcSize=1 + extmove xmmh, ufp1, destSize=2, srcSize=1, ext=PartHi +}; + +def macroop PMOVZXBW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=2, srcSize=1 + extmove xmmh, ufp1, destSize=2, srcSize=1, ext=PartHi +}; """ diff --git a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_with_shift.py 
b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_with_shift.py new file mode 100644 index 0000000000..155e46b2ea --- /dev/null +++ b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_with_shift.py @@ -0,0 +1,59 @@ +# Copyright (c) 2007 The Hewlett-Packard Development Company +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +microcode = """ +def macroop PALIGNR_XMM_XMM_I { + movfp ufp1, xmml, dataSize=8 + palignr xmml, xmmh, xmmlm, xmmhm, "IMMEDIATE", size=8 + palignr xmmh, ufp1, xmmlm, xmmhm, "IMMEDIATE", size=8, ext=PartHi +}; + +def macroop PALIGNR_XMM_M_I { + ldfp ufp2, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp3, seg, sib, "DISPLACEMENT + 8", dataSize=8 + movfp ufp1, xmml, dataSize=8 + palignr xmml, xmmh, ufp2, ufp3, "IMMEDIATE", size=8 + palignr xmmh, ufp1, ufp2, ufp3, "IMMEDIATE", size=8, ext=PartHi +}; + +def macroop PALIGNR_XMM_P_I { + rdip t7 + ldfp ufp2, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp3, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + movfp ufp1, xmml, dataSize=8 + palignr xmml, xmmh, ufp2, ufp3, "IMMEDIATE", size=8 + palignr xmmh, ufp1, ufp2, ufp3, "IMMEDIATE", size=8, ext=PartHi +}; +""" diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa index 632cb07c92..a5ff1d179f 100644 --- a/src/arch/x86/isa/microasm.isa +++ b/src/arch/x86/isa/microasm.isa @@ -187,7 +187,7 @@ let {{ assembler.symbols[reg] = \ ctrlRegIdx(f"misc_reg::{reg.capitalize()}") - for flag in ('Scalar', 'MultHi', 'Signed'): + for flag in ('Scalar', 'MultHi', 'Signed', 'PartHi'): assembler.symbols[flag] = 'Media%sOp' % flag # Code literal which forces a default 64 bit operand size in 64 bit mode.
diff --git a/src/arch/x86/isa/microops/base.isa b/src/arch/x86/isa/microops/base.isa index aded50b472..a4fee547b6 100644 --- a/src/arch/x86/isa/microops/base.isa +++ b/src/arch/x86/isa/microops/base.isa @@ -105,6 +105,8 @@ let {{ idx_name = 'Src1' class Src2Op(object): idx_name = 'Src2' + class Src3Op(object): + idx_name = 'Src3' class RegisterOp(object): def __init__(self, it): @@ -189,6 +191,9 @@ let {{ class IntSrc2Op(IntOp, Src2Op, Operand): pass + class FloatSrc3Op(FloatOp, Src3Op, Operand): + pass + class Op2(object): @classmethod def isDual(cls): @@ -198,6 +203,13 @@ let {{ FloatType = FloatSrc2Op ImmType = Imm8Op + class Op3(object): + @classmethod + def isDual(cls): + return False + + FloatType = FloatSrc3Op + class X86Microop(object): generatorNameTemplate = "generate_%s_%d" diff --git a/src/arch/x86/isa/microops/mediaop.isa b/src/arch/x86/isa/microops/mediaop.isa index 5fcf1d378b..599b5faef5 100644 --- a/src/arch/x86/isa/microops/mediaop.isa +++ b/src/arch/x86/isa/microops/mediaop.isa @@ -1554,4 +1554,341 @@ let {{ super().__init__(size=2) op_class = 'FloatMiscOp' code = 'FTW = 0xFFFF;' + + class Blend(Media3Op): + def __init__(self, dest, src1, src2=0, **kwargs): + super().__init__(dest, src1, src2, **kwargs) + operand_types = (FloatDestOp, FloatSrc1Op, Imm8Op) + op_class = 'SimdMiscOp' + code = ''' + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = sizeof(double) / size; + int offset = ext ? items : 0; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + + if (bits(imm8, i + offset)) { + uint64_t resBits = bits(FpSrcReg1_uqw, hiIndex, loIndex); + FpDestReg_uqw = + insertBits(FpDestReg_uqw, hiIndex, loIndex, resBits); + } else { + // do nothing + } + } + ''' + + class Extmove(Media2Op): + op_class = 'SimdMiscOp' + code = ''' + int items = sizeof(double) / destSize; + int offset = partHi() ? 
items : 0; + int srcBits = srcSize * 8; + int destBits = destSize * 8; + + for (int i = 0; i < items; i++) { + int hiIndexSrc = (i + offset + 1) * srcBits - 1; + int loIndexSrc = (i + offset + 0) * srcBits; + uint64_t resBits = bits(FpSrcReg1_uqw, hiIndexSrc, loIndexSrc); + if (signedOp()) { + resBits = sext(resBits, srcBits); + } else { + // do nothing, already zero-extended + } + + int hiIndexDest = (i + 1) * destBits - 1; + int loIndexDest = (i + 0) * destBits; + FpDestReg_uqw = + insertBits(FpDestReg_uqw, hiIndexDest, + loIndexDest, resBits); + } + ''' + + class Blendxmm(Media3Op): + op_class = 'SimdMiscOp' + operand_types = (FloatDestOp, FloatSrc1Op, FloatSrc2Op) + code = ''' + int size = srcSize; + int sizeBits = size * 8; + int items = sizeof(double) / size; + + for (int i = 0; i < items; i++) { + + int maskBit = bits(FpSrcReg2_uqw, (i + 1) * sizeBits - 1); + + if (maskBit) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t resBits = bits(FpSrcReg1_uqw, hiIndex, loIndex); + FpDestReg_uqw = + insertBits(FpDestReg_uqw, hiIndex, loIndex, resBits); + } else { + // do nothing + } + } + ''' + + + class Palignr(MediaOp): + operand_types = (FloatDestOp, FloatSrc1Op, + FloatSrc2Op, FloatSrc3Op, Imm8Op) + def __init__(self, op1, op2, op3, op4, op5, **kwargs): + super().__init__(op1, op2, op3, op4, op5, **kwargs) + op_class = 'SimdMiscOp' + code = ''' + int zeroRegisterBytes = partHi() ? 24 : 32; + if (imm8 >= zeroRegisterBytes) { + FpDestReg_uqw = 0; + } else if (imm8 == 16) { + FpDestReg_uqw = FpDestReg_uqw; + } else if (imm8 == 8) { + FpDestReg_uqw = partHi() ? FpSrcReg1_uqw : FpSrcReg3_uqw; + } else if (imm8 == 0) { + FpDestReg_uqw = partHi() ? FpSrcReg3_uqw : FpSrcReg2_uqw; + } else { + int shift = imm8 * 8; + uint64_t firstFpSrcReg; + uint64_t secondFpSrcReg; + if (imm8 < 8) { + firstFpSrcReg = partHi() ? FpSrcReg3_uqw : FpSrcReg2_uqw; + secondFpSrcReg = partHi() ? 
FpSrcReg1_uqw : FpSrcReg3_uqw; + } else if (imm8 < 16) { + shift -=64; + firstFpSrcReg = partHi() ? FpSrcReg1_uqw : FpSrcReg3_uqw; + secondFpSrcReg = partHi() ? FpDestReg_uqw : FpDestReg_uqw; + } else if (imm8 < 24) { + // Only destination bytes remain in range. Read the destination + // half now, before it is overwritten below. + shift -= 128; + firstFpSrcReg = FpDestReg_uqw; + // Nothing lies above the high half, so it is zero-filled. + secondFpSrcReg = partHi() ? 0 : FpSrcReg1_uqw; + } else { // < 32 + // Only the low half gets here (the high half is zeroed by the + // first branch): FpSrcReg1 shifted down and zero-filled. + shift -= 192; + firstFpSrcReg = FpSrcReg1_uqw; + secondFpSrcReg = 0; + } + + // Result is (first >> shift) | (second << (64 - shift)). + FpDestReg_uqw = firstFpSrcReg >> shift; + // shift == 0 (imm8 == 24) has no upper part to merge, and would + // make loIndex exceed hiIndex. + if (shift != 0) { + int hiIndex = 63; + int loIndex = 64 - shift; + FpDestReg_uqw = insertBits(FpDestReg_uqw, hiIndex, + loIndex, secondFpSrcReg); + } + } + ''' + + class Extractps(Media3Op): + op_class = 'SimdMiscOp' + operand_types = (IntDestOp, FloatSrc1Op, Imm8Op) + code = ''' + if (imm8 < 2 && !partHi()) { + DestReg = (FpSrcReg1_uqw >> (32 * imm8)) & 0xFFFFFFFF; + } else if (imm8 >= 2 && partHi()) { + int shift = imm8 - 2; + DestReg = (FpSrcReg1_uqw >> (32 * shift)) & 0xFFFFFFFF; + } + ''' + + class Phminposuw(Media3Op): + op_class = 'SimdMiscOp' + code = ''' + int minIndex = 0; + uint64_t min = 0xFFFFF; + int destBits = destSize * 8; + + for (int i = 0; i < 128; i += destBits) { + uint64_t FpSrcReg = i < 64 ?
FpSrcReg1_uqw : FpSrcReg2_uqw; + // i walks all 128 bits of the source pair; index within the + // selected 64-bit register to avoid an out-of-range shift. + int lo = i % 64; + uint16_t value = bits(FpSrcReg, lo + destBits - 1, lo); + if (value < min) { + min = value; + minIndex = i / destBits; + } + } + + // Minimum goes in the low element, its index in the bits above it. + FpDestReg_uqw = min; + FpDestReg_uqw = insertBits(FpDestReg_uqw, 63, destBits, minIndex); + ''' + + class Insertps(MediaOp): + operand_types = (FloatDestOp, FloatSrc1Op, + FloatSrc2Op, FloatSrc3Op, Imm8Op) + def __init__(self, op1, op2, op3, op4, op5, **kwargs): + super().__init__(op1, op2, op3, op4, op5, **kwargs) + op_class = 'SimdMiscOp' + code = ''' + int countS = bits(imm8, 7, 6); + int countD = bits(imm8, 5, 4); + int zmask = bits(imm8, 3, 0); + + uint64_t tmp = 0; + switch (countS) { + case 0: { + tmp = bits(FpSrcReg2_uqw, 31, 0); + break; + } + case 1: { + tmp = bits(FpSrcReg2_uqw, 63, 32); + break; + } + case 2: { + tmp = bits(FpSrcReg3_uqw, 31, 0); + break; + } + case 3: { + tmp = bits(FpSrcReg3_uqw, 63, 32); + break; + } + } + + uint64_t tmp2l = partHi() ? FpSrcReg1_uqw : FpDestReg_uqw; + uint64_t tmp2h = partHi() ? FpDestReg_uqw : FpSrcReg1_uqw; + + switch (countD) { + case 0: { + tmp2l = insertBits(tmp2l, 31, 0, tmp); + break; + } + case 1: { + tmp2l = insertBits(tmp2l, 63, 32, tmp); + break; + } + case 2: { + tmp2h = insertBits(tmp2h, 31, 0, tmp); + break; + } + case 3: { + tmp2h = insertBits(tmp2h, 63, 32, tmp); + break; + } + } + + if (!partHi()) { + if (bits(zmask, 0)) { + tmp2l = insertBits(tmp2l, 31, 0, 0); + } + if (bits(zmask, 1)) { + tmp2l = insertBits(tmp2l, 63, 32, 0); + } + FpDestReg_uqw = tmp2l; + } else { + if (bits(zmask, 2)) { + tmp2h = insertBits(tmp2h, 31, 0, 0); + } + if (bits(zmask, 3)) { + tmp2h = insertBits(tmp2h, 63, 32, 0); + } + FpDestReg_uqw = tmp2h; + } + + ''' + + class Rounds(MediaOp): + operand_types = (FloatDestOp, FloatSrc1Op, IntSrc2Op, Imm8Op) + def __init__(self, op1, op2, op3, op4, **kwargs): + super().__init__(op1, op2, op3, op4, **kwargs) + op_class = 'SimdMiscOp' + code = ''' + bool isMXCSR = bits(imm8, 2); + int roundingMode = 0; + if (isMXCSR) { +
roundingMode = bits(imm8, 14, 13); + // NOTE(review): bits 14:13 lie outside an 8-bit immediate, so + // this looks like it always yields 0 -- MXCSR.RC intended? + } else { + roundingMode = bits(imm8, 1, 0); + } + + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + double arg; + if (srcSize == 4) { + floatInt fi; + fi.i = bits(FpSrcReg1_uqw, 31, 0); + arg = fi.f; + } else { + doubleInt di; + di.i = bits(FpSrcReg1_uqw, 63, 0); + arg = di.d; + } + + switch (roundingMode) { + case 0: { + // to nearest, ties to even; std::round() would round ties + // away from zero. Assumes the host FP environment is in its + // default round-to-nearest mode -- TODO confirm. + arg = std::nearbyint(arg); + break; + } + case 1: { + // down + arg = std::floor(arg); + break; + } + case 2: { + // up + arg = std::ceil(arg); + break; + } + case 3: { + // to 0 + arg = std::trunc(arg); + break; + } + } + + int destHiIndex = 0; + uint64_t argBits = 0; + if (destSize == 4) { + floatInt convertBack; + convertBack.f = arg; + argBits = convertBack.i; + destHiIndex = 31; + } else { + doubleInt convertBack; + convertBack.d = arg; + argBits = convertBack.i; + destHiIndex = 63; + } + FpDestReg_uqw = insertBits(FpDestReg_uqw, destHiIndex, 0, argBits); + ''' + + class Pabs(Media2Op): + op_class = 'SimdMiscOp' + code = ''' + int size = srcSize; + int sizeBits = size * 8; + int items = numItems(size); + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); + if (bits(arg1Bits, sizeBits - 1)) { + if (size == 1) { + uint8_t tmpBits = (uint8_t)arg1Bits; + tmpBits = ~tmpBits + 1; + arg1Bits = tmpBits; + } else if (size == 2) { + uint16_t tmpBits = (uint16_t)arg1Bits; + tmpBits = ~tmpBits + 1; + arg1Bits = tmpBits; + } else if (size == 4) { + uint32_t tmpBits = (uint32_t)arg1Bits; + tmpBits = ~tmpBits + 1; + arg1Bits = tmpBits; + } else { + assert(false); + } + } + + FpDestReg_uqw = + insertBits(FpDestReg_uqw, hiIndex, loIndex, arg1Bits); + } + ''' }}; diff --git a/src/arch/x86/isa/operands.isa b/src/arch/x86/isa/operands.isa index d8bc947085..ef635c50cc 100644 --- a/src/arch/x86/isa/operands.isa +++
b/src/arch/x86/isa/operands.isa @@ -165,8 +165,9 @@ def operands {{ 'R9': IntReg('X86ISA::int_reg::R9', 21), 'FpSrcReg1': FloatReg('src1', 22), 'FpSrcReg2': FloatReg('src2', 23), - 'FpDestReg': FloatReg('dest', 24), - 'FpData': FloatReg('data', 25), + 'FpSrcReg3': FloatReg('src3', 24), + 'FpDestReg': FloatReg('dest', 25), + 'FpData': FloatReg('data', 26), 'RIP': PCStateOp('uqw', 'pc', (None, None, 'IsControl'), 50), 'NRIP': PCStateOp('uqw', 'npc', diff --git a/src/base/bitfield.hh b/src/base/bitfield.hh index 288c5cabe9..eecea02981 100644 --- a/src/base/bitfield.hh +++ b/src/base/bitfield.hh @@ -131,6 +131,22 @@ sext(uint64_t val) return val; } +/** + * Sign-extend an N-bit value to 64 bits. Assumes all bits past the sign are + * currently zero. For true sign extension regardless of the value of the sign + * bit, see szext. + * + * @ingroup api_bitfield + */ +constexpr uint64_t +sext(uint64_t val, int N) +{ + bool sign_bit = bits(val, N - 1); + if (sign_bit) + val |= ~mask(N); + return val; +} + /** * Sign-extend an N-bit value to 64 bits. Zero any bits past the sign if * necessary.